diff --git a/Makefile.in b/Makefile.in index cfdca3d18e1deed8f4c830403349295270556414..23b6fe4ea68899ef0c94b712b42fee3e7fb252d4 100644 --- a/Makefile.in +++ b/Makefile.in @@ -640,6 +640,7 @@ all: @target_makefile_frag@ @alphaieee_frag@ @ospace_frag@ +@sw_64ieee_frag@ @host_makefile_frag@ ### diff --git a/Makefile.tpl b/Makefile.tpl index efed1511750412f7dff03a363779b78c6ecccb41..ebe66c5467b7e02d853c10c84951e4329eea7a49 100644 --- a/Makefile.tpl +++ b/Makefile.tpl @@ -563,6 +563,7 @@ all: @target_makefile_frag@ @alphaieee_frag@ @ospace_frag@ +@sw_64ieee_frag@ @host_makefile_frag@ ### diff --git a/config.guess b/config.guess index 97ad0733304d51c825cb2abbc5db47d31d32c0ef..52cad983c53ef7a4fa77bbdcf3c9af78fc29cd4e 100644 --- a/config.guess +++ b/config.guess @@ -1083,6 +1083,18 @@ EOF sparc:Linux:*:* | sparc64:Linux:*:*) echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; + sw_64:Linux:*:*) + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in + SW6) UNAME_MACHINE=sw_64sw6 ;; + SW6A) UNAME_MACHINE=sw_64sw6a ;; + SW6B) UNAME_MACHINE=sw_64sw6b ;; + SW8A) UNAME_MACHINE=sw_64sw8a ;; + SW) UNAME_MACHINE=sw_64 ;; + esac + objdump --private-headers /bin/sh | grep -q ld.so.1 + if test "$?" = 0 ; then LIBC=gnulibc1 ; fi + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; tile*:Linux:*:*) echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; diff --git a/config.sub b/config.sub index a318a46868500fbeea993e693e32701041ffad1b..aa418e7b930376dac6f3a70791e867991c380e15 100644 --- a/config.sub +++ b/config.sub @@ -1237,6 +1237,7 @@ case $cpu-$vendor in | sparclite \ | sparcv8 | sparcv9 | sparcv9b | sparcv9v | sv1 | sx* \ | spu \ + | sw_64 | sw_64sw6a | sw_64sw6b | sw_64sw8a \ | tahoe \ | tic30 | tic4x | tic54x | tic55x | tic6x | tic80 \ | tron \ diff --git a/config/intdiv0.m4 b/config/intdiv0.m4 index 55dddcf1c24c26a63f303c48ac6ff33329404f99..53dc632bce844c980c7813a43a73fc01f3396e84 100644 --- a/config/intdiv0.m4 +++ b/config/intdiv0.m4 @@ -56,7 +56,7 @@ int main () [ # Guess based on the CPU. 
case "$host_cpu" in - alpha* | i[34567]86 | m68k | s390*) + alpha* | i[34567]86 | m68k | s390* | sw_64* ) gt_cv_int_divbyzero_sigfpe="guessing yes";; *) gt_cv_int_divbyzero_sigfpe="guessing no";; diff --git a/config/tcl.m4 b/config/tcl.m4 index 4542a4b23d7239e1d37d44d0961382442f474bd2..c58bf534363ad2ada6120b36d9ddc73eabb455c9 100644 --- a/config/tcl.m4 +++ b/config/tcl.m4 @@ -1368,6 +1368,9 @@ dnl AC_CHECK_TOOL(AR, ar) if test "`uname -m`" = "alpha" ; then CFLAGS="$CFLAGS -mieee" fi + if test "`uname -m`" = "sw_64" ; then + CFLAGS="$CFLAGS -mieee" + fi if test $do64bit = yes; then AC_CACHE_CHECK([if compiler accepts -m64 flag], tcl_cv_cc_m64, [ hold_cflags=$CFLAGS @@ -1418,6 +1421,9 @@ dnl AC_CHECK_TOOL(AR, ar) if test "`uname -m`" = "alpha" ; then CFLAGS="$CFLAGS -mieee" fi + if test "`uname -m`" = "sw_64" ; then + CFLAGS="$CFLAGS -mieee" + fi ;; Lynx*) SHLIB_CFLAGS="-fPIC" diff --git a/configure b/configure index 97d5ca4fc0498f8e6074908a2a43cdd19e2e797d..1bee61dc764437da86d754ab6f4f304408c06605 100755 --- a/configure +++ b/configure @@ -777,6 +777,7 @@ ac_subst_files='serialization_dependencies host_makefile_frag target_makefile_frag alphaieee_frag +sw_64ieee_frag ospace_frag' ac_user_opts=' enable_option_checking @@ -4010,6 +4011,10 @@ case "${target}" in use_gnu_ld=no fi ;; + sw_64*-*-*) + # newlib is not 64 bit ready + noconfigdirs="$noconfigdirs target-newlib target-libgloss" + ;; tic6x-*-*) noconfigdirs="$noconfigdirs sim" ;; @@ -7161,6 +7166,15 @@ case $target in ;; esac +sw_64ieee_frag=/dev/null +case $target in + sw_64*-*-*) + # This just makes sure to use the -mieee option to build target libs. + # This should probably be set individually by each library. + sw_64ieee_frag="config/mt-sw_64ieee" + ;; +esac + # If --enable-target-optspace always use -Os instead of -O2 to build # the target libraries, similarly if it is not specified, use -Os # on selected platforms. @@ -7856,7 +7870,7 @@ case "${target}" in esac # Makefile fragments. -for frag in host_makefile_frag target_makefile_frag alphaieee_frag ospace_frag; +for frag in host_makefile_frag target_makefile_frag alphaieee_frag sw_64ieee_frag ospace_frag; do eval fragval=\$$frag if test $fragval != /dev/null; then diff --git a/configure.ac b/configure.ac index 90ccd5ef8a85e8f36658cd5c81924ca346a01eab..1e3cd04d53f3f2a6afb5ca388d1f11754fc3e044 100644 --- a/configure.ac +++ b/configure.ac @@ -1283,6 +1283,10 @@ case "${target}" in use_gnu_ld=no fi ;; + sw_64*-*-*) + # newlib is not 64 bit ready + noconfigdirs="$noconfigdirs target-newlib target-libgloss" + ;; tic6x-*-*) noconfigdirs="$noconfigdirs sim" ;; @@ -1342,6 +1346,9 @@ case "${host}" in rs6000-*-aix*) host_makefile_frag="config/mh-ppc-aix" ;; + sw_64*-linux*) + host_makefile_frag="config/mh-sw_64-linux" + ;; esac fi @@ -2666,6 +2673,15 @@ case $target in ;; esac +sw_64ieee_frag=/dev/null +case $target in + sw_64*-*-*) + # This just makes sure to use the -mieee option to build target libs. + # This should probably be set individually by each library. + sw_64ieee_frag="config/mt-sw_64ieee" + ;; +esac + # If --enable-target-optspace always use -Os instead of -O2 to build # the target libraries, similarly if it is not specified, use -Os # on selected platforms. @@ -3356,7 +3372,7 @@ case "${target}" in esac # Makefile fragments. 
-for frag in host_makefile_frag target_makefile_frag alphaieee_frag ospace_frag; +for frag in host_makefile_frag target_makefile_frag alphaieee_frag sw_64ieee_frag ospace_frag; do eval fragval=\$$frag if test $fragval != /dev/null; then @@ -3366,6 +3382,7 @@ done AC_SUBST_FILE(host_makefile_frag) AC_SUBST_FILE(target_makefile_frag) AC_SUBST_FILE(alphaieee_frag) +AC_SUBST_FILE(sw_64ieee_frag) AC_SUBST_FILE(ospace_frag) # Miscellanea: directories, flags, etc. diff --git a/contrib/compare-all-tests b/contrib/compare-all-tests index 502cc64f52270c19b4086b3d660fedaf928e5a31..02519a1f3e859d26a56011a1ef627dec1b56c906 100644 --- a/contrib/compare-all-tests +++ b/contrib/compare-all-tests @@ -33,8 +33,9 @@ ppc_opts='-m32 -m64' s390_opts='-m31 -m31/-mzarch -m64' sh_opts='-m3 -m3e -m4 -m4a -m4al -m4/-mieee -m1 -m1/-mno-cbranchdi -m2a -m2a/-mieee -m2e -m2e/-mieee' sparc_opts='-mcpu=v8/-m32 -mcpu=v9/-m32 -m64' +sw_64_opts='-mlong-double-64/-mieee -mlong-double-64 -mlong-double-128/-mieee -mlong-double-128' -all_targets='alpha arm avr bfin cris fr30 frv h8300 ia64 iq2000 m32c m32r m68k mcore mips mmix mn10300 pa pdp11 ppc sh sparc v850 vax xstormy16 xtensa' # e500 +all_targets='alpha arm avr bfin cris fr30 frv h8300 ia64 iq2000 m32c m32r m68k mcore mips mmix mn10300 pa pdp11 ppc sh sparc sw_64 v850 vax xstormy16 xtensa' # e500 test_one_file () { diff --git a/contrib/config-list.mk b/contrib/config-list.mk index d154286a497cb0c8492892b8ee52cd489efac3e8..0a8fbf0e7e0f928aa975dd6f5e4482707eba8cf0 100644 --- a/contrib/config-list.mk +++ b/contrib/config-list.mk @@ -92,6 +92,7 @@ LIST = aarch64-elf aarch64-linux-gnu aarch64-rtems \ sparc64-sun-solaris2.11OPT-with-gnu-ldOPT-with-gnu-asOPT-enable-threads=posix \ sparc-wrs-vxworks sparc64-elf sparc64-rtems sparc64-linux sparc64-freebsd6 \ sparc64-netbsd sparc64-openbsd \ + sw_64-linux-gnu sw_64-netbsd sw_64-openbsd \ tilegx-linux-gnu tilegxbe-linux-gnu tilepro-linux-gnu \ v850e-elf v850-elf v850-rtems vax-linux-gnu \ vax-netbsdelf vax-openbsd visium-elf x86_64-apple-darwin \ diff --git a/gcc/auto-inc-dec.c b/gcc/auto-inc-dec.c index 7d0d91403f3856ceb5acd6768a14d929f4ec4b33..167e8c9af064bac3488fddb8cb42121c04c932cc 100644 --- a/gcc/auto-inc-dec.c +++ b/gcc/auto-inc-dec.c @@ -892,6 +892,10 @@ parse_add_or_inc (rtx_insn *insn, bool before_mem) inc_insn.reg1_val = -INTVAL (XEXP (SET_SRC (pat), 1)); inc_insn.reg1 = GEN_INT (inc_insn.reg1_val); } +#ifdef FLAG_SW64_INC_DEC + if (inc_insn.reg1_val > 2047 || inc_insn.reg1_val < -2048) + return false; +#endif return true; } else if ((HAVE_PRE_MODIFY_REG || HAVE_POST_MODIFY_REG) @@ -1369,6 +1373,10 @@ find_mem (rtx *address_of_x) mem_insn.reg1_is_const = true; /* Match with *(reg0 + c) where c is a const. 
*/ mem_insn.reg1_val = INTVAL (reg1); +#ifdef FLAG_SW64_INC_DEC + if (mem_insn.reg1_val > 2047 || mem_insn.reg1_val < -2048) + return false; +#endif if (find_inc (true)) return true; } @@ -1697,7 +1705,11 @@ public: if (!AUTO_INC_DEC) return false; +#ifdef FLAG_SW64_INC_DEC + return (optimize > 0 && flag_auto_inc_dec && flag_sw_auto_inc_dec); +#else return (optimize > 0 && flag_auto_inc_dec); +#endif } diff --git a/gcc/builtins.c b/gcc/builtins.c index ffbb2cae9eeefc1ef119c6985f472ff6e0f2cf46..8f319ceab91bed9400231165ab269d5c0e7e2c44 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -7460,6 +7460,17 @@ expand_builtin_sync_synchronize (void) expand_mem_thread_fence (MEMMODEL_SYNC_SEQ_CST); } +#ifdef FLAG_SW64_WMEMB +static void +expand_builtin_sync_synchronize_write (void) +{ + if (TARGET_SW8A && targetm.have_memory_barrier ()) + emit_insn (targetm.gen_write_memory_barrier ()); + else + error ("Current arch don't support write memory barrier !!!"); +} +#endif + static rtx expand_builtin_thread_pointer (tree exp, rtx target) { @@ -8678,6 +8689,11 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode, expand_builtin_sync_synchronize (); return const0_rtx; +#ifdef FLAG_SW64_WMEMB + case BUILT_IN_SYNC_SYNCHRONIZE_WRITE: + expand_builtin_sync_synchronize_write (); + return const0_rtx; +#endif case BUILT_IN_ATOMIC_EXCHANGE_1: case BUILT_IN_ATOMIC_EXCHANGE_2: case BUILT_IN_ATOMIC_EXCHANGE_4: diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c index dc1a898487194c91371b9e326cd53bfb23b39d50..bdab4928cb1cca0d9b6547a8a1eb29b30c2ce14e 100644 --- a/gcc/c-family/c-opts.c +++ b/gcc/c-family/c-opts.c @@ -751,7 +751,14 @@ default_handle_c_option (size_t code ATTRIBUTE_UNUSED, const char *arg ATTRIBUTE_UNUSED, int value ATTRIBUTE_UNUSED) { +#if defined FLAG_SW64_SIMD || defined FLAG_SW64_M32 + if (code == OPT_msimd || code == OPT_m32 || code == OPT_msw_use_32align) + return true; + else + return false; +#else return false; +#endif } /* Post-switch processing. */ diff --git a/gcc/common/config/sw_64/sw_64-common.c b/gcc/common/config/sw_64/sw_64-common.c new file mode 100644 index 0000000000000000000000000000000000000000..eaf1f0d32d89e292fa928a5bebbc6507806aa762 --- /dev/null +++ b/gcc/common/config/sw_64/sw_64-common.c @@ -0,0 +1,114 @@ +/* Common hooks for Sw_64. + Copyright (C) 1992-2020 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "diagnostic-core.h" +#include "tm.h" +#include "common/common-target.h" +#include "common/common-target-def.h" +#include "opts.h" +#include "flags.h" + +int flag_fpcr_set; + +/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */ +static const struct default_options sw_64_option_optimization_table[] = { + /* Enable redundant extension instructions removal at -O2 and higher. */ + {OPT_LEVELS_2_PLUS, OPT_free, NULL, 1}, + {OPT_LEVELS_NONE, 0, NULL, 0}}; + +/* Implement TARGET_OPTION_INIT_STRUCT. 
*/ + +static void +sw_64_option_init_struct (struct gcc_options *opts ATTRIBUTE_UNUSED) +{ + opts->x_target_flags |= MASK_IEEE; + global_options.x_flag_prefetch_loop_arrays = 1; +} + +/* Implement TARGET_HANDLE_OPTION. */ + +static bool +sw_64_handle_option (struct gcc_options *opts, + struct gcc_options *opts_set ATTRIBUTE_UNUSED, + const struct cl_decoded_option *decoded, location_t loc) +{ + size_t code = decoded->opt_index; + const char *arg = decoded->arg; + int value = decoded->value; + + switch (code) + { + case OPT_mfp_regs: + if (value == 0) + opts->x_target_flags |= MASK_SOFT_FP; + break; + + case OPT_mieee: + case OPT_mieee_with_inexact: + /* add mieee for sw_64. */ + case OPT_mieee_main: + if (code == OPT_mieee) + flag_fpcr_set = 1; + else if (code == OPT_mieee_with_inexact) + flag_fpcr_set = 3; + else if (code == OPT_mieee_main) + flag_fpcr_set = 4; + opts->x_target_flags |= MASK_IEEE_CONFORMANT; + break; + + case OPT_mtls_size_: + if (value != 16 && value != 32 && value != 64) + error_at (loc, "bad value %qs for %<-mtls-size%> switch", arg); + break; + + case OPT_mtls_tlsgd_: + if (value != 16 && value != 32) + error_at (loc, "bad value %qs for -mtls-tlsgd switch", arg); + break; + + case OPT_mtls_tlsldm_: + if (value != 16 && value != 32) + error_at (loc, "bad value %qs for -mtls-tlsldm switch", arg); + break; + + case OPT_mgprel_size_: + if (value != 16 && value != 32) + error_at (loc, "bad value %qs for -mgprel-size switch", arg); + break; + } + + return true; +} + +#undef TARGET_DEFAULT_TARGET_FLAGS +#define TARGET_DEFAULT_TARGET_FLAGS \ + (TARGET_DEFAULT | TARGET_CPU_DEFAULT | TARGET_DEFAULT_EXPLICIT_RELOCS) +#undef TARGET_HANDLE_OPTION +#define TARGET_HANDLE_OPTION sw_64_handle_option + +#undef TARGET_OPTION_INIT_STRUCT +#define TARGET_OPTION_INIT_STRUCT sw_64_option_init_struct + +#undef TARGET_OPTION_OPTIMIZATION_TABLE +#define TARGET_OPTION_OPTIMIZATION_TABLE sw_64_option_optimization_table + +struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER; diff --git a/gcc/config.gcc b/gcc/config.gcc index 6fcdd771d4c32604685ebc5da3e20260fe6da2ad..789a406b62f00914e7976a147d10cd3645e3d827 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -552,6 +552,10 @@ sh[123456789lbe]*-*-* | sh-*-*) extra_options="${extra_options} fused-madd.opt" extra_objs="${extra_objs} sh_treg_combine.o sh-mem.o sh_optimize_sett_clrt.o" ;; +sw_64*-*-*) + cpu_type=sw_64 + extra_options="${extra_options} g.opt" + ;; v850*-*-*) cpu_type=v850 ;; @@ -3407,6 +3411,11 @@ sparc64-*-openbsd*) with_cpu=ultrasparc tmake_file="${tmake_file} sparc/t-sparc" ;; +sw_64*-*-linux*) + tm_file="elfos.h ${tm_file} sw_64/gnu-user.h sw_64/elf.h sw_64/linux.h sw_64/linux-elf.h glibc-stdint.h" + tmake_file="${tmake_file} sw_64/t-linux sw_64/t-sw_64" + extra_options="${extra_options} sw_64/elf.opt" + ;; tic6x-*-elf) tm_file="elfos.h ${tm_file} c6x/elf-common.h c6x/elf.h" tm_file="${tm_file} dbxelf.h tm-dwarf2.h newlib-stdint.h" @@ -3937,6 +3946,15 @@ if test x$with_cpu = x ; then ;; esac ;; + sw_64sw6a*-*-*) + with_cpu=sw6a + ;; + sw_64sw6b*-*-*) + with_cpu=sw6b + ;; + sw_64sw8a*-*-*) + with_cpu=sw8a + ;; visium-*-*) with_cpu=gr5 ;; @@ -5147,6 +5165,23 @@ case "${target}" in esac ;; + sw_64*-*-*) + supported_defaults="cpu tune" + for which in cpu tune; do + eval "val=\$with_$which" + case "$val" in + "" \ + | sw6 | sw6a | sw6b \ + | sw8a) + ;; + *) + echo "Unknown CPU used in --with-$which=$val" 1>&2 + exit 1 + ;; + esac + done + ;; + tic6x-*-*) supported_defaults="arch" diff --git a/gcc/config.host 
b/gcc/config.host index 230ab61ac05b93b1890fce376024f9184a7b4ecf..793cc7b50c7fccda6e9c18607226164f45f9a63b 100644 --- a/gcc/config.host +++ b/gcc/config.host @@ -201,6 +201,14 @@ case ${host} in ;; esac ;; + sw_64*-*-linux*) + case ${target} in + sw_64*-*-linux*) + host_extra_gcc_objs="driver-sw_64.o" + host_xmake_file="${host_xmake_file} sw_64/x-sw_64" + ;; + esac + ;; esac # Machine-specific settings. diff --git a/gcc/config.in b/gcc/config.in index 80b421d99a34dc8dff989f2184bb8d0cded7a90f..20a10eef6593bac0c0bcd854121854d358a369db 100644 --- a/gcc/config.in +++ b/gcc/config.in @@ -199,6 +199,10 @@ #undef ENABLE_LD_BUILDID #endif +/* Define if gcc should always pass --no-relax to linker for sw_64. */ +#ifndef USED_FOR_TARGET +#undef ENABLE_LD_NORELAX +#endif /* Define to 1 to enable libquadmath support */ #ifndef USED_FOR_TARGET @@ -394,6 +398,10 @@ #undef HAVE_AS_EXPLICIT_RELOCS #endif +/* Define if your assembler supports explicit relocations. */ +#ifndef USED_FOR_TARGET +#undef SW_64_ENABLE_ASAN +#endif /* Define if your assembler supports FMAF, HPC, and VIS 3.0 instructions. */ #ifndef USED_FOR_TARGET @@ -2508,3 +2516,15 @@ #undef vfork #endif +/* Define only sw64 target. */ +#undef FLAG_SW64_ATOMIC +#undef FLAG_SW64_90139 +#undef FLAG_SW64_PREFETCH +#undef FLAG_SW64_PROTECT +#undef FLAG_SW64_SIMD +#undef FLAG_SW64_AUTOSIMD +#undef FLAG_SW64_M32 +#undef FLAG_SW64_INC_DEC +#undef FLAG_SW64_DELNOP +#undef FLAG_SW64_FM +#undef FLAG_SW64_WMEMB diff --git a/gcc/config/host-linux.c b/gcc/config/host-linux.c index 26872544130dddaf335068b1c3ae6fac3dc2e90a..20522756b45eaabb477750e0acae398cbb4c9770 100644 --- a/gcc/config/host-linux.c +++ b/gcc/config/host-linux.c @@ -84,6 +84,8 @@ # define TRY_EMPTY_VM_SPACE 0x8000000000 #elif defined(__sparc__) # define TRY_EMPTY_VM_SPACE 0x60000000 +#elif defined(__sw_64) +# define TRY_EMPTY_VM_SPACE 0x10000000000 #elif defined(__mc68000__) # define TRY_EMPTY_VM_SPACE 0x40000000 #elif defined(__aarch64__) && defined(__ILP32__) diff --git a/gcc/config/sw_64/constraints.md b/gcc/config/sw_64/constraints.md new file mode 100644 index 0000000000000000000000000000000000000000..e5d5c7c7697b7027f132f34325bf88db21bcd6ab --- /dev/null +++ b/gcc/config/sw_64/constraints.md @@ -0,0 +1,123 @@ +;; Constraint definitions for Sw_64. +;; Copyright (C) 2007-2020 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;;; Unused letters: +;;; ABCDEF H V YZ +;;; de ghijkl pq tu wxyz + +;; Integer register constraints. + +(define_register_constraint "a" "R24_REG" + "General register 24, input to division routine") + +(define_register_constraint "b" "R25_REG" + "General register 24, input to division routine") + +(define_register_constraint "c" "R27_REG" + "General register 27, function call address") + +(define_register_constraint "f" "TARGET_FPREGS ? 
FLOAT_REGS : NO_REGS" + "Any floating-point register") + +(define_register_constraint "v" "R0_REG" + "General register 0, function value return address") + +(define_memory_constraint "w" + "A memory whose address is only a register" + (match_operand 0 "mem_noofs_operand")) + +;; Integer constant constraints. +(define_constraint "I" + "An unsigned 8 bit constant" + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 0, 255)"))) + +(define_constraint "J" + "The constant zero" + (and (match_code "const_int") + (match_test "ival == 0"))) + +(define_constraint "K" + "Signed 16-bit integer constant" + (and (match_code "const_int") + (match_test "IN_RANGE (ival, -32768, 32767)"))) + +(define_constraint "L" + "A shifted signed 16-bit constant appropriate for LDAH" + (and (match_code "const_int") + (match_test "(ival & 0xffff) == 0 + && (ival >> 31 == -1 || ival >> 31 == 0)"))) + +(define_constraint "M" + "A valid operand of a ZAP insn" + (and (match_code "const_int") + (match_test "zap_mask (ival) != 0"))) + +(define_constraint "N" + "A complemented unsigned 8-bit constant" + (and (match_code "const_int") + (match_test "IN_RANGE (~ival, 0, 255)"))) + +(define_constraint "O" + "A negated unsigned 8-bit constant" + (and (match_code "const_int") + (match_test "IN_RANGE (-ival, 0, 255)"))) + +(define_constraint "P" + "The constant 1, 2 or 3" + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 1, 3)"))) + +;; Floating-point constant constraints. +(define_constraint "G" + "The floating point zero constant" + (and (match_code "const_double") + (match_test "op == CONST0_RTX (mode)"))) + +;; "Extra" constraints. + +;; A memory location that is not a reference +;; (using an AND) to an unaligned location. +(define_memory_constraint "Q" + "@internal A normal_memory_operand" + (and (match_code "mem") + (not (match_code "and" "0")))) + +(define_constraint "R" + "@internal A direct_call_operand" + (match_operand:DI 0 "direct_call_operand")) + +(define_constraint "S" + "An unsigned 6-bit constant" + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 0, 63)"))) + +(define_constraint "T" + "@internal A high-part symbol" + (match_code "high")) + +(define_constraint "W" + "A vector zero constant" + (and (match_code "const_vector") + (match_test "op == CONST0_RTX (mode)"))) + +(define_constraint "Y" + "An unsigned 5-bit constant" + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 0, 31)"))) diff --git a/gcc/config/sw_64/driver-sw_64.c b/gcc/config/sw_64/driver-sw_64.c new file mode 100644 index 0000000000000000000000000000000000000000..84a3692c81f7d558bdbb6c0adde5e264870509a9 --- /dev/null +++ b/gcc/config/sw_64/driver-sw_64.c @@ -0,0 +1,101 @@ +/* Subroutines for the gcc driver. + Copyright (C) 2009-2020 Free Software Foundation, Inc. + Contributed by Arthur Loiret + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +#define IN_TARGET_CODE 1 + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" + +/* Chip family type IDs, returned by implver instruction. */ +#define IMPLVER_SW6_FAMILY 2 /* SW6 */ +#define IMPLVER_SW8_FAMILY 4 /* SW8 */ + +/* Bit defines for amask instruction. */ +#define AMASK_BWX 0x1 /* byte/word extension. */ +#define AMASK_FIX \ + 0x2 /* sqrt and f <-> i conversions \ + extension. */ +#define AMASK_CIX 0x4 /* count extension. */ +#define AMASK_MVI 0x100 /* multimedia extension. */ +#define AMASK_PRECISE 0x200 /* Precise arithmetic traps. */ +#define AMASK_LOCKPFTCHOK \ + 0x1000 /* Safe to prefetch lock cache \ + block. */ +#define AMASK_SW6A (1U << 16) +#define AMASK_SW6B (1U << 17) +#define AMASK_SW8A (1U << 18) +/* This will be called by the spec parser in gcc.c when it sees + a %:local_cpu_detect(args) construct. Currently it will be called + with either "cpu" or "tune" as argument depending on if -mcpu=native + or -mtune=native is to be substituted. + + It returns a string containing new command line parameters to be + put at the place of the above two options, depending on what CPU + this is executed. E.g. "-mcpu=sw6" on an Sw_64 for + -mcpu=native. If the routine can't detect a known processor, + the -mcpu or -mtune option is discarded. + + ARGC and ARGV are set depending on the actual arguments given + in the spec. */ +const char * +host_detect_local_cpu (int argc, const char **argv) +{ + static const struct cpu_types + { + long implver; + long amask; + const char *const cpu; + } cpu_types[] = {{IMPLVER_SW6_FAMILY, + AMASK_BWX | AMASK_FIX | AMASK_CIX | AMASK_SW6A, "sw6a"}, + {IMPLVER_SW6_FAMILY, + AMASK_BWX | AMASK_FIX | AMASK_CIX | AMASK_SW6B, "sw6b"}, + {IMPLVER_SW8_FAMILY, + AMASK_BWX | AMASK_FIX | AMASK_CIX | AMASK_SW8A, "sw8a"}, + {0, 0, NULL}}; + long implver; + long amask; + const char *cpu; + int i; + + if (argc < 1) + return NULL; + + if (strcmp (argv[0], "cpu") && strcmp (argv[0], "tune")) + return NULL; + + implver = __builtin_sw_64_implver (); + amask = __builtin_sw_64_amask (~0L); + cpu = NULL; + + for (i = 0; cpu_types[i].cpu != NULL; i++) + if (implver == cpu_types[i].implver + && (~amask & cpu_types[i].amask) == cpu_types[i].amask) + { + cpu = cpu_types[i].cpu; + break; + } + + if (cpu == NULL) + return NULL; + + return concat ("-m", argv[0], "=", cpu, NULL); +} diff --git a/gcc/config/sw_64/elf.h b/gcc/config/sw_64/elf.h new file mode 100644 index 0000000000000000000000000000000000000000..559a8172ac069ea198a2545d667a008ba42169df --- /dev/null +++ b/gcc/config/sw_64/elf.h @@ -0,0 +1,194 @@ +/* Definitions of target machine for GNU compiler, for Sw_64 w/ELF. + Copyright (C) 1996-2020 Free Software Foundation, Inc. + Contributed by Richard Henderson (rth@tamu.edu). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +#undef CC1_SPEC +#define CC1_SPEC "%{G*}" + +#undef ASM_SPEC +#define ASM_SPEC \ + "%{G*} %{relax:-relax} %{!gstabs*:-no-mdebug}%{gstabs*:-mdebug} " \ + "%{mcpu=*:-m%*}" + +/* Do not output a .file directive at the beginning of the input file. */ + +#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE +#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true + +/* This is how to output an assembler line + that says to advance the location counter + to a multiple of 2**LOG bytes. */ + +#define ASM_OUTPUT_ALIGN(FILE, LOG) \ + if ((LOG) != 0) \ + fprintf (FILE, "\t.align %d\n", LOG); + +/* This says how to output assembler code to declare an + uninitialized internal linkage data object. Under SVR4, + the linker seems to want the alignment of data objects + to depend on their types. We do exactly that here. */ + +#undef ASM_OUTPUT_ALIGNED_LOCAL +#define ASM_OUTPUT_ALIGNED_LOCAL(FILE, NAME, SIZE, ALIGN) \ + do \ + { \ + if ((SIZE) <= (unsigned HOST_WIDE_INT) g_switch_value) \ + switch_to_section (sbss_section); \ + else \ + switch_to_section (bss_section); \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \ + if (!flag_inhibit_size_directive) \ + ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, SIZE); \ + ASM_OUTPUT_ALIGN ((FILE), exact_log2 ((ALIGN) / BITS_PER_UNIT)); \ + ASM_OUTPUT_LABEL (FILE, NAME); \ + ASM_OUTPUT_SKIP ((FILE), (SIZE) ? (SIZE) : 1); \ + } \ + while (0) + +/* This says how to output assembler code to declare an + uninitialized external linkage data object. */ + +#undef ASM_OUTPUT_ALIGNED_BSS +#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \ + do \ + { \ + ASM_OUTPUT_ALIGNED_LOCAL (FILE, NAME, SIZE, ALIGN); \ + } \ + while (0) + +#undef BSS_SECTION_ASM_OP +#define BSS_SECTION_ASM_OP "\t.section\t.bss" +#undef SBSS_SECTION_ASM_OP +#define SBSS_SECTION_ASM_OP "\t.section\t.sbss,\"aw\"" +#undef SDATA_SECTION_ASM_OP +#define SDATA_SECTION_ASM_OP "\t.section\t.sdata,\"aw\"" + +/* This is how we tell the assembler that two symbols have the same value. */ + +#undef ASM_OUTPUT_DEF +#define ASM_OUTPUT_DEF(FILE, ALIAS, NAME) \ + do \ + { \ + assemble_name (FILE, ALIAS); \ + fputs (" = ", FILE); \ + assemble_name (FILE, NAME); \ + fputc ('\n', FILE); \ + } \ + while (0) + +#undef ASM_OUTPUT_DEF_FROM_DECLS +#define ASM_OUTPUT_DEF_FROM_DECLS(FILE, DECL, TARGET) \ + do \ + { \ + const char *alias = XSTR (XEXP (DECL_RTL (DECL), 0), 0); \ + const char *name = IDENTIFIER_POINTER (TARGET); \ + if (TREE_CODE (DECL) == FUNCTION_DECL) \ + { \ + fputc ('$', FILE); \ + assemble_name (FILE, alias); \ + fputs ("..ng = $", FILE); \ + assemble_name (FILE, name); \ + fputs ("..ng\n", FILE); \ + } \ + ASM_OUTPUT_DEF (FILE, alias, name); \ + } \ + while (0) + +/* Provide a STARTFILE_SPEC appropriate for ELF. Here we add the + (even more) magical crtbegin.o file which provides part of the + support for getting C++ file-scope static object constructed + before entering `main'. */ + +#undef STARTFILE_SPEC +#ifdef HAVE_LD_PIE +#define STARTFILE_SPEC \ + "%{!shared: %{pg|p:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}}\ + crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}" +#else +#define STARTFILE_SPEC \ + "%{!shared: %{pg|p:gcrt1.o%s;:crt1.o%s}}\ + crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}" +#endif + +/* Provide a ENDFILE_SPEC appropriate for ELF. Here we tack on the + magical crtend.o file which provides part of the support for + getting C++ file-scope static object constructed before entering + `main', followed by a normal ELF "finalizer" file, `crtn.o'. 
*/ + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC \ + "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ + %{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s" + +/* This variable should be set to 'true' if the target ABI requires + unwinding tables even when exceptions are not used. */ +#define TARGET_UNWIND_TABLES_DEFAULT true + +/* Select a format to encode pointers in exception handling data. CODE + is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is + true if the symbol may be affected by dynamic relocations. + + Since application size is already constrained to <2GB by the form of + the ldgp relocation, we can use a 32-bit pc-relative relocation to + static data. Dynamic data is accessed indirectly to allow for read + only EH sections. */ +#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \ + (((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | DW_EH_PE_sdata4) + +/* If defined, a C statement to be executed just prior to the output of + assembler code for INSN. */ +#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \ + (sw_64_this_literal_sequence_number = 0, \ + sw_64_this_gpdisp_sequence_number = 0) +extern int sw_64_this_literal_sequence_number; +extern int sw_64_this_gpdisp_sequence_number; + +/* Since the bits of the _init and _fini function is spread across + many object files, each potentially with its own GP, we must assume + we need to load our GP. Further, the .init/.fini section can + easily be more than 4MB away from the function to call so we can't + use bsr. */ +// jsr->call +#ifdef __sw_64_sw8a__ +#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ + asm (SECTION_OP "\n" \ + " addpi 0, $29\n" \ + " ldgp $29,0($29)\n" \ + " unop\n" \ + " call $26," USER_LABEL_PREFIX #FUNC "\n" \ + " .align 3\n" \ + " .previous"); +#else +#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ + asm (SECTION_OP "\n" \ + " br $29,1f\n" \ + "1: ldgp $29,0($29)\n" \ + " unop\n" \ + " call $26," USER_LABEL_PREFIX #FUNC "\n" \ + " .align 3\n" \ + " .previous"); +#endif + +/* If we have the capability create headers for efficient EH lookup. + As of Jan 2002, only glibc 2.2.4 can actually make use of this, but + I imagine that other systems will catch up. In the meantime, it + doesn't harm to make sure that the data exists to be used later. */ +#if defined HAVE_LD_EH_FRAME_HDR +#define LINK_EH_SPEC "%{!static|static-pie:--eh-frame-hdr} " +#endif diff --git a/gcc/config/sw_64/elf.opt b/gcc/config/sw_64/elf.opt new file mode 100644 index 0000000000000000000000000000000000000000..9059fee8c6c9e9982ac17570c2ad58fa3371584c --- /dev/null +++ b/gcc/config/sw_64/elf.opt @@ -0,0 +1,29 @@ +; Sw_64 ELF options. + +; Copyright (C) 2011-2020 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +; See the GCC internals manual (options.texi) for a description of +; this file's format. + +; Please try to keep this file in ASCII collating order. 
+ +relax +Driver + +; This comment is to ensure we retain the blank line above. diff --git a/gcc/config/sw_64/freebsd.h b/gcc/config/sw_64/freebsd.h new file mode 100644 index 0000000000000000000000000000000000000000..f0b599b7991f5c8cd93545e621832ee1d2685e67 --- /dev/null +++ b/gcc/config/sw_64/freebsd.h @@ -0,0 +1,69 @@ +/* Definitions for Sw_64 running FreeBSD using the ELF format + Copyright (C) 2000-2020 Free Software Foundation, Inc. + Contributed by David E. O'Brien and BSDi. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#undef EXTRA_SPECS +#define EXTRA_SPECS \ + { \ + "fbsd_dynamic_linker", FBSD_DYNAMIC_LINKER \ + } + +/* Provide a CPP_SPEC appropriate for FreeBSD/sw_64 -- dealing with + the GCC option `-posix'. */ + +#undef CPP_SPEC +#define CPP_SPEC "%{posix:-D_POSIX_SOURCE}" + +#define LINK_SPEC \ + "%{G*} %{relax:-relax} \ + %{p:%nconsider using '-pg' instead of '-p' with gprof (1)} \ + %{assert*} %{R*} %{rpath*} %{defsym*} \ + %{shared:-Bshareable %{h*} %{soname*}} \ + %{!shared: \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker %(fbsd_dynamic_linker) } \ + %{static:-Bstatic}} \ + %{symbolic:-Bsymbolic}" + +/************************[ Target stuff ]***********************************/ + +/* Define the actual types of some ANSI-mandated types. + Needs to agree with . GCC defaults come from c-decl.c, + c-common.c, and config//.h. */ + +/* sw_64.h gets this wrong for FreeBSD. We use the GCC defaults instead. */ +#undef WCHAR_TYPE + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + +#define TARGET_ELF 1 + +#undef HAS_INIT_SECTION + +/* Show that we need a GP when profiling. */ +#undef TARGET_PROFILING_NEEDS_GP +#define TARGET_PROFILING_NEEDS_GP 1 + +/* Don't default to pcc-struct-return, we want to retain compatibility with + older FreeBSD releases AND pcc-struct-return may not be reentrant. */ + +#undef DEFAULT_PCC_STRUCT_RETURN +#define DEFAULT_PCC_STRUCT_RETURN 0 diff --git a/gcc/config/sw_64/gnu-user.h b/gcc/config/sw_64/gnu-user.h new file mode 100644 index 0000000000000000000000000000000000000000..2c40cb84b7ba0070712cf743ae053f47ec92e73a --- /dev/null +++ b/gcc/config/sw_64/gnu-user.h @@ -0,0 +1,177 @@ +/* Definitions for systems using, at least optionally, a GNU + (glibc-based) userspace or other userspace with libc derived from + glibc (e.g. uClibc) or for which similar specs are appropriate. + Copyright (C) 1995-2020 Free Software Foundation, Inc. + Contributed by Eric Youngdale. + Modified for stabs-in-ELF by H.J. Lu (hjl@lucon.org). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* Don't assume anything about the header files. */ +//#define SYSTEM_IMPLICIT_EXTERN_C +/* +#undef ASM_APP_ON +#define ASM_APP_ON "#APP\n" + +#undef ASM_APP_OFF +#define ASM_APP_OFF "#NO_APP\n" +*/ +#if ENABLE_OFFLOADING == 1 +#define CRTOFFLOADBEGIN "%{fopenacc|fopenmp:crtoffloadbegin%O%s}" +#define CRTOFFLOADEND "%{fopenacc|fopenmp:crtoffloadend%O%s}" +#else +#define CRTOFFLOADBEGIN "" +#define CRTOFFLOADEND "" +#endif + +/* Provide a STARTFILE_SPEC appropriate for GNU userspace. Here we add + the GNU userspace magical crtbegin.o file (see crtstuff.c) which + provides part of the support for getting C++ file-scope static + object constructed before entering `main'. */ + +#if defined HAVE_LD_PIE +#define GNU_USER_TARGET_STARTFILE_SPEC \ + "%{shared:; \ + pg|p|profile:%{static-pie:grcrt1.o%s;:gcrt1.o%s}; \ + static:crt1.o%s; \ + static-pie:rcrt1.o%s; \ + " PIE_SPEC ":Scrt1.o%s; \ + :crt1.o%s} \ + crti.o%s \ + %{static:crtbeginT.o%s; \ + shared|static-pie|" PIE_SPEC ":crtbeginS.o%s; \ + :crtbegin.o%s} \ + %{fvtable-verify=none:%s; \ + fvtable-verify=preinit:vtv_start_preinit.o%s; \ + fvtable-verify=std:vtv_start.o%s} \ + " CRTOFFLOADBEGIN +#else +#define GNU_USER_TARGET_STARTFILE_SPEC \ + "%{shared:; \ + pg|p|profile:gcrt1.o%s; \ + :crt1.o%s} \ + crti.o%s \ + %{static:crtbeginT.o%s; \ + shared|pie|static-pie:crtbeginS.o%s; \ + :crtbegin.o%s} \ + %{fvtable-verify=none:%s; \ + fvtable-verify=preinit:vtv_start_preinit.o%s; \ + fvtable-verify=std:vtv_start.o%s} \ + " CRTOFFLOADBEGIN +#endif +#undef STARTFILE_SPEC +#define STARTFILE_SPEC GNU_USER_TARGET_STARTFILE_SPEC + +/* Provide a ENDFILE_SPEC appropriate for GNU userspace. Here we tack on + the GNU userspace magical crtend.o file (see crtstuff.c) which + provides part of the support for getting C++ file-scope static + object constructed before entering `main', followed by a normal + GNU userspace "finalizer" file, `crtn.o'. */ + +#if defined HAVE_LD_PIE +#define GNU_USER_TARGET_ENDFILE_SPEC \ + "%{fvtable-verify=none:%s; \ + fvtable-verify=preinit:vtv_end_preinit.o%s; \ + fvtable-verify=std:vtv_end.o%s} \ + %{static:crtend.o%s; \ + shared|static-pie|" PIE_SPEC ":crtendS.o%s; \ + :crtend.o%s} \ + crtn.o%s \ + " CRTOFFLOADEND +#else +#define GNU_USER_TARGET_ENDFILE_SPEC \ + "%{fvtable-verify=none:%s; \ + fvtable-verify=preinit:vtv_end_preinit.o%s; \ + fvtable-verify=std:vtv_end.o%s} \ + %{static:crtend.o%s; \ + shared|pie|static-pie:crtendS.o%s; \ + :crtend.o%s} \ + crtn.o%s \ + " CRTOFFLOADEND +#endif +#undef ENDFILE_SPEC +#define ENDFILE_SPEC GNU_USER_TARGET_ENDFILE_SPEC + +/* This is for -profile to use -lc_p instead of -lc. */ +#define GNU_USER_TARGET_CC1_SPEC "%{profile:-p}" +#ifndef CC1_SPEC +#define CC1_SPEC GNU_USER_TARGET_CC1_SPEC +#endif + +/* The GNU C++ standard library requires that these macros be defined. 
*/ +#undef CPLUSPLUS_CPP_SPEC +#define CPLUSPLUS_CPP_SPEC "-D_GNU_SOURCE %(cpp)" + +#define GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC \ + "%{shared:-lc} \ + %{!shared:%{profile:-lc_p}%{!profile:-lc}}" + +#define GNU_USER_TARGET_LIB_SPEC \ + "%{pthread:-lpthread} " GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC + +#undef LIB_SPEC +#define LIB_SPEC GNU_USER_TARGET_LIB_SPEC + +#if defined HAVE_LD_EH_FRAME_HDR +#define LINK_EH_SPEC "%{!static|static-pie:--eh-frame-hdr} " +#endif + +#undef LINK_GCC_C_SEQUENCE_SPEC +#define LINK_GCC_C_SEQUENCE_SPEC \ + "%{static|static-pie:--start-group} %G %L \ + %{static|static-pie:--end-group}%{!static:%{!static-pie:%G}}" + +/* Use --as-needed -lgcc_s for eh support. */ +#ifdef HAVE_LD_AS_NEEDED +#define USE_LD_AS_NEEDED 1 +#endif + +#define TARGET_POSIX_IO + +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION gnu_libc_has_function + +/* Link -lasan early on the command line. For -static-libasan, don't link + it for -shared link, the executable should be compiled with -static-libasan + in that case, and for executable link with --{,no-}whole-archive around + it to force everything into the executable. And similarly for -ltsan + and -llsan. */ +#if defined HAVE_LD_STATIC_DYNAMIC +#undef LIBASAN_EARLY_SPEC +#define LIBASAN_EARLY_SPEC \ + "%{!shared:libasan_preinit%O%s} " \ + "%{static-libasan:%{!shared:" LD_STATIC_OPTION \ + " --whole-archive -lasan --no-whole-archive " LD_DYNAMIC_OPTION \ + "}}%{!static-libasan:-lasan}" +#undef LIBTSAN_EARLY_SPEC +#define LIBTSAN_EARLY_SPEC \ + "%{!shared:libtsan_preinit%O%s} " \ + "%{static-libtsan:%{!shared:" LD_STATIC_OPTION \ + " --whole-archive -ltsan --no-whole-archive " LD_DYNAMIC_OPTION \ + "}}%{!static-libtsan:-ltsan}" +#undef LIBLSAN_EARLY_SPEC +#define LIBLSAN_EARLY_SPEC \ + "%{!shared:liblsan_preinit%O%s} " \ + "%{static-liblsan:%{!shared:" LD_STATIC_OPTION \ + " --whole-archive -llsan --no-whole-archive " LD_DYNAMIC_OPTION \ + "}}%{!static-liblsan:-llsan}" +#endif diff --git a/gcc/config/sw_64/linux-elf.h b/gcc/config/sw_64/linux-elf.h new file mode 100644 index 0000000000000000000000000000000000000000..f3039c2ff917a98a6423556554945fbb1a13b458 --- /dev/null +++ b/gcc/config/sw_64/linux-elf.h @@ -0,0 +1,54 @@ +/* Definitions of target machine for GNU compiler + for Sw_64 Linux-based GNU systems using ELF. + Copyright (C) 1996-2020 Free Software Foundation, Inc. + Contributed by Richard Henderson. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +#undef EXTRA_SPECS +#define EXTRA_SPECS {"elf_dynamic_linker", ELF_DYNAMIC_LINKER}, + +#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2" +#define UCLIBC_DYNAMIC_LINKER "/lib/ld-uClibc.so.0" +#if DEFAULT_LIBC == LIBC_UCLIBC +#define CHOOSE_DYNAMIC_LINKER(G, U) "%{mglibc:" G ";:" U "}" +#elif DEFAULT_LIBC == LIBC_GLIBC +#define CHOOSE_DYNAMIC_LINKER(G, U) "%{muclibc:" U ";:" G "}" +#else +#error "Unsupported DEFAULT_LIBC" +#endif +#define GNU_USER_DYNAMIC_LINKER \ + CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER, UCLIBC_DYNAMIC_LINKER) + +#define ELF_DYNAMIC_LINKER GNU_USER_DYNAMIC_LINKER + +#define LINK_SPEC \ + "-m elf64sw_64 %{G*} %{relax:-relax} \ + %{O*:-O3} %{!O*:-O1} \ + %{shared:-shared} \ + %{!shared: \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker %(elf_dynamic_linker)} \ + %{static:-static}}" + +#undef LIB_SPEC +#define LIB_SPEC \ + "%{pthread:-lpthread} " \ + "%{shared:-lc}%{!shared:%{profile:-lc_p}%{!profile:-lc}} " + +#define TARGET_ASM_FILE_END file_end_indicate_exec_stack diff --git a/gcc/config/sw_64/linux.h b/gcc/config/sw_64/linux.h new file mode 100644 index 0000000000000000000000000000000000000000..023fd9fdeeebd90af34bd1dc28c1beab191485a5 --- /dev/null +++ b/gcc/config/sw_64/linux.h @@ -0,0 +1,105 @@ +/* Definitions of target machine for GNU compiler, + for Sw_64 Linux-based GNU systems. + Copyright (C) 1996-2020 Free Software Foundation, Inc. + Contributed by Richard Henderson. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__gnu_linux__"); \ + builtin_define ("_LONGLONG"); \ + builtin_define_std ("linux"); \ + builtin_define_std ("unix"); \ + builtin_assert ("system=linux"); \ + builtin_assert ("system=unix"); \ + builtin_assert ("system=posix"); \ + /* The GNU C++ standard library requires this. */ \ + if (c_dialect_cxx ()) \ + builtin_define ("_GNU_SOURCE"); \ + } \ + while (0) + +#undef LIB_SPEC +#define LIB_SPEC \ + "%{pthread:-lpthread} \ + %{shared:-lc} \ + %{!shared: %{profile:-lc_p}%{!profile:-lc}}" + +#undef CPP_SPEC +#define CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}" + +/* Show that we need a GP when profiling. */ +#undef TARGET_PROFILING_NEEDS_GP +#define TARGET_PROFILING_NEEDS_GP 1 + +/* Don't care about faults in the prologue. 
*/ +#undef TARGET_CAN_FAULT_IN_PROLOGUE +#define TARGET_CAN_FAULT_IN_PROLOGUE 1 + +#undef WCHAR_TYPE +#define WCHAR_TYPE "int" + +#ifdef SINGLE_LIBC +#define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC) +#define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC) +#define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC) +#undef OPTION_MUSL +#define OPTION_MUSL (DEFAULT_LIBC == LIBC_MUSL) +#else +#define OPTION_GLIBC (linux_libc == LIBC_GLIBC) +#define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC) +#define OPTION_BIONIC (linux_libc == LIBC_BIONIC) +#undef OPTION_MUSL +#define OPTION_MUSL (linux_libc == LIBC_MUSL) +#endif + +/* Determine what functions are present at the runtime; + this includes full c99 runtime and sincos. */ +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION linux_libc_has_function + +#define TARGET_POSIX_IO + +#define LINK_GCC_C_SEQUENCE_SPEC \ + "%{static|static-pie:--start-group} %G %L \ + %{static|static-pie:--end-group}%{!static:%{!static-pie:%G}}" + +/* Use --as-needed -lgcc_s for eh support. */ +#ifdef HAVE_LD_AS_NEEDED +#define USE_LD_AS_NEEDED 1 +#endif + +/* Define if long doubles should be mangled as 'g'. */ +#define TARGET_ALTERNATE_LONG_DOUBLE_MANGLING + +/* -mcpu=native handling only makes sense with compiler running on + an Sw_64 chip. */ +#if defined __sw_64__ || defined __sw_64 +extern const char * +host_detect_local_cpu (int argc, const char **argv); +#define EXTRA_SPEC_FUNCTIONS {"local_cpu_detect", host_detect_local_cpu}, + +#define MCPU_MTUNE_NATIVE_SPECS \ + " %{mcpu=native:%. + +(define_insn "*addsi_er_high_l" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_operand:SI 1 "register_operand" "r") + (high:SI (match_operand:SI 2 "local_symbolic_operand"))))] + "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS && reload_completed" + "ldih %0,%2(%1)\t\t!gprelhigh" + [(set_attr "usegp" "yes")]) + +(define_split + [(set (match_operand:SI 0 "register_operand") + (high:SI (match_operand:SI 1 "local_symbolic_operand")))] + "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 0) + (plus:SI (match_dup 2) (high:SI (match_dup 1))))] + "operands[2] = pic_offset_table_rtx;") + +(define_insn "movsi_er_high_g" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "symbolic_operand") + (match_operand 3 "const_int_operand")] + UNSPEC_LITERAL))] + "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS" +{ + if (INTVAL (operands[3]) == 0) + return "ldw %0,%2(%1)\t\t!literal"; + else + return "ldw %0,%2(%1)\t\t!literal!%3"; +} + [(set_attr "type" "ldsym")]) + +(define_insn "movsi_er_high_g32" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:SI 1 "register_operand" "r") + (match_operand:DI 2 "symbolic_operand") + (match_operand 3 "const_int_operand")] + UNSPEC_LITERAL))] + "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS" +{ + if (INTVAL (operands[3]) == 0) + return "ldw %0,%2(%1)\t\t!literal"; + else + return "ldw %0,%2(%1)\t\t!literal!%3"; +} + [(set_attr "type" "ldsym")]) + +(define_split + [(set (match_operand:SI 0 "register_operand") + (match_operand:SI 1 "global_symbolic_operand"))] + "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 0) + (unspec:SI [(match_dup 2) + (match_dup 1) + (const_int 0)] UNSPEC_LITERAL))] + "operands[2] = pic_offset_table_rtx;") + +(define_insn "*movsi_er_low_l" + [(set (match_operand:SI 0 "register_operand" "=r") + (lo_sum:SI (match_operand:SI 1 "register_operand" "r") + 
(match_operand:SI 2 "local_symbolic_operand")))] + "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS" +{ + if (true_regnum (operands[1]) == 29) + return "ldi %0,%2(%1)\t\t!gprel"; + else + return "ldi %0,%2(%1)\t\t!gprellow"; +} + [(set_attr "usegp" "yes")]) + +(define_split + [(set (match_operand:SI 0 "register_operand") + (match_operand:SI 1 "small_symbolic_operand"))] + "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 0) + (lo_sum:SI (match_dup 2) (match_dup 1)))] + "operands[2] = pic_offset_table_rtx;") + +(define_split + [(set (match_operand:SI 0 "register_operand") + (match_operand:SI 1 "local_symbolic_operand"))] + "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 0) + (plus:SI (match_dup 2) (high:SI (match_dup 1)))) + (set (match_dup 0) + (lo_sum:SI (match_dup 0) (match_dup 1)))] + "operands[2] = pic_offset_table_rtx;") + +(define_expand "prologue_ldgp_32" + [(set (match_dup 0) + (unspec_volatile:SI [(match_dup 1) (match_dup 2)] UNSPECV_LDGP1)) + (set (match_dup 0) + (unspec_volatile:SI [(match_dup 0) (match_dup 2)] UNSPECV_PLDGP2))] + "TARGET_SW_M32" +{ + operands[0] = pic_offset_table_rtx; + operands[1] = gen_rtx_REG (Pmode, 27); + operands[2] = (TARGET_EXPLICIT_RELOCS + ? GEN_INT (sw_64_next_sequence_number++) + : const0_rtx); +}) + +(define_insn "*ldgp_er_1_32" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPECV_LDGP1))] + "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS" + "ldih %0,0(%1)\t\t!gpdisp!%2" + [(set_attr "cannot_copy" "true")]) + +(define_insn "*ldgp_er_2_32" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPEC_LDGP2))] + "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS" + "ldi %0,0(%1)\t\t!gpdisp!%2" + [(set_attr "cannot_copy" "true")]) + +(define_insn "*prologue_ldgp_er_2_32" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPECV_PLDGP2))] + "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS" +{ + if (stfp3_flag == 1) + return "ldi %0,0(%1)\t\t!gpdisp!%2\n$%~..ng:\;setfpec1"; + else if (stfp3_flag == 2) + return "ldi %0,0(%1)\t\t!gpdisp!%2\n$%~..ng:\;setfpec3"; + else if (flag_fpcr_set == 1) + return "ldi %0,0(%1)\t\t!gpdisp!%2\n$%~..ng:\;setfpec1"; + else if (flag_fpcr_set == 3) + return "ldi %0,0(%1)\t\t!gpdisp!%2\n$%~..ng:\;setfpec0"; + else + return "ldi %0,0(%1)\t\t!gpdisp!%2\n$%~..ng:"; +} + [(set_attr "cannot_copy" "true")]) + +(define_insn "*prologue_ldgp_1_32" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPECV_LDGP1))] + "TARGET_SW_M32" +{ + if (stfp3_flag == 1) + return "ldgp %0,0(%1)\n$%~..ng:\;setfpec1"; + else if (stfp3_flag == 2) + return "ldgp %0,0(%1)\n$%~..ng:\;setfpec3"; + else if (flag_fpcr_set == 1) + return "ldgp %0,0(%1)\n$%~..ng:\;setfpec1"; + else if (flag_fpcr_set == 3) + return "ldgp %0,0(%1)\n$%~..ng:\;setfpec0"; + else + return "ldgp %0,0(%1)\n$%~..ng:"; +} + [(set_attr "cannot_copy" "true")]) + +(define_insn "*prologue_ldgp_2_32" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPECV_PLDGP2))] + "TARGET_SW_M32" +) + +(define_insn 
"*call_value_osf_1_er_32" + [(set (match_operand 0) + (call (mem:DI (match_operand:SI 1 "call_operand" "c,R,s")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS" + "@ + call $26,(%1),0\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%* + bsr $26,%1\t\t!samegp + ldw $27,%1($29)\t\t!literal!%#\;call $26,($27),0\t\t!lituse_jsr!%#\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%*" + [(set_attr "type" "jsr") + (set_attr "length" "12,*,16")]) + +(define_insn "*call_osf_1_er_noreturn_32" + [(call (mem:DI (match_operand:SI 0 "call_operand" "c,R,s")) + (match_operand 1)) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS + && find_reg_note (insn, REG_NORETURN, NULL_RTX)" + "@ + call $26,($27),0 + bsr $26,%0\t\t!samegp + ldw $27,%0($29)\t\t!literal!%#\;call $26,($27),%0\t\t!lituse_jsr!%#" + [(set_attr "type" "jsr") + (set_attr "length" "*,*,8")]) + +(define_insn "*call_osf_1_er_32" + [(call (mem:DI (match_operand:SI 0 "call_operand" "c,R,s")) + (match_operand 1)) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS" + "@ + call $26,(%0),0\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%* + bsr $26,%0\t\t!samegp + ldw $27,%0($29)\t\t!literal!%#\;call $26,($27),%0\t\t!lituse_jsr!%#\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%*" + [(set_attr "type" "jsr") + (set_attr "length" "12,*,16")]) diff --git a/gcc/config/sw_64/netbsd.h b/gcc/config/sw_64/netbsd.h new file mode 100644 index 0000000000000000000000000000000000000000..c605c8df2aa2e67fea3a0b8bf9d45f9950008869 --- /dev/null +++ b/gcc/config/sw_64/netbsd.h @@ -0,0 +1,69 @@ +/* Definitions of target machine for GNU compiler, + for Sw_64 NetBSD systems. + Copyright (C) 1998-2020 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + NETBSD_OS_CPP_BUILTINS_ELF (); \ + } \ + while (0) + +/* NetBSD doesn't use the LANGUAGE* built-ins. */ +#undef SUBTARGET_LANGUAGE_CPP_BUILTINS +#define SUBTARGET_LANGUAGE_CPP_BUILTINS() /* nothing. */ + +/* Show that we need a GP when profiling. */ +#undef TARGET_PROFILING_NEEDS_GP +#define TARGET_PROFILING_NEEDS_GP 1 + +/* Provide a CPP_SPEC appropriate for NetBSD/sw_64. We use + this to pull in CPP specs that all NetBSD configurations need. */ + +#undef CPP_SPEC +#define CPP_SPEC NETBSD_CPP_SPEC + +#undef EXTRA_SPECS +#define EXTRA_SPECS \ + {"netbsd_link_spec", NETBSD_LINK_SPEC_ELF}, \ + {"netbsd_entry_point", NETBSD_ENTRY_POINT}, \ + {"netbsd_endfile_spec", NETBSD_ENDFILE_SPEC}, + +/* Provide a LINK_SPEC appropriate for a NetBSD/sw_64 ELF target. */ + +#undef LINK_SPEC +#define LINK_SPEC \ + "%{G*} %{relax:-relax} \ + %{O*:-O3} %{!O*:-O1} \ + %(netbsd_link_spec)" + +#define NETBSD_ENTRY_POINT "__start" + +/* Provide an ENDFILE_SPEC appropriate for NetBSD/sw_64 ELF. 
Here we + add crtend.o, which provides part of the support for getting + C++ file-scope static objects deconstructed after exiting "main". + + We also need to handle the GCC option `-ffast-math'. */ + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC \ + "%{Ofast|ffast-math|funsafe-math-optimizations:crtfm%O%s} \ + %(netbsd_endfile_spec)" + +#define HAVE_ENABLE_EXECUTE_STACK diff --git a/gcc/config/sw_64/openbsd.h b/gcc/config/sw_64/openbsd.h new file mode 100644 index 0000000000000000000000000000000000000000..6b20e8dc65f0e29f714e4ed379adf7f0c893ce43 --- /dev/null +++ b/gcc/config/sw_64/openbsd.h @@ -0,0 +1,74 @@ +/* Configuration file for an sw_64 OpenBSD target. + Copyright (C) 1999-2020 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Controlling the compilation driver. */ +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_FPREGS | MASK_IEEE | MASK_IEEE_CONFORMANT) + +#define LINK_SPEC \ + "%{!shared:%{!nostdlib:%{!r*:%{!e*:-e __start}}}} \ + %{shared:-shared} %{R*} \ + %{static:-Bstatic} \ + %{!static:-Bdynamic} \ + %{rdynamic:-export-dynamic} \ + %{assert*} \ + %{!dynamic-linker:-dynamic-linker /usr/libexec/ld.so}" + +/* As an elf system, we need crtbegin/crtend stuff. */ +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ + "\ + %{!shared: %{pg:gcrt0%O%s} %{!pg:%{p:gcrt0%O%s} \ + %{!p:%{!static:crt0%O%s} %{static:%{nopie:crt0%O%s} \ + %{!nopie:rcrt0%O%s}}}} crtbegin%O%s} %{shared:crtbeginS%O%s}" +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "%{!shared:crtend%O%s} %{shared:crtendS%O%s}" + +/* run-time target specifications. */ +#define TARGET_OS_CPP_BUILTINS() \ + do { \ + OPENBSD_OS_CPP_BUILTINS_ELF(); \ + OPENBSD_OS_CPP_BUILTINS_LP64(); \ + } while (0) + +/* Layout of source language data types. */ + +/* This must agree with */ +#undef SIZE_TYPE +#define SIZE_TYPE "long unsigned int" + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "long int" + +#undef INTMAX_TYPE +#define INTMAX_TYPE "long long int" + +#undef UINTMAX_TYPE +#define UINTMAX_TYPE "long long unsigned int" + +#undef WCHAR_TYPE +#define WCHAR_TYPE "int" + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + +#undef WINT_TYPE +#define WINT_TYPE "int" + +#define LOCAL_LABEL_PREFIX "." diff --git a/gcc/config/sw_64/predicates.md b/gcc/config/sw_64/predicates.md new file mode 100644 index 0000000000000000000000000000000000000000..c82d5c7de27c20facd65eb83d758a8ec834d0230 --- /dev/null +++ b/gcc/config/sw_64/predicates.md @@ -0,0 +1,649 @@ +;; Predicate definitions for Sw_64. +;; Copyright (C) 2004-2020 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. 
+;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Return 1 if OP is the zero constant for MODE. +(define_predicate "const0_operand" + (and (match_code "const_int,const_wide_int,const_double,const_vector") + (match_test "op == CONST0_RTX (mode)"))) + +;; Returns true if OP is either the constant zero or a register. +(define_predicate "reg_or_0_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "const0_operand"))) + +;; Return 1 if OP is a constant in the range of 0-63 (for a shift) or +;; any register. +(define_predicate "reg_or_6bit_operand" + (if_then_else (match_code "const_int") + (match_test "INTVAL (op) >= 0 && INTVAL (op) < 64") + (match_operand 0 "register_operand"))) + +;; Return 1 if OP is a constant in the range of 0-31 (for a shift) or +;; any register. +(define_predicate "reg_or_5bit_operand" + (if_then_else (match_code "const_int") + (match_test "INTVAL (op) >= 0 && INTVAL (op) < 32") + (match_operand 0 "register_operand"))) + +;; Return 1 if OP is an 8-bit constant. +(define_predicate "cint8_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) >= 0 && INTVAL (op) < 256"))) + +;; Return 1 if OP is an 8-bit constant or any register. +(define_predicate "reg_or_8bit_operand" + (if_then_else (match_code "const_int") + (match_test "INTVAL (op) >= 0 && INTVAL (op) < 256") + (match_operand 0 "register_operand"))) + +;; Return 1 if OP is a constant or any register. +(define_predicate "reg_or_cint_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "const_int_operand"))) + +;; Return 1 if the operand is a valid second operand to an add insn. +(define_predicate "add_operand" + (if_then_else (match_code "const_int") + (match_test "satisfies_constraint_K (op) || satisfies_constraint_L (op)") + (match_operand 0 "register_operand"))) + +;; Return 1 if the operand is a valid second operand to a +;; sign-extending add insn. +(define_predicate "sext_add_operand" + (if_then_else (match_code "const_int") + (match_test "satisfies_constraint_I (op) || satisfies_constraint_O (op)") + (match_operand 0 "register_operand"))) + +;; Return 1 if the operand is a non-symbolic constant operand that +;; does not satisfy add_operand. +(define_predicate "non_add_const_operand" + (and (match_code "const_int,const_wide_int,const_double,const_vector") + (not (match_operand 0 "add_operand")))) + +;; Return 1 if the operand is a non-symbolic, nonzero constant operand. +(define_predicate "non_zero_const_operand" + (and (match_code "const_int,const_wide_int,const_double,const_vector") + (not (match_test "op == CONST0_RTX (mode)")))) + +;; Return 1 if OP is the constant 1, 2 or 3. +(define_predicate "const123_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 1, 3)"))) + +;; Return 1 if OP is the constant 2 or 3. +(define_predicate "const23_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 2 || INTVAL (op) == 3"))) + +;; Return 1 if OP is the constant 4 or 8. +(define_predicate "const48_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 4 || INTVAL (op) == 8"))) + +;; Return 1 if OP is a valid first operand to an AND insn. 
+(define_predicate "and_operand" + (if_then_else (match_code "const_int") + (match_test "(unsigned HOST_WIDE_INT) INTVAL (op) < 0x100 + || (unsigned HOST_WIDE_INT) ~ INTVAL (op) < 0x100 + || zap_mask (INTVAL (op))") + (match_operand 0 "register_operand"))) + +;; Return 1 if OP is a valid first operand to an IOR or XOR insn. +(define_predicate "or_operand" + (if_then_else (match_code "const_int") + (match_test "(unsigned HOST_WIDE_INT) INTVAL (op) < 0x100 + || (unsigned HOST_WIDE_INT) ~ INTVAL (op) < 0x100") + (match_operand 0 "register_operand"))) + +;; Return 1 if OP is a constant that is the width, in bits, of an integral +;; mode not larger than DImode. +(define_predicate "mode_width_operand" + (match_code "const_int") +{ + HOST_WIDE_INT i = INTVAL (op); + return i == 8 || i == 16 || i == 32 || i == 64; +}) + +;; Return 1 if OP is a constant that is a mask of ones of width of an +;; integral machine mode not larger than DImode. +(define_predicate "mode_mask_operand" + (match_code "const_int") +{ + HOST_WIDE_INT value = INTVAL (op); + + if (value == 0xff) + return 1; + if (value == 0xffff) + return 1; + if (value == 0xffffffff) + return 1; + if (value == -1) + return 1; + + return 0; +}) + +;; Return 1 if OP is a multiple of 8 less than 64. +(define_predicate "mul8_operand" + (match_code "const_int") +{ + unsigned HOST_WIDE_INT i = INTVAL (op); + return i < 64 && i % 8 == 0; +}) + +;; Return 1 if OP is a hard floating-point register. +(define_predicate "hard_fp_register_operand" + (match_operand 0 "register_operand") +{ + if (SUBREG_P (op)) + op = SUBREG_REG (op); + return REGNO_REG_CLASS (REGNO (op)) == FLOAT_REGS; +}) + +;; Return 1 if OP is a hard general register. +(define_predicate "hard_int_register_operand" + (match_operand 0 "register_operand") +{ + if (SUBREG_P (op)) + op = SUBREG_REG (op); + return REGNO_REG_CLASS (REGNO (op)) == GENERAL_REGS; +}) + +;; Return 1 if OP is a valid operand for the source of a move insn. +(define_predicate "input_operand" + (match_operand 0 "general_operand") +{ + switch (GET_CODE (op)) + { + case LABEL_REF: + case SYMBOL_REF: + case CONST: + if (TARGET_EXPLICIT_RELOCS) + { + /* We don't split symbolic operands into something unintelligable + until after reload, but we do not wish non-small, non-global + symbolic operands to be reconstructed from their high/lo_sum + form. */ + return (small_symbolic_operand (op, mode) + || global_symbolic_operand (op, mode) + || gotdtp_symbolic_operand (op, mode) + || gottp_symbolic_operand (op, mode)); + } + /* VMS still has a 32-bit mode. */ + return mode == ptr_mode || mode == Pmode; + + case HIGH: + return (TARGET_EXPLICIT_RELOCS + && local_symbolic_operand (XEXP (op, 0), mode)); + + case REG: + return 1; + + case SUBREG: + if (register_operand (op, mode)) + return 1; + /* fall through. */ + case MEM: + return ((TARGET_BWX || (mode != HImode && mode != QImode)) + && general_operand (op, mode)); + + case CONST_WIDE_INT: + case CONST_DOUBLE: + return op == CONST0_RTX (mode); + + case CONST_VECTOR: + if (reload_in_progress || reload_completed) + return sw_64_legitimate_constant_p (mode, op); + return op == CONST0_RTX (mode); + + case CONST_INT: + if (mode == QImode || mode == HImode) + return true; + if (reload_in_progress || reload_completed) + return sw_64_legitimate_constant_p (mode, op); + return add_operand (op, mode); + + default: + gcc_unreachable (); + } + return 0; +}) + +;; Return 1 if OP is a SYMBOL_REF for a function known to be in this +;; file, and in the same section as the current function. 
+ +(define_predicate "samegp_function_operand" + (match_code "symbol_ref") +{ + /* Easy test for recursion. */ + if (op == XEXP (DECL_RTL (current_function_decl), 0)) + return true; + + /* Functions that are not local can be overridden, and thus may + not share the same gp. */ + if (! SYMBOL_REF_LOCAL_P (op)) + return false; + + /* If -msmall-data is in effect, assume that there is only one GP + for the module, and so any local symbol has this property. We + need explicit relocations to be able to enforce this for symbols + not defined in this unit of translation, however. */ + if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA) + return true; + + /* Functions that are not external are defined in this UoT, + and thus must share the same gp. */ + return ! SYMBOL_REF_EXTERNAL_P (op); +}) + +;; Return 1 if OP is a SYMBOL_REF for which we can make a call via bsr. +(define_predicate "direct_call_operand" + (match_operand 0 "samegp_function_operand") +{ + /* If profiling is implemented via linker tricks, we can't jump + to the nogp alternate entry point. Note that crtl->profile + would not be correct, since that doesn't indicate if the target + function uses profiling. */ + /* ??? TARGET_PROFILING_NEEDS_GP isn't really the right test, + but is approximately correct for the SYSV ABIs. Don't know + what to do for VMS, NT, or UMK. */ + if (!TARGET_PROFILING_NEEDS_GP && profile_flag) + return false; + + /* Must be a function. In some cases folks create thunks in static + data structures and then make calls to them. If we allow the + direct call, we'll get an error from the linker about !samegp reloc + against a symbol without a .prologue directive. */ + if (!SYMBOL_REF_FUNCTION_P (op)) + return false; + + /* Must be "near" so that the branch is assumed to reach. With + -msmall-text, this is assumed true of all local symbols. Since + we've already checked samegp, locality is already assured. */ + if (TARGET_SMALL_TEXT) + return true; + + return false; +}) + +;; Return 1 if OP is a valid operand for the MEM of a CALL insn. +;; +;; For TARGET_ABI_SYSV, we want to restrict to R27 or a pseudo. + +(define_predicate "call_operand" + (ior (match_code "symbol_ref") + (and (match_code "reg") + (ior (not (match_test "TARGET_ABI_OSF")) + (not (match_test "HARD_REGISTER_P (op)")) + (match_test "REGNO (op) == R27_REG"))))) + +;; Return true if OP is a LABEL_REF, or SYMBOL_REF or CONST referencing +;; a (non-tls) variable known to be defined in this file. +(define_predicate "local_symbolic_operand" + (match_code "label_ref,const,symbol_ref") +{ + if (GET_CODE (op) == CONST + && GET_CODE (XEXP (op, 0)) == PLUS + && CONST_INT_P (XEXP (XEXP (op, 0), 1))) + op = XEXP (XEXP (op, 0), 0); + + if (GET_CODE (op) == LABEL_REF) + return 1; + + if (GET_CODE (op) != SYMBOL_REF) + return 0; + + return (SYMBOL_REF_LOCAL_P (op) + && !SYMBOL_REF_WEAK (op) + && !SYMBOL_REF_TLS_MODEL (op)); +}) + +;; Return true if OP is a SYMBOL_REF or CONST referencing a variable +;; known to be defined in this file in the small data area. +(define_predicate "small_symbolic_operand" + (match_code "const,symbol_ref") +{ + HOST_WIDE_INT ofs = 0, max_ofs = 0; + + if (! TARGET_SMALL_DATA) + return false; + + if (GET_CODE (op) == CONST + && GET_CODE (XEXP (op, 0)) == PLUS + && CONST_INT_P (XEXP (XEXP (op, 0), 1))) + { + ofs = INTVAL (XEXP (XEXP (op, 0), 1)); + op = XEXP (XEXP (op, 0), 0); + } + + if (GET_CODE (op) != SYMBOL_REF) + return false; + + /* ??? 
There's no encode_section_info equivalent for the rtl
+ constant pool, so SYMBOL_FLAG_SMALL never gets set. */
+ if (CONSTANT_POOL_ADDRESS_P (op))
+ {
+ max_ofs = GET_MODE_SIZE (get_pool_mode (op));
+ if (max_ofs > g_switch_value)
+ return false;
+ }
+ else if (SYMBOL_REF_LOCAL_P (op)
+ && SYMBOL_REF_SMALL_P (op)
+ && !SYMBOL_REF_WEAK (op)
+ && !SYMBOL_REF_TLS_MODEL (op))
+ {
+ if (SYMBOL_REF_DECL (op))
+ max_ofs = tree_to_uhwi (DECL_SIZE_UNIT (SYMBOL_REF_DECL (op)));
+ }
+ else
+ return false;
+
+ /* Given that we know that the GP is always 8 byte aligned, we can
+ always adjust by 7 without overflowing. */
+ if (max_ofs < 8)
+ max_ofs = 8;
+
+ /* Since we know this is an object in a small data section, we know the
+ entire section is addressable via GP. We don't know where the section
+ boundaries are, but we know the entire object is within. */
+ /*return IN_RANGE (ofs, 0, max_ofs - 1);*/
+
+ if (sw_64_gprel_size == 16)
+ return IN_RANGE (ofs, 0, max_ofs - 1);
+ if (sw_64_gprel_size == 32)
+ return false;
+
+ /* For any other sw_64_gprel_size, be conservative and do not treat
+ the symbol as small data. */
+ return false;
+})
+
+;; Return true if OP is a SYMBOL_REF or CONST referencing a variable
+;; not known (or known not) to be defined in this file.
+(define_predicate "global_symbolic_operand"
+ (match_code "const,symbol_ref")
+{
+ if (GET_CODE (op) == CONST
+ && GET_CODE (XEXP (op, 0)) == PLUS
+ && CONST_INT_P (XEXP (XEXP (op, 0), 1)))
+ op = XEXP (XEXP (op, 0), 0);
+
+ if (GET_CODE (op) != SYMBOL_REF)
+ return 0;
+
+ return ((!SYMBOL_REF_LOCAL_P (op) || SYMBOL_REF_WEAK (op))
+ && !SYMBOL_REF_TLS_MODEL (op));
+})
+
+;; Returns 1 if OP is a symbolic operand, i.e. a symbol_ref or a label_ref,
+;; possibly with an offset.
+(define_predicate "symbolic_operand"
+ (ior (match_code "symbol_ref,label_ref")
+ (and (match_code "const")
+ (match_code "plus" "0")
+ (match_code "symbol_ref,label_ref" "00")
+ (match_code "const_int" "01"))))
+
+;; Return true if OP is valid for 16-bit DTP relative relocations.
+(define_predicate "dtp16_symbolic_operand"
+ (and (match_code "const")
+ (match_test "tls_symbolic_operand_1 (op, 16, UNSPEC_DTPREL)")))
+
+;; Return true if OP is valid for 32-bit DTP relative relocations.
+(define_predicate "dtp32_symbolic_operand"
+ (and (match_code "const")
+ (match_test "tls_symbolic_operand_1 (op, 32, UNSPEC_DTPREL)")))
+
+;; Return true if OP is valid for 64-bit DTP relative relocations.
+(define_predicate "gotdtp_symbolic_operand"
+ (and (match_code "const")
+ (match_test "tls_symbolic_operand_1 (op, 64, UNSPEC_DTPREL)")))
+
+;; Return true if OP is valid for 16-bit TP relative relocations.
+(define_predicate "tp16_symbolic_operand"
+ (and (match_code "const")
+ (match_test "tls_symbolic_operand_1 (op, 16, UNSPEC_TPREL)")))
+
+;; Return true if OP is valid for 32-bit TP relative relocations.
+(define_predicate "tp32_symbolic_operand"
+ (and (match_code "const")
+ (match_test "tls_symbolic_operand_1 (op, 32, UNSPEC_TPREL)")))
+
+;; Return true if OP is valid for 64-bit TP relative relocations.
+(define_predicate "gottp_symbolic_operand"
+ (and (match_code "const")
+ (match_test "tls_symbolic_operand_1 (op, 64, UNSPEC_TPREL)")))
+
+;; Return 1 if this memory address is a known aligned register plus
+;; a constant. It must be a valid address. This means that we can do
+;; this as an aligned reference plus some offset.
+;;
+;; Take into account what reload will do. Oh god this is awful.
+;; The horrible comma-operator construct below is to prevent genrecog
+;; from thinking that this predicate accepts REG and SUBREG. 
We don't +;; use recog during reload, so pretending these codes are accepted +;; pessimizes things a tad. + +(define_special_predicate "aligned_memory_operand" + (ior (match_test "op = resolve_reload_operand (op), 0") + (match_code "mem")) +{ + rtx base; + int offset; + + if (MEM_ALIGN (op) >= 32) + return 1; + + op = XEXP (op, 0); + + /* LEGITIMIZE_RELOAD_ADDRESS creates (plus (plus reg const_hi) const_lo) + sorts of constructs. Dig for the real base register. */ + if (reload_in_progress + && GET_CODE (op) == PLUS + && GET_CODE (XEXP (op, 0)) == PLUS) + { + base = XEXP (XEXP (op, 0), 0); + offset = INTVAL (XEXP (op, 1)); + } + else + { + if (! memory_address_p (mode, op)) + return 0; + if (GET_CODE (op) == PLUS) + { + base = XEXP (op, 0); + offset = INTVAL (XEXP (op, 1)); + } + else + { + base = op; + offset = 0; + } + } + + if (offset % GET_MODE_SIZE (mode)) + return 0; + + return (REG_P (base) && REGNO_POINTER_ALIGN (REGNO (base)) >= 32); +}) + +;; Similar, but return 1 if OP is a MEM which is not alignable. + +(define_special_predicate "unaligned_memory_operand" + (ior (match_test "op = resolve_reload_operand (op), 0") + (match_code "mem")) +{ + rtx base; + int offset; + + if (MEM_ALIGN (op) >= 32) + return 0; + + op = XEXP (op, 0); + + /* LEGITIMIZE_RELOAD_ADDRESS creates (plus (plus reg const_hi) const_lo) + sorts of constructs. Dig for the real base register. */ + if (reload_in_progress + && GET_CODE (op) == PLUS + && GET_CODE (XEXP (op, 0)) == PLUS) + { + base = XEXP (XEXP (op, 0), 0); + offset = INTVAL (XEXP (op, 1)); + } + else + { + if (! memory_address_p (mode, op)) + return 0; + if (GET_CODE (op) == PLUS) + { + base = XEXP (op, 0); + offset = INTVAL (XEXP (op, 1)); + } + else + { + base = op; + offset = 0; + } + } + + if (offset % GET_MODE_SIZE (mode)) + return 1; + + return (REG_P (base) && REGNO_POINTER_ALIGN (REGNO (base)) < 32); +}) + +;; Return 1 if OP is any memory location. During reload a pseudo matches. +(define_special_predicate "any_memory_operand" + (match_code "mem,reg,subreg") +{ + if (SUBREG_P (op)) + op = SUBREG_REG (op); + + if (MEM_P (op)) + return true; + if (reload_in_progress && REG_P (op)) + { + unsigned regno = REGNO (op); + if (HARD_REGISTER_NUM_P (regno)) + return false; + else + return reg_renumber[regno] < 0; + } + + return false; +}) + +;; Returns 1 if OP is not an eliminable register. +;; +;; This exists to cure a pathological failure in the s8addq (et al) patterns, +;; +;; long foo () { long t; bar (); return (long) &t * 26107; } +;; +;; which run afoul of a hack in reload to cure a (presumably) similar +;; problem with lea-type instructions on other targets. But there is +;; one of us and many of them, so work around the problem by selectively +;; preventing combine from making the optimization. + +(define_predicate "reg_not_elim_operand" + (match_operand 0 "register_operand") +{ + if (SUBREG_P (op)) + op = SUBREG_REG (op); + return op != frame_pointer_rtx && op != arg_pointer_rtx; +}) + +;; Accept a register, but not a subreg of any kind. This allows us to +;; avoid pathological cases in reload wrt data movement common in +;; int->fp conversion. */ +(define_predicate "reg_no_subreg_operand" + (and (match_code "reg") + (match_operand 0 "register_operand"))) + +;; Return 1 if OP is a valid Sw_64 comparison operator for "cbranch" +;; instructions. 
+(define_predicate "sw_64_cbranch_operator" + (ior (match_operand 0 "ordered_comparison_operator") + (match_code "ordered,unordered"))) + +;; Return 1 if OP is a valid Sw_64 comparison operator for "cmp" style +;; instructions. +(define_predicate "sw_64_comparison_operator" + (match_code "eq,le,lt,leu,ltu")) + +;; Similarly, but with swapped operands. +(define_predicate "sw_64_swapped_comparison_operator" + (match_code "eq,ge,gt,gtu")) + +;; Return 1 if OP is a valid Sw_64 comparison operator against zero +;; for "bcc" style instructions. +(define_predicate "sw_64_zero_comparison_operator" + (match_code "eq,ne,le,lt,leu,ltu")) + +;; Return 1 if OP is a signed comparison operation. +(define_predicate "signed_comparison_operator" + (match_code "eq,ne,le,lt,ge,gt")) + +;; Return 1 if OP is a valid Sw_64 floating point comparison operator. +(define_predicate "sw_64_fp_comparison_operator" + (match_code "eq,le,lt,unordered")) + +;; Return 1 if this is a divide or modulus operator. +(define_predicate "divmod_operator" + (match_code "div,mod,udiv,umod")) + +;; Return 1 if this is a float->int conversion operator. +(define_predicate "fix_operator" + (match_code "fix,unsigned_fix")) + +;; Recognize an addition operation that includes a constant. Used to +;; convince reload to canonize (plus (plus reg c1) c2) during register +;; elimination. + +(define_predicate "addition_operation" + (and (match_code "plus") + (match_test "register_operand (XEXP (op, 0), mode) + && satisfies_constraint_K (XEXP (op, 1))"))) + +;; For TARGET_EXPLICIT_RELOCS, we don't obfuscate a SYMBOL_REF to a +;; small symbolic operand until after reload. At which point we need +;; to replace (mem (symbol_ref)) with (mem (lo_sum $29 symbol_ref)) +;; so that sched2 has the proper dependency information. */ +(define_predicate "some_small_symbolic_operand" + (match_code "set,parallel,prefetch,unspec,unspec_volatile") +{ + /* Avoid search unless necessary. */ + if (!TARGET_EXPLICIT_RELOCS || !reload_completed) + return false; + return some_small_symbolic_operand_int (op); +}) + +;; Accept a register, or a memory if BWX is enabled. +(define_predicate "reg_or_bwx_memory_operand" + (ior (match_operand 0 "register_operand") + (and (match_test "TARGET_BWX") + (match_operand 0 "memory_operand")))) + +;; Accept a memory whose address is only a register. +(define_predicate "mem_noofs_operand" + (and (match_code "mem") + (match_code "reg" "0"))) + +(define_predicate "sw_64_branch_combination" + (match_code "eq,ne,le,lt,ge,gt,leu,ltu,geu,gtu")) + +(define_predicate "sw_64_swapped_branch_combination" + (match_code "ne,ge,gt,geu,gtu")) + diff --git a/gcc/config/sw_64/sw6.md b/gcc/config/sw_64/sw6.md new file mode 100644 index 0000000000000000000000000000000000000000..615ddae7079debcceeda8a62b64c2b623b42e01e --- /dev/null +++ b/gcc/config/sw_64/sw6.md @@ -0,0 +1,181 @@ +;; Scheduling description for Sw_64 SW6. +;; Copyright (C) 2002-2020 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. 
+;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +; SW6 can issue 4 insns per clock. It's out-of-order, so this isn't +; expected to help over-much, but a precise description can be important +; for software pipelining. +; +; SW6 has two symmetric pairs ("clusters") of two asymmetric integer +; units ("upper" and "lower"), yielding pipe names U0, U1, L0, L1. +; +; ??? The clusters have independent register files that are re-synced +; every cycle. Thus there is one additional cycle of latency between +; insns issued on different clusters. Possibly model that by duplicating +; all EBOX insn_reservations that can issue to either cluster, increasing +; all latencies by one, and adding bypasses within the cluster. +; +; ??? In addition, instruction order affects cluster issue. + +(define_automaton "sw6_0,sw6_1") +(define_cpu_unit "sw6_u0,sw6_u1,sw6_l0,sw6_l1" "sw6_0") +(define_reservation "sw6_u" "sw6_u0|sw6_u1") +(define_reservation "sw6_l" "sw6_l0|sw6_l1") +(define_reservation "sw6_ebox" "sw6_u|sw6_l") + +(define_cpu_unit "sw6_fa" "sw6_1") +(define_cpu_unit "sw6_fm,sw6_fst0,sw6_fst1" "sw6_0") +(define_reservation "sw6_fst" "sw6_fst0|sw6_fst1") + +; Assume type "multi" single issues. +(define_insn_reservation "sw6_multi" 1 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "multi")) + "sw6_u0+sw6_u1+sw6_l0+sw6_l1+sw6_fa+sw6_fm+sw6_fst0+sw6_fst1") + +; Integer loads take at least 3 clocks, and only issue to lower units. +; adjust_cost still factors in user-specified memory latency, so return 1 here. +(define_insn_reservation "sw6_ild" 4 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "ild,ldsym,ld_l")) + "sw6_l") + +(define_insn_reservation "sw6_ist" 4 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "ist,st_c")) + "sw6_l") + +(define_insn_reservation "sw6_mb" 1 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "mb")) + "sw6_l1") + +; FP loads take at least 4 clocks. adjust_cost still factors +; in user-specified memory latency, so return 2 here. +(define_insn_reservation "sw6_fld" 2 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "fld")) + "sw6_l") + +; The FPU communicates with memory and the integer register file +; via two fp store units. We need a slot in the fst immediately, and +; a slot in LOW after the operand data is ready. At which point the +; data may be moved either to the store queue or the integer register +; file and the insn retired. + +(define_insn_reservation "sw6_fst" 3 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "fst")) + "sw6_fst,nothing,sw6_l") + +; Arithmetic goes anywhere. +(define_insn_reservation "sw6_arith" 1 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "iadd,ilog,icmp")) + "sw6_ebox") + +; Motion video insns also issue only to U0, and take three ticks. +(define_insn_reservation "sw6_mvi" 3 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "mvi")) + "sw6_u0") + +; Shifts issue to upper units. +(define_insn_reservation "sw6_shift" 1 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "shift")) + "sw6_u") + +; Multiplies issue only to U1, and all take 7 ticks. +(define_insn_reservation "sw6_imul" 7 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "imul")) + "sw6_u1") + +; Conditional moves decompose into two independent primitives, each taking +; one cycle. Since sw6 is out-of-order, we can't see anything but two cycles. 
+(define_insn_reservation "sw6_icmov" 2 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "icmov")) + "sw6_ebox,sw6_ebox") + +; Integer branches issue to upper units +(define_insn_reservation "sw6_ibr" 1 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "ibr,callpal")) + "sw6_u") + +; Calls only issue to L0. +(define_insn_reservation "sw6_jsr" 1 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "call")) + "sw6_l0") + +; Ftoi/itof only issue to lower pipes. +(define_insn_reservation "sw6_itof" 3 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "itof")) + "sw6_l") + +(define_insn_reservation "sw6_ftoi" 3 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "ftoi")) + "sw6_fst,nothing,sw6_l") + +(define_insn_reservation "sw6_fmul" 4 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "fmul")) + "sw6_fm") + +(define_insn_reservation "sw6_fadd" 4 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "fadd,fcpys,fbr")) + "sw6_fa") + +(define_bypass 6 "sw6_fmul,sw6_fadd" "sw6_fst,sw6_ftoi") + +(define_insn_reservation "sw6_fcmov" 8 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "fcmov")) + "sw6_fa,nothing*3,sw6_fa") + +(define_bypass 10 "sw6_fcmov" "sw6_fst,sw6_ftoi") + +(define_insn_reservation "sw6_fdivsf" 12 + (and (eq_attr "tune" "sw6") + (and (eq_attr "type" "fdiv") + (eq_attr "opsize" "si"))) + "sw6_fa*9") + +(define_insn_reservation "sw6_fdivdf" 15 + (and (eq_attr "tune" "sw6") + (and (eq_attr "type" "fdiv") + (eq_attr "opsize" "di"))) + "sw6_fa*12") + +(define_insn_reservation "sw6_sqrtsf" 18 + (and (eq_attr "tune" "sw6") + (and (eq_attr "type" "fsqrt") + (eq_attr "opsize" "si"))) + "sw6_fa*15") + +(define_insn_reservation "sw6_sqrtdf" 33 + (and (eq_attr "tune" "sw6") + (and (eq_attr "type" "fsqrt") + (eq_attr "opsize" "di"))) + "sw6_fa*30") diff --git a/gcc/config/sw_64/sw8.md b/gcc/config/sw_64/sw8.md new file mode 100644 index 0000000000000000000000000000000000000000..414908dbc8fbec5a1951da54666e7c23d64e9eb1 --- /dev/null +++ b/gcc/config/sw_64/sw8.md @@ -0,0 +1,181 @@ +;; Scheduling description for Sw_64 SW8. +;; Copyright (C) 2002-2020 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +; SW8 can issue 4 insns per clock. It's out-of-order, so this isn't +; expected to help over-much, but a precise description can be important +; for software pipelining. +; +; SW8 has two symmetric pairs ("clusters") of two asymmetric integer +; units ("upper" and "lower"), yielding pipe names U0, U1, L0, L1. +; +; ??? The clusters have independent register files that are re-synced +; every cycle. Thus there is one additional cycle of latency between +; insns issued on different clusters. Possibly model that by duplicating +; all EBOX insn_reservations that can issue to either cluster, increasing +; all latencies by one, and adding bypasses within the cluster. +; +; ??? In addition, instruction order affects cluster issue. 
+ +(define_automaton "sw8_0,sw8_1") +(define_cpu_unit "sw8_u0,sw8_u1,sw8_l0,sw8_l1" "sw8_0") +(define_reservation "sw8_u" "sw8_u0|sw8_u1") +(define_reservation "sw8_l" "sw8_l0|sw8_l1") +(define_reservation "sw8_ebox" "sw8_u|sw8_l") + +(define_cpu_unit "sw8_fa" "sw8_1") +(define_cpu_unit "sw8_fm,sw8_fst0,sw8_fst1" "sw8_0") +(define_reservation "sw8_fst" "sw8_fst0|sw8_fst1") + +; Assume type "multi" single issues. +(define_insn_reservation "sw8_multi" 1 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "multi")) + "sw8_u0+sw8_u1+sw8_l0+sw8_l1+sw8_fa+sw8_fm+sw8_fst0+sw8_fst1") + +; Integer loads take at least 3 clocks, and only issue to lower units. +; adjust_cost still factors in user-specified memory latency, so return 1 here. +(define_insn_reservation "sw8_ild" 4 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "ild,ldsym,ld_l")) + "sw8_l") + +(define_insn_reservation "sw8_ist" 4 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "ist,st_c")) + "sw8_l") + +(define_insn_reservation "sw8_mb" 1 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "mb")) + "sw8_l1") + +; FP loads take at least 4 clocks. adjust_cost still factors +; in user-specified memory latency, so return 2 here. +(define_insn_reservation "sw8_fld" 2 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "fld")) + "sw8_l") + +; The FPU communicates with memory and the integer register file +; via two fp store units. We need a slot in the fst immediately, and +; a slot in LOW after the operand data is ready. At which point the +; data may be moved either to the store queue or the integer register +; file and the insn retired. + +(define_insn_reservation "sw8_fst" 3 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "fst")) + "sw8_fst,nothing,sw8_l") + +; Arithmetic goes anywhere. +(define_insn_reservation "sw8_arith" 1 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "iadd,ilog,icmp")) + "sw8_ebox") + +; Motion video insns also issue only to U0, and take three ticks. +(define_insn_reservation "sw8_mvi" 3 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "mvi")) + "sw8_u0") + +; Shifts issue to upper units. +(define_insn_reservation "sw8_shift" 1 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "shift")) + "sw8_u") + +; Multiplies issue only to U1, and all take 7 ticks. +(define_insn_reservation "sw8_imul" 7 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "imul")) + "sw8_u1") + +; Conditional moves decompose into two independent primitives, each taking +; one cycle. Since sw8 is out-of-order, we can't see anything but two cycles. +(define_insn_reservation "sw8_icmov" 2 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "icmov")) + "sw8_ebox,sw8_ebox") + +; Integer branches issue to upper units +(define_insn_reservation "sw8_ibr" 1 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "ibr,callpal")) + "sw8_u") + +; Calls only issue to L0. +(define_insn_reservation "sw8_jsr" 1 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "call")) + "sw8_l0") + +; Ftoi/itof only issue to lower pipes. 
+(define_insn_reservation "sw8_itof" 3 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "itof")) + "sw8_l") + +(define_insn_reservation "sw8_ftoi" 3 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "ftoi")) + "sw8_fst,nothing,sw8_l") + +(define_insn_reservation "sw8_fmul" 4 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "fmul")) + "sw8_fm") + +(define_insn_reservation "sw8_fadd" 4 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "fadd,fcpys,fbr")) + "sw8_fa") + +(define_bypass 6 "sw8_fmul,sw8_fadd" "sw8_fst,sw8_ftoi") + +(define_insn_reservation "sw8_fcmov" 8 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "fcmov")) + "sw8_fa,nothing*3,sw8_fa") + +(define_bypass 10 "sw8_fcmov" "sw8_fst,sw8_ftoi") + +(define_insn_reservation "sw8_fdivsf" 12 + (and (eq_attr "tune" "sw8") + (and (eq_attr "type" "fdiv") + (eq_attr "opsize" "si"))) + "sw8_fa*9") + +(define_insn_reservation "sw8_fdivdf" 15 + (and (eq_attr "tune" "sw8") + (and (eq_attr "type" "fdiv") + (eq_attr "opsize" "di"))) + "sw8_fa*12") + +(define_insn_reservation "sw8_sqrtsf" 18 + (and (eq_attr "tune" "sw8") + (and (eq_attr "type" "fsqrt") + (eq_attr "opsize" "si"))) + "sw8_fa*15") + +(define_insn_reservation "sw8_sqrtdf" 33 + (and (eq_attr "tune" "sw8") + (and (eq_attr "type" "fsqrt") + (eq_attr "opsize" "di"))) + "sw8_fa*30") diff --git a/gcc/config/sw_64/sw_64-modes.def b/gcc/config/sw_64/sw_64-modes.def new file mode 100644 index 0000000000000000000000000000000000000000..537a1b6545fb679d60babd417af2dcfc2916c2f1 --- /dev/null +++ b/gcc/config/sw_64/sw_64-modes.def @@ -0,0 +1,27 @@ +/* Sw_64 extra machine modes. + Copyright (C) 2003-2020 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* 128-bit floating point. This gets reset in sw_64_option_override + if VAX float format is in use. */ +FLOAT_MODE (TF, 16, ieee_quad_format); + +/* Vector modes. */ +VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI. */ +VECTOR_MODE (INT, QI, 4); /* V4QI. */ +VECTOR_MODE (INT, QI, 2); /* V2QI. */ diff --git a/gcc/config/sw_64/sw_64-passes.def b/gcc/config/sw_64/sw_64-passes.def new file mode 100644 index 0000000000000000000000000000000000000000..9d3964cdb3507cf6263c4c7c93131628942ae3ea --- /dev/null +++ b/gcc/config/sw_64/sw_64-passes.def @@ -0,0 +1,21 @@ +/* Description of target passes for Sw_64 + Copyright (C) 2016-2020 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + + INSERT_PASS_AFTER (pass_convert_to_eh_region_ranges, 1, pass_handle_trap_shadows); + INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_align_insns); diff --git a/gcc/config/sw_64/sw_64-protos.h b/gcc/config/sw_64/sw_64-protos.h new file mode 100644 index 0000000000000000000000000000000000000000..c20a1cfece2ba34e8b9f0eca200ec1669d3cc673 --- /dev/null +++ b/gcc/config/sw_64/sw_64-protos.h @@ -0,0 +1,146 @@ +/* Prototypes for sw_64.c functions used in the md file & elsewhere. + Copyright (C) 1999-2020 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +extern int sw_64_next_sequence_number; + +extern void +literal_section (void); +extern int zap_mask (HOST_WIDE_INT); +extern bool +direct_return (void); + +extern HOST_WIDE_INT +sw_64_initial_elimination_offset (unsigned int, unsigned int); +extern void +sw_64_expand_prologue (void); +extern void +sw_64_expand_epilogue (void); +extern void +sw_64_output_filename (FILE *, const char *); + +extern bool sw_64_legitimate_constant_p (machine_mode, rtx); +extern rtx +sw_64_legitimize_reload_address (rtx, machine_mode, int, int, int); + +extern rtx split_small_symbolic_operand (rtx); + +extern void +get_aligned_mem (rtx, rtx *, rtx *); +extern rtx get_unaligned_address (rtx); +extern rtx get_unaligned_offset (rtx, HOST_WIDE_INT); +extern enum reg_class sw_64_preferred_reload_class (rtx, enum reg_class); + +extern void sw_64_set_memflags (rtx, rtx); +extern bool +sw_64_split_const_mov (machine_mode, rtx *); +extern bool +sw_64_expand_mov (machine_mode, rtx *); +extern bool +sw_64_expand_mov_nobwx (machine_mode, rtx *); +extern void +sw_64_expand_movmisalign (machine_mode, rtx *); +extern void sw_64_emit_floatuns (rtx[]); +extern rtx sw_64_emit_conditional_move (rtx, machine_mode); +extern void +sw_64_split_tmode_pair (rtx[], machine_mode, bool); +extern void sw_64_split_tfmode_frobsign (rtx[], rtx (*) (rtx, rtx, rtx)); +extern void +sw_64_expand_unaligned_load (rtx, rtx, HOST_WIDE_INT, HOST_WIDE_INT, int); +extern void sw_64_expand_unaligned_store (rtx, rtx, HOST_WIDE_INT, + HOST_WIDE_INT); +extern int sw_64_expand_block_move (rtx[]); +extern int sw_64_expand_block_clear (rtx[]); +extern rtx sw_64_expand_zap_mask (HOST_WIDE_INT); +extern void sw_64_expand_builtin_vector_binop (rtx (*) (rtx, rtx, rtx), + machine_mode, rtx, rtx, rtx); + +extern rtx +sw_64_return_addr (int, rtx); +extern rtx +sw_64_gp_save_rtx (void); +extern void +sw_64_initialize_trampoline (rtx, rtx, rtx, int, int, int); + +extern rtx sw_64_va_arg (tree, tree); + +extern void +sw_64_start_function (FILE *, const char *, tree); +extern void +sw_64_end_function (FILE *, const char *, tree); + +extern bool sw_64_find_lo_sum_using_gp (rtx); + +#ifdef REAL_VALUE_TYPE +extern int +check_float_value (machine_mode, REAL_VALUE_TYPE *, int); +#endif + +#ifdef RTX_CODE +extern void sw_64_emit_conditional_branch (rtx[], machine_mode); +extern bool sw_64_emit_setcc (rtx[], machine_mode); +extern int 
sw_64_split_conditional_move (enum rtx_code, rtx, rtx, rtx, rtx); +extern void sw_64_emit_xfloating_arith (enum rtx_code, rtx[]); +extern void sw_64_emit_xfloating_cvt (enum rtx_code, rtx[]); +extern void sw_64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, + enum memmodel); +extern void +sw_64_split_compare_and_swap (rtx op[]); +extern void +sw_64_expand_compare_and_swap_12 (rtx op[]); +extern void +sw_64_split_compare_and_swap_12 (rtx op[]); +extern void +sw_64_split_atomic_exchange (rtx op[]); +extern void +sw_64_expand_atomic_exchange_12 (rtx op[]); +extern void +sw_64_split_atomic_exchange_12 (rtx op[]); +#endif + +extern void +sw_64_split_atomic_cas (rtx op[]); +extern void +sw_64_split_atomic_cas_12 (rtx op[]); + +extern rtx +sw_64_use_linkage (rtx, bool, bool); + +extern rtx unicosmk_add_call_info_word (rtx); + +extern bool some_small_symbolic_operand_int (rtx); +extern int +tls_symbolic_operand_1 (rtx, int, int); +extern rtx resolve_reload_operand (rtx); + +namespace gcc { +class context; +} +class rtl_opt_pass; + +extern rtl_opt_pass * +make_pass_handle_trap_shadows (gcc::context *); +extern rtl_opt_pass * +make_pass_align_insns (gcc::context *); + +extern void +sw_64_emit_rsqrt (rtx, rtx, bool); + +extern void +sw_64_emit_swdiv (rtx, rtx, rtx, bool); +extern rtx gen_move_reg (rtx); diff --git a/gcc/config/sw_64/sw_64.c b/gcc/config/sw_64/sw_64.c new file mode 100644 index 0000000000000000000000000000000000000000..68b85b828968bfeab12fb15f8d0ff303f37f913b --- /dev/null +++ b/gcc/config/sw_64/sw_64.c @@ -0,0 +1,10058 @@ +/* Subroutines used for code generation on the Sw_64. + Copyright (C) 1992-2020 Free Software Foundation, Inc. + Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#define IN_TARGET_CODE 1 + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "target.h" +#include "rtl.h" +#include "tree.h" +#include "stringpool.h" +#include "attribs.h" +#include "memmodel.h" +#include "gimple.h" +#include "df.h" +#include "predict.h" +#include "tm_p.h" +#include "ssa.h" +#include "expmed.h" +#include "optabs.h" +#include "regs.h" +#include "emit-rtl.h" +#include "recog.h" +#include "diagnostic-core.h" +#include "alias.h" +#include "fold-const.h" +#include "stor-layout.h" +#include "calls.h" +#include "varasm.h" +#include "output.h" +#include "insn-attr.h" +#include "explow.h" +#include "expr.h" +#include "reload.h" +#include "except.h" +#include "common/common-target.h" +#include "debug.h" +#include "langhooks.h" +#include "cfgrtl.h" +#include "tree-pass.h" +#include "context.h" +#include "gimple-iterator.h" +#include "gimplify.h" +#include "tree-stdarg.h" +#include "tm-constrs.h" +#include "libfuncs.h" +#include "opts.h" +#include "builtins.h" +#include "rtl-iter.h" +#include "asan.h" + +#include "flags.h" +/* This file should be included last. */ +#include "target-def.h" + +/* Specify which cpu to schedule for. 
*/ +enum processor_type sw_64_tune; + +/* Which cpu we're generating code for. */ +enum processor_type sw_64_cpu; + +static const char *const sw_64_cpu_name[] = {"sw6", "sw8a"}; + +/* Specify how accurate floating-point traps need to be. */ + +enum sw_64_trap_precision sw_64_tp; + +/* Specify the floating-point rounding mode. */ + +enum sw_64_fp_rounding_mode sw_64_fprm; + +/* Specify which things cause traps. */ + +enum sw_64_fp_trap_mode sw_64_fptm; + +/* Nonzero if inside of a function, because the Sw_64 asm can't + handle .files inside of functions. */ + +static int inside_function = FALSE; + +/* The number of cycles of latency we should assume on memory reads. */ + +static int sw_64_memory_latency = 3; + +/* Whether the function needs the GP. */ + +static int sw_64_function_needs_gp; + +/* The assembler name of the current function. */ + +static const char *sw_64_fnname; + +/* The next explicit relocation sequence number. */ +extern GTY (()) int sw_64_next_sequence_number; +int sw_64_next_sequence_number = 1; + +int stfp3_flag; +extern int flag_fpcr_set; + +int warning_sbt_num = 0; +int warning_cbt_num = 0; + +/* The literal and gpdisp sequence numbers for this insn, as printed + by %# and %* respectively. */ +extern GTY (()) int sw_64_this_literal_sequence_number; +extern GTY (()) int sw_64_this_gpdisp_sequence_number; +int sw_64_this_literal_sequence_number; +int sw_64_this_gpdisp_sequence_number; + +/* Costs of various operations on the different architectures. */ + +struct sw_64_rtx_cost_data +{ + unsigned char fp_add; + unsigned char fp_mult; + unsigned char fp_div_sf; + unsigned char fp_div_df; + unsigned char int_mult_si; + unsigned char int_mult_di; + unsigned char int_shift; + unsigned char int_cmov; + unsigned short int_div; +}; + +static struct sw_64_rtx_cost_data const sw_64_rtx_cost_data[PROCESSOR_MAX + 1] + = { + { + /* sw6b */ + COSTS_N_INSNS (6), /* fp_add */ + COSTS_N_INSNS (6), /* fp_mult */ + COSTS_N_INSNS (19), /* fp_div_sf */ + COSTS_N_INSNS (19), /* fp_div_df */ + COSTS_N_INSNS (4), /* int_mult_si */ + COSTS_N_INSNS (4), /* int_mult_di */ + COSTS_N_INSNS (1), /* int_shift */ + COSTS_N_INSNS (1), /* int_cmov */ + COSTS_N_INSNS (83), /* int_div */ + }, + { + /* sw8a */ + COSTS_N_INSNS (6), /* fp_add */ + COSTS_N_INSNS (6), /* fp_mult */ + COSTS_N_INSNS (19), /* fp_div_sf */ + COSTS_N_INSNS (19), /* fp_div_df */ + COSTS_N_INSNS (4), /* int_mult_si */ + COSTS_N_INSNS (4), /* int_mult_di */ + COSTS_N_INSNS (1), /* int_shift */ + COSTS_N_INSNS (1), /* int_cmov */ + COSTS_N_INSNS (20), /* int_div */ + }, + { + /* rtx-cost */ + COSTS_N_INSNS (6), /* fp_add */ + COSTS_N_INSNS (6), /* fp_mult */ + COSTS_N_INSNS (19), /* fp_div_sf */ + COSTS_N_INSNS (19), /* fp_div_df */ + COSTS_N_INSNS (4), /* int_mult_si */ + COSTS_N_INSNS (4), /* int_mult_di */ + COSTS_N_INSNS (3), /* int_shift */ + COSTS_N_INSNS (1), /* int_cmov */ + COSTS_N_INSNS (20), /* int_div */ + }, +}; + +/* Similar but tuned for code size instead of execution latency. The + extra +N is fractional cost tuning based on latency. It's used to + encourage use of cheaper insns like shift, but only if there's just + one of them. 
*/ + +static struct sw_64_rtx_cost_data const sw_64_rtx_cost_size = { + COSTS_N_INSNS (1), /* fp_add */ + COSTS_N_INSNS (1), /* fp_mult */ + COSTS_N_INSNS (1), /* fp_div_sf */ + COSTS_N_INSNS (1) + 1, /* fp_div_df */ + COSTS_N_INSNS (1) + 1, /* int_mult_si */ + COSTS_N_INSNS (1) + 2, /* int_mult_di */ + COSTS_N_INSNS (1), /* int_shift */ + COSTS_N_INSNS (1), /* int_cmov */ + COSTS_N_INSNS (6), /* int_div */ +}; + +/* Get the number of args of a function in one of two ways. */ +#define NUM_ARGS crtl->args.info + +#define REG_PV 27 +#define REG_RA 26 + +/* Declarations of static functions. */ +static struct machine_function * +sw_64_init_machine_status (void); +static rtx +sw_64_emit_xfloating_compare (enum rtx_code *, rtx, rtx); +static void +sw_64_handle_trap_shadows (void); +static void +sw_64_align_insns (void); +static void +sw_64_override_options_after_change (void); + +static unsigned int +rest_of_handle_trap_shadows (void) +{ + sw_64_handle_trap_shadows (); + return 0; +} + +namespace { + +const pass_data pass_data_handle_trap_shadows = { + RTL_PASS, + "trap_shadows", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_NONE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_df_finish, /* todo_flags_finish */ +}; + +class pass_handle_trap_shadows : public rtl_opt_pass +{ +public: + pass_handle_trap_shadows (gcc::context *ctxt) + : rtl_opt_pass (pass_data_handle_trap_shadows, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) + { + return sw_64_tp != SW_64_TP_PROG || flag_exceptions; + } + + virtual unsigned int execute (function *) + { + return rest_of_handle_trap_shadows (); + } + +}; // class pass_handle_trap_shadows + +} // namespace + +rtl_opt_pass * +make_pass_handle_trap_shadows (gcc::context *ctxt) +{ + return new pass_handle_trap_shadows (ctxt); +} + +static unsigned int +rest_of_align_insns (void) +{ + sw_64_align_insns (); + return 0; +} + +namespace { + +const pass_data pass_data_align_insns = { + RTL_PASS, + "align_insns", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_NONE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_df_finish, /* todo_flags_finish */ +}; + +class pass_align_insns : public rtl_opt_pass +{ +public: + pass_align_insns (gcc::context *ctxt) + : rtl_opt_pass (pass_data_align_insns, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) + { + /* Due to the number of extra memb insns, don't bother fixing up + alignment when trap precision is instruction. Moreover, we can + only do our job when sched2 is run. */ + return ((sw_64_tune != PROCESSOR_SW6 && sw_64_tune != PROCESSOR_SW8) + && optimize && !optimize_size && sw_64_tp != SW_64_TP_INSN + && flag_schedule_insns_after_reload); + } + + virtual unsigned int execute (function *) { return rest_of_align_insns (); } + +}; // class pass_align_insns + +} // namespace + +rtl_opt_pass * +make_pass_align_insns (gcc::context *ctxt) +{ + return new pass_align_insns (ctxt); +} + +#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING +/* Implement TARGET_MANGLE_TYPE. */ + +static const char * +sw_64_mangle_type (const_tree type) +{ + if (TYPE_MAIN_VARIANT (type) == long_double_type_node + && TARGET_LONG_DOUBLE_128) + return "g"; + + /* For all other types, use normal C++ mangling. */ + return NULL; +} +#endif + +/* Parse target option strings. 
*/ + +static void +sw_64_option_override (void) +{ + static const struct cpu_table + { + const char *const name; + const enum processor_type processor; + const int flags; + const unsigned short line_size; /* in bytes. */ + const unsigned short l1_size; /* in kb. */ + const unsigned short l2_size; /* in kb. */ + } cpu_table[] = { + {"sw6a", PROCESSOR_SW6, MASK_BWX | MASK_CIX | MASK_FIX | MASK_SW6A, 128, 32, + 512}, + {"sw6b", PROCESSOR_SW6, MASK_BWX | MASK_CIX | MASK_FIX | MASK_SW6B, 128, 32, + 512}, + {"sw8a", PROCESSOR_SW8, MASK_BWX | MASK_CIX | MASK_FIX | MASK_SW8A, 128, 32, + 512}, + }; + + int const ct_size = ARRAY_SIZE (cpu_table); + int line_size = 0, l1_size = 0, l2_size = 0; + int i; + +#ifdef SUBTARGET_OVERRIDE_OPTIONS + SUBTARGET_OVERRIDE_OPTIONS; +#endif + + /* Default to full IEEE compliance mode for Go language. */ + if (strcmp (lang_hooks.name, "GNU Go") == 0 + && !(target_flags_explicit & MASK_IEEE)) + target_flags |= MASK_IEEE; + + sw_64_fprm = SW_64_FPRM_NORM; + sw_64_tp = SW_64_TP_PROG; + sw_64_fptm = SW_64_FPTM_N; + + if (TARGET_IEEE) + { + sw_64_tp = SW_64_TP_INSN; + sw_64_fptm = SW_64_FPTM_SU; + } + if (TARGET_IEEE_WITH_INEXACT) + { + sw_64_tp = SW_64_TP_INSN; + sw_64_fptm = SW_64_FPTM_SUI; + } + if (TARGET_IEEE_MAIN) + { + sw_64_tp = SW_64_TP_INSN; + sw_64_fptm = SW_64_FPTM_SU; + } + + if (sw_64_tp_string) + { + if (!strcmp (sw_64_tp_string, "p")) + sw_64_tp = SW_64_TP_PROG; + else if (!strcmp (sw_64_tp_string, "f")) + sw_64_tp = SW_64_TP_FUNC; + else if (!strcmp (sw_64_tp_string, "i")) + sw_64_tp = SW_64_TP_INSN; + else + error ("bad value %qs for %<-mtrap-precision%> switch", + sw_64_tp_string); + } + + if (sw_64_fprm_string) + { + if (!strcmp (sw_64_fprm_string, "n")) + sw_64_fprm = SW_64_FPRM_NORM; + else if (!strcmp (sw_64_fprm_string, "m")) + sw_64_fprm = SW_64_FPRM_MINF; + else if (!strcmp (sw_64_fprm_string, "c")) + sw_64_fprm = SW_64_FPRM_CHOP; + else if (!strcmp (sw_64_fprm_string, "d")) + sw_64_fprm = SW_64_FPRM_DYN; + else + error ("bad value %qs for %<-mfp-rounding-mode%> switch", + sw_64_fprm_string); + } + + if (sw_64_fptm_string) + { + if (strcmp (sw_64_fptm_string, "n") == 0) + sw_64_fptm = SW_64_FPTM_N; + else if (strcmp (sw_64_fptm_string, "u") == 0) + sw_64_fptm = SW_64_FPTM_U; + else if (strcmp (sw_64_fptm_string, "su") == 0) + sw_64_fptm = SW_64_FPTM_SU; + else if (strcmp (sw_64_fptm_string, "sui") == 0) + sw_64_fptm = SW_64_FPTM_SUI; + else + error ("bad value %qs for %<-mfp-trap-mode%> switch", + sw_64_fptm_string); + } + + if (sw_64_cpu_string) + { + for (i = 0; i < ct_size; i++) + if (!strcmp (sw_64_cpu_string, cpu_table[i].name)) + { + sw_64_tune = sw_64_cpu = cpu_table[i].processor; + line_size = cpu_table[i].line_size; + l1_size = cpu_table[i].l1_size; + l2_size = cpu_table[i].l2_size; + target_flags &= ~(MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX + | MASK_SW6A | MASK_SW6B | MASK_SW8A); + target_flags |= cpu_table[i].flags; + break; + } + if (i == ct_size) + error ("bad value %qs for %<-mcpu%> switch", sw_64_cpu_string); + } + + if (sw_64_tune_string) + { + for (i = 0; i < ct_size; i++) + if (!strcmp (sw_64_tune_string, cpu_table[i].name)) + { + sw_64_tune = cpu_table[i].processor; + line_size = cpu_table[i].line_size; + l1_size = cpu_table[i].l1_size; + l2_size = cpu_table[i].l2_size; + break; + } + if (i == ct_size) + error ("bad value %qs for %<-mtune%> switch", sw_64_tune_string); + } + if (line_size) + SET_OPTION_IF_UNSET (&global_options, &global_options_set, + param_l1_cache_line_size, line_size); + if (l1_size) + SET_OPTION_IF_UNSET 
(&global_options, &global_options_set, + param_l1_cache_size, l1_size); + if (l2_size) + SET_OPTION_IF_UNSET (&global_options, &global_options_set, + param_l2_cache_size, l2_size); + + // generate prefetch for cases like stream add + if (flag_sw_prefetch_add == 1) + SET_OPTION_IF_UNSET (&global_options, &global_options_set, + param_prefetch_min_insn_to_mem_ratio, 2); + + if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch) + flag_prefetch_loop_arrays = 1; + + /* set simultaneous prefetches and latency for sw + * * need add some conditions to decide what the cpu kind. */ + SET_OPTION_IF_UNSET (&global_options, &global_options_set, + param_simultaneous_prefetches, 8); + + if (flag_sw_prefetch_unroll == 1) + { + SET_OPTION_IF_UNSET (&global_options, &global_options_set, + param_max_unrolled_insns, 400); + } + /* Do some sanity checks on the above options. */ + + if ((sw_64_fptm == SW_64_FPTM_SU || sw_64_fptm == SW_64_FPTM_SUI) + && sw_64_tp != SW_64_TP_INSN && sw_64_cpu != PROCESSOR_SW6 + && sw_64_cpu != PROCESSOR_SW8) + { + warning (0, "fp software completion requires %<-mtrap-precision=i%>"); + sw_64_tp = SW_64_TP_INSN; + } + + if (sw_64_cpu == PROCESSOR_SW6 || sw_64_cpu == PROCESSOR_SW8) + { + /* Except for SW6 pass 1 (not released), we always have precise + arithmetic traps. Which means we can do software completion + without minding trap shadows. */ + sw_64_tp = SW_64_TP_PROG; + } + + if (TARGET_FLOAT_VAX) + { + if (sw_64_fprm == SW_64_FPRM_MINF || sw_64_fprm == SW_64_FPRM_DYN) + { + warning (0, "rounding mode not supported for VAX floats"); + sw_64_fprm = SW_64_FPRM_NORM; + } + if (sw_64_fptm == SW_64_FPTM_SUI) + { + warning (0, "trap mode not supported for VAX floats"); + sw_64_fptm = SW_64_FPTM_SU; + } + if (target_flags_explicit & MASK_LONG_DOUBLE_128) + warning (0, "128-bit long double not supported for VAX floats"); + target_flags &= ~MASK_LONG_DOUBLE_128; + } + + { + char *end; + int lat; + + if (!sw_64_mlat_string) + sw_64_mlat_string = "L1"; + + if (ISDIGIT ((unsigned char) sw_64_mlat_string[0]) + && (lat = strtol (sw_64_mlat_string, &end, 10), *end == '\0')) + ; + else if ((sw_64_mlat_string[0] == 'L' || sw_64_mlat_string[0] == 'l') + && ISDIGIT ((unsigned char) sw_64_mlat_string[1]) + && sw_64_mlat_string[2] == '\0') + { + static int cache_latency[][4] = { + {3, 12, 30}, /* sw6 -- Bcache from LMbench. */ + // { 4, 15, 90 }, /* sw6b -- Bcache from LMbench. */ + {3, 7, 11}, /* sw8a -- Bcache from LMbench. */ + }; + if (flag_sw_rtx_cost) + { + cache_latency[sw_64_tune][0] = 3; + cache_latency[sw_64_tune][1] = 7; + cache_latency[sw_64_tune][2] = 11; + } + + lat = sw_64_mlat_string[1] - '0'; + if (lat <= 0 || lat > 3 || cache_latency[sw_64_tune][lat - 1] == -1) + { + warning (0, "L%d cache latency unknown for %s", lat, + sw_64_cpu_name[sw_64_tune]); + lat = 3; + } + else + lat = cache_latency[sw_64_tune][lat - 1]; + } + else if (!strcmp (sw_64_mlat_string, "main")) + { + /* Most current memories have about 370ns latency. This is + a reasonable guess for a fast cpu. */ + lat = 150; + } + else + { + warning (0, "bad value %qs for %<-mmemory-latency%>", + sw_64_mlat_string); + lat = 3; + } + + sw_64_memory_latency = lat; + } + + /* Default the definition of "small data" to 8 bytes. */ + if (!global_options_set.x_g_switch_value) + g_switch_value = 8; + + /* Infer TARGET_SMALL_DATA from -fpic/-fPIC. 
*/ + if (flag_pic == 1) + target_flags |= MASK_SMALL_DATA; + else if (flag_pic == 2) + target_flags &= ~MASK_SMALL_DATA; + + sw_64_override_options_after_change (); + + /* Register variables and functions with the garbage collector. */ + + /* Set up function hooks. */ + init_machine_status = sw_64_init_machine_status; + + /* Tell the compiler when we're using VAX floating point. */ + if (TARGET_FLOAT_VAX) + { + REAL_MODE_FORMAT (SFmode) = &vax_f_format; + REAL_MODE_FORMAT (DFmode) = &vax_g_format; + REAL_MODE_FORMAT (TFmode) = NULL; + } + +#ifdef TARGET_DEFAULT_LONG_DOUBLE_128 + if (!(target_flags_explicit & MASK_LONG_DOUBLE_128)) + target_flags |= MASK_LONG_DOUBLE_128; +#endif +} + +/* Implement targetm.override_options_after_change. */ + +static void +sw_64_override_options_after_change (void) +{ + /* Align labels and loops for optimal branching. */ + /* ??? Kludge these by not doing anything if we don't optimize. */ + if (optimize > 0) + { + if (flag_align_loops && !str_align_loops) + str_align_loops = "16"; + if (flag_align_jumps && !str_align_jumps) + str_align_jumps = "16"; + } + if (flag_align_functions && !str_align_functions) + str_align_functions = "16"; +} + +/* Returns 1 if VALUE is a mask that contains full bytes of zero or ones. */ + +int +zap_mask (HOST_WIDE_INT value) +{ + int i; + + for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++, value >>= 8) + if ((value & 0xff) != 0 && (value & 0xff) != 0xff) + return 0; + + return 1; +} + +/* Return true if OP is valid for a particular TLS relocation. + We are already guaranteed that OP is a CONST. */ + +int +tls_symbolic_operand_1 (rtx op, int size, int unspec) +{ + op = XEXP (op, 0); + + if (GET_CODE (op) != UNSPEC || XINT (op, 1) != unspec) + return 0; + op = XVECEXP (op, 0, 0); + + if (GET_CODE (op) != SYMBOL_REF) + return 0; + + switch (SYMBOL_REF_TLS_MODEL (op)) + { + case TLS_MODEL_LOCAL_DYNAMIC: + return unspec == UNSPEC_DTPREL && size == sw_64_tls_size; + case TLS_MODEL_INITIAL_EXEC: + return unspec == UNSPEC_TPREL && size == 64; + case TLS_MODEL_LOCAL_EXEC: + return unspec == UNSPEC_TPREL && size == sw_64_tls_size; + default: + gcc_unreachable (); + } +} + +/* Used by aligned_memory_operand and unaligned_memory_operand to + resolve what reload is going to do with OP if it's a register. */ + +rtx +resolve_reload_operand (rtx op) +{ + if (reload_in_progress) + { + rtx tmp = op; + if (SUBREG_P (tmp)) + tmp = SUBREG_REG (tmp); + if (REG_P (tmp) && REGNO (tmp) >= FIRST_PSEUDO_REGISTER) + { + op = reg_equiv_memory_loc (REGNO (tmp)); + if (op == 0) + return 0; + } + } + return op; +} + +/* The scalar modes supported differs from the default check-what-c-supports + version in that sometimes TFmode is available even when long double + indicates only DFmode. */ + +static bool +sw_64_scalar_mode_supported_p (scalar_mode mode) +{ + switch (mode) + { + case E_QImode: + case E_HImode: + case E_SImode: + case E_DImode: + case E_TImode: /* via optabs.c. */ + return true; + + case E_SFmode: + case E_DFmode: + return true; + + case E_TFmode: + return TARGET_HAS_XFLOATING_LIBS; + + default: + return false; + } +} + +/* Sw_64 implements a couple of integer vector mode operations when + TARGET_MAX is enabled. We do not check TARGET_MAX here, however, + which allows the vectorizer to operate on e.g. move instructions, + or when expand_vector_operations can do something useful. 
*/ + +static bool +sw_64_vector_mode_supported_p (machine_mode mode) +{ + return mode == V8QImode || mode == V4HImode || mode == V2SImode; +} + +/* Return the TLS model to use for SYMBOL. */ + +static enum tls_model +tls_symbolic_operand_type (rtx symbol) +{ + enum tls_model model; + + if (GET_CODE (symbol) != SYMBOL_REF) + return TLS_MODEL_NONE; + model = SYMBOL_REF_TLS_MODEL (symbol); + + /* Local-exec with a 64-bit size is the same code as initial-exec. */ + if (model == TLS_MODEL_LOCAL_EXEC && sw_64_tls_size == 64) + model = TLS_MODEL_INITIAL_EXEC; + + return model; +} + +/* Return true if the function DECL will share the same GP as any + function in the current unit of translation. */ + +static bool +decl_has_samegp (const_tree decl) +{ + /* Functions that are not local can be overridden, and thus may + not share the same gp. */ + if (!(*targetm.binds_local_p) (decl)) + return false; + + /* If -msmall-data is in effect, assume that there is only one GP + for the module, and so any local symbol has this property. We + need explicit relocations to be able to enforce this for symbols + not defined in this unit of translation, however. */ + if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA) + return true; + + /* Functions that are not external are defined in this UoT. */ + /* ??? Irritatingly, static functions not yet emitted are still + marked "external". Apply this to non-static functions only. */ + return !TREE_PUBLIC (decl) || !DECL_EXTERNAL (decl); +} + +/* Return true if EXP should be placed in the small data section. */ + +static bool +sw_64_in_small_data_p (const_tree exp) +{ + /* We want to merge strings, so we never consider them small data. */ + if (TREE_CODE (exp) == STRING_CST) + return false; + + /* Functions are never in the small data area. Duh. */ + if (TREE_CODE (exp) == FUNCTION_DECL) + return false; + + /* COMMON symbols are never small data. */ + if (TREE_CODE (exp) == VAR_DECL && DECL_COMMON (exp)) + return false; + + if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp)) + { + const char *section = DECL_SECTION_NAME (exp); + if (strcmp (section, ".sdata") == 0 || strcmp (section, ".sbss") == 0) + return true; + } + else + { + HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); + + /* If this is an incomplete type with size 0, then we can't put it + in sdata because it might be too big when completed. */ + if (size > 0 && size <= g_switch_value) + return true; + } + + return false; +} + +/* legitimate_address_p recognizes an RTL expression that is a valid + memory address for an instruction. The MODE argument is the + machine mode for the MEM expression that wants to use this address. + + For Sw_64, we have either a constant address or the sum of a + register and a constant address, or just a register. For DImode, + any of those forms can be surrounded with an AND that clear the + low-order three bits; this is an "unaligned" access. */ + +static bool +sw_64_legitimate_address_p (machine_mode mode, rtx x, bool strict) +{ + /* If this is an ldl_u type address, discard the outer AND. */ + if (((TARGET_SW_M32 && mode == SImode) || (!TARGET_SW_M32 && mode == DImode)) + && GET_CODE (x) == AND && CONST_INT_P (XEXP (x, 1)) + && INTVAL (XEXP (x, 1)) == -8) + x = XEXP (x, 0); + + /* Discard non-paradoxical subregs. */ + if (SUBREG_P (x) + && (GET_MODE_SIZE (GET_MODE (x)) + < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))))) + x = SUBREG_REG (x); + + /* Unadorned general registers are valid. */ + if (REG_P (x) + && (strict ? 
STRICT_REG_OK_FOR_BASE_P (x) + : NONSTRICT_REG_OK_FOR_BASE_P (x))) + return true; + + /* Constant addresses (i.e. +/- 32k) are valid. */ + if (CONSTANT_ADDRESS_P (x)) + return true; + + if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC + || GET_CODE (x) == POST_MODIFY) + && TARGET_SW8A + && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0)) + : NONSTRICT_REG_OK_FOR_BASE_P (XEXP (x, 0)))) + return true; + /* Register plus a small constant offset is valid. */ + if (GET_CODE (x) == PLUS) + { + rtx ofs = XEXP (x, 1); + x = XEXP (x, 0); + + /* Discard non-paradoxical subregs. */ + if (SUBREG_P (x) + && (GET_MODE_SIZE (GET_MODE (x)) + < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))))) + x = SUBREG_REG (x); + + if (REG_P (x)) + { + if (!strict && NONSTRICT_REG_OK_FP_BASE_P (x) && CONST_INT_P (ofs)) + return true; + if ((strict ? STRICT_REG_OK_FOR_BASE_P (x) + : NONSTRICT_REG_OK_FOR_BASE_P (x)) + && CONSTANT_ADDRESS_P (ofs)) + return true; + } + } + + /* If we're managing explicit relocations, LO_SUM is valid, as are small + data symbols. Avoid explicit relocations of modes larger than word + mode since i.e. $LC0+8($1) can fold around +/- 32k offset. */ + else if (TARGET_EXPLICIT_RELOCS && GET_MODE_SIZE (mode) <= UNITS_PER_WORD) + { + if (small_symbolic_operand (x, Pmode)) + return true; + + if (GET_CODE (x) == LO_SUM) + { + rtx ofs = XEXP (x, 1); + x = XEXP (x, 0); + + /* Discard non-paradoxical subregs. */ + if (SUBREG_P (x) + && (GET_MODE_SIZE (GET_MODE (x)) + < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))))) + x = SUBREG_REG (x); + + /* Must have a valid base register. */ + if (!(REG_P (x) + && (strict ? STRICT_REG_OK_FOR_BASE_P (x) + : NONSTRICT_REG_OK_FOR_BASE_P (x)))) + return false; + + /* The symbol must be local. */ + if (local_symbolic_operand (ofs, Pmode) + || dtp32_symbolic_operand (ofs, Pmode) + || tp32_symbolic_operand (ofs, Pmode)) + return true; + } + } + + return false; +} + +/* Build the SYMBOL_REF for __tls_get_addr. */ + +static GTY (()) rtx tls_get_addr_libfunc; + +static rtx +get_tls_get_addr (void) +{ + if (!tls_get_addr_libfunc) + tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr"); + return tls_get_addr_libfunc; +} + +/* Try machine-dependent ways of modifying an illegitimate address + to be legitimate. If we find one, return the new, valid address. */ + +static rtx +sw_64_legitimize_address_1 (rtx x, rtx scratch, machine_mode mode) +{ + HOST_WIDE_INT addend; + + /* If the address is (plus reg const_int) and the CONST_INT is not a + valid offset, compute the high part of the constant and add it to + the register. Then our address is (plus temp low-part-const). */ + if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)) + && !CONSTANT_ADDRESS_P (XEXP (x, 1))) + { + addend = INTVAL (XEXP (x, 1)); + x = XEXP (x, 0); + goto split_addend; + } + + /* If the address is (const (plus FOO const_int)), find the low-order + part of the CONST_INT. Then load FOO plus any high-order part of the + CONST_INT into a register. Our address is (plus reg low-part-const). + This is done to reduce the number of GOT entries. */ + if (can_create_pseudo_p () && GET_CODE (x) == CONST + && GET_CODE (XEXP (x, 0)) == PLUS && CONST_INT_P (XEXP (XEXP (x, 0), 1))) + { + addend = INTVAL (XEXP (XEXP (x, 0), 1)); + x = force_reg (Pmode, XEXP (XEXP (x, 0), 0)); + goto split_addend; + } + + /* If we have a (plus reg const), emit the load as in (2), then add + the two registers, and finally generate (plus reg low-part-const) as + our address. 
*/ + if (can_create_pseudo_p () && GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) + && GET_CODE (XEXP (x, 1)) == CONST + && GET_CODE (XEXP (XEXP (x, 1), 0)) == PLUS + && CONST_INT_P (XEXP (XEXP (XEXP (x, 1), 0), 1))) + { + addend = INTVAL (XEXP (XEXP (XEXP (x, 1), 0), 1)); + x = expand_simple_binop (Pmode, PLUS, XEXP (x, 0), + XEXP (XEXP (XEXP (x, 1), 0), 0), NULL_RTX, 1, + OPTAB_LIB_WIDEN); + goto split_addend; + } + + /* If this is a local symbol, split the address into HIGH/LO_SUM parts. + Avoid modes larger than word mode since i.e. $LC0+8($1) can fold + around +/- 32k offset. */ + if (TARGET_EXPLICIT_RELOCS && GET_MODE_SIZE (mode) <= UNITS_PER_WORD + && symbolic_operand (x, Pmode)) + { + rtx r0, r16, eqv, tga, tp, dest, seq; + rtx_insn *insn; + + switch (tls_symbolic_operand_type (x)) + { + case TLS_MODEL_NONE: + break; + + case TLS_MODEL_GLOBAL_DYNAMIC: + { + start_sequence (); + + r0 = gen_rtx_REG (Pmode, 0); + r16 = gen_rtx_REG (Pmode, 16); + tga = get_tls_get_addr (); + dest = gen_reg_rtx (Pmode); + seq = GEN_INT (sw_64_next_sequence_number++); + if (sw_64_tls_gd == 16) + { + emit_insn ( + gen_movdi_er_tlsgd (r16, pic_offset_table_rtx, x, seq)); + } + else if (sw_64_tls_gd == 32) + { + eqv + = gen_rtx_UNSPEC (Pmode, + gen_rtvec (3, pic_offset_table_rtx, x, seq), + UNSPEC_TLSRELGOT); + + emit_insn (gen_rtx_SET (r16, eqv)); + emit_insn (gen_movdi_er_tlsgd (r16, r16, x, seq)); + } + rtx val = gen_call_value_osf_tlsgd (r0, tga, seq); + insn = emit_call_insn (val); + RTL_CONST_CALL_P (insn) = 1; + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16); + + insn = get_insns (); + end_sequence (); + + emit_libcall_block (insn, dest, r0, x); + return dest; + } + + case TLS_MODEL_LOCAL_DYNAMIC: + { + start_sequence (); + + r0 = gen_rtx_REG (Pmode, 0); + r16 = gen_rtx_REG (Pmode, 16); + tga = get_tls_get_addr (); + scratch = gen_reg_rtx (Pmode); + seq = GEN_INT (sw_64_next_sequence_number++); + if (sw_64_tls_ldm == 16) + { + emit_insn ( + gen_movdi_er_tlsldm (r16, pic_offset_table_rtx, seq)); + } + else if (sw_64_tls_ldm == 32) + { + eqv + = gen_rtx_UNSPEC (Pmode, + gen_rtvec (3, pic_offset_table_rtx, x, seq), + UNSPEC_TLSRELGOT); + + emit_insn (gen_rtx_SET (r16, eqv)); + emit_insn (gen_movdi_er_tlsldm (r16, r16, seq)); + } + rtx val = gen_call_value_osf_tlsldm (r0, tga, seq); + insn = emit_call_insn (val); + RTL_CONST_CALL_P (insn) = 1; + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16); + + insn = get_insns (); + end_sequence (); + + eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), + UNSPEC_TLSLDM_CALL); + emit_libcall_block (insn, scratch, r0, eqv); + + eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPREL); + eqv = gen_rtx_CONST (Pmode, eqv); + + if (sw_64_tls_size == 64) + { + if (sw_64_tls_gotdtprel == 16) + { + dest = gen_reg_rtx (Pmode); + emit_insn (gen_rtx_SET (dest, eqv)); + emit_insn (gen_adddi3 (dest, dest, scratch)); + } + else if (sw_64_tls_gotdtprel == 32) + { + seq = GEN_INT (sw_64_next_sequence_number++); + eqv = gen_rtx_UNSPEC (Pmode, + gen_rtvec (3, pic_offset_table_rtx, x, + seq), + UNSPEC_TLSRELGOT); + dest = gen_reg_rtx (Pmode); + emit_insn (gen_rtx_SET (dest, eqv)); + + eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, dest, x, seq), + UNSPEC_GOTDTPREL); + emit_insn (gen_rtx_SET (dest, eqv)); + + emit_insn (gen_adddi3 (dest, dest, scratch)); + } + return dest; + } + if (sw_64_tls_size == 32) + { + rtx temp = gen_rtx_HIGH (Pmode, eqv); + temp = gen_rtx_PLUS (Pmode, scratch, temp); + scratch = gen_reg_rtx (Pmode); + emit_insn (gen_rtx_SET (scratch, temp)); + } + return 
gen_rtx_LO_SUM (Pmode, scratch, eqv); + } + + case TLS_MODEL_INITIAL_EXEC: + { + if (sw_64_tls_gottprel == 16) + { + eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL); + eqv = gen_rtx_CONST (Pmode, eqv); + tp = gen_reg_rtx (Pmode); + scratch = gen_reg_rtx (Pmode); + dest = gen_reg_rtx (Pmode); + + emit_insn (gen_get_thread_pointerdi (tp)); + emit_insn (gen_rtx_SET (scratch, eqv)); + emit_insn (gen_adddi3 (dest, tp, scratch)); + } + else if (sw_64_tls_gottprel == 32) + { + seq = GEN_INT (sw_64_next_sequence_number++); + + tp = gen_reg_rtx (Pmode); + emit_insn (gen_get_thread_pointerdi (tp)); + + scratch = gen_reg_rtx (Pmode); + eqv + = gen_rtx_UNSPEC (Pmode, + gen_rtvec (3, pic_offset_table_rtx, x, seq), + UNSPEC_TLSRELGOT); + emit_insn (gen_rtx_SET (scratch, eqv)); + eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, scratch, x, seq), + UNSPEC_TPREL); + emit_insn (gen_rtx_SET (scratch, eqv)); + + dest = gen_reg_rtx (Pmode); + emit_insn (gen_adddi3 (dest, tp, scratch)); + } + return dest; + } + + case TLS_MODEL_LOCAL_EXEC: + eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL); + eqv = gen_rtx_CONST (Pmode, eqv); + tp = gen_reg_rtx (Pmode); + + emit_insn (gen_get_thread_pointerdi (tp)); + if (sw_64_tls_size == 32) + { + rtx temp = gen_rtx_HIGH (Pmode, eqv); + temp = gen_rtx_PLUS (Pmode, tp, temp); + tp = gen_reg_rtx (Pmode); + emit_insn (gen_rtx_SET (tp, temp)); + } + return gen_rtx_LO_SUM (Pmode, tp, eqv); + + default: + gcc_unreachable (); + } + + if (local_symbolic_operand (x, Pmode)) + { + if (small_symbolic_operand (x, Pmode)) + return x; + else + { + if (can_create_pseudo_p ()) + scratch = gen_reg_rtx (Pmode); + emit_insn (gen_rtx_SET (scratch, gen_rtx_HIGH (Pmode, x))); + return gen_rtx_LO_SUM (Pmode, scratch, x); + } + } + } + + return NULL; + +split_addend: + { + HOST_WIDE_INT low, high; + + low = ((addend & 0xffff) ^ 0x8000) - 0x8000; + addend -= low; + high = ((addend & 0xffffffff) ^ 0x80000000) - 0x80000000; + addend -= high; + + if (addend) + x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (addend), + (!can_create_pseudo_p () ? scratch : NULL_RTX), + 1, OPTAB_LIB_WIDEN); + if (high) + x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (high), + (!can_create_pseudo_p () ? scratch : NULL_RTX), + 1, OPTAB_LIB_WIDEN); + + return plus_constant (Pmode, x, low); + } +} + +/* Try machine-dependent ways of modifying an illegitimate address + to be legitimate. Return X or the new, valid address. */ + +static rtx +sw_64_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, machine_mode mode) +{ + rtx new_x = sw_64_legitimize_address_1 (x, NULL_RTX, mode); + return new_x ? new_x : x; +} + +/* Return true if ADDR has an effect that depends on the machine mode it + is used for. On the Sw_64 this is true only for the unaligned modes. + We can simplify the test since we know that the address must be valid. */ + +static bool +sw_64_mode_dependent_address_p (const_rtx addr, + addr_space_t as ATTRIBUTE_UNUSED) +{ + return GET_CODE (addr) == AND; +} + +/* Primarily this is required for TLS symbols, but given that our move + patterns *ought* to be able to handle any symbol at any time, we + should never be spilling symbolic operands to the constant pool, ever. 
*/ + +static bool +sw_64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) +{ + enum rtx_code code = GET_CODE (x); + return code == SYMBOL_REF || code == LABEL_REF || code == CONST; +} + +/* We do not allow indirect calls to be optimized into sibling calls, nor + can we allow a call to a function with a different GP to be optimized + into a sibcall. */ + +static bool +sw_64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) +{ + /* Can't do indirect tail calls, since we don't know if the target + uses the same GP. */ + if (!decl) + return false; + + /* Otherwise, we can make a tail call if the target function shares + the same GP. */ + return decl_has_samegp (decl); +} + +bool +some_small_symbolic_operand_int (rtx x) +{ + subrtx_var_iterator::array_type array; + FOR_EACH_SUBRTX_VAR (iter, array, x, ALL) + { + rtx x = *iter; + /* Don't re-split. */ + if (GET_CODE (x) == LO_SUM) + iter.skip_subrtxes (); + else if (small_symbolic_operand (x, Pmode)) + return true; + } + return false; +} + +rtx +split_small_symbolic_operand (rtx x) +{ + x = copy_insn (x); + subrtx_ptr_iterator::array_type array; + FOR_EACH_SUBRTX_PTR (iter, array, &x, ALL) + { + rtx *ptr = *iter; + rtx x = *ptr; + /* Don't re-split. */ + if (GET_CODE (x) == LO_SUM) + iter.skip_subrtxes (); + else if (small_symbolic_operand (x, Pmode)) + { + *ptr = gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, x); + iter.skip_subrtxes (); + } + } + return x; +} + +/* Indicate that INSN cannot be duplicated. This is true for any insn + that we've marked with gpdisp relocs, since those have to stay in + 1-1 correspondence with one another. + + Technically we could copy them if we could set up a mapping from one + sequence number to another, across the set of insns to be duplicated. + This seems overly complicated and error-prone since interblock motion + from sched-ebb could move one of the pair of insns to a different block. + + Also cannot allow call insns to be duplicated. If they throw exceptions, + then they'll be in a different block from their ldgp. Which could lead + the bb reorder code to think that it would be ok to copy just the block + containing the call and branch to the block containing the ldgp. */ + +static bool +sw_64_cannot_copy_insn_p (rtx_insn *insn) +{ + if (!reload_completed || !TARGET_EXPLICIT_RELOCS) + return false; + if (recog_memoized (insn) >= 0) + return get_attr_cannot_copy (insn); + else + return false; +} + +/* Try a machine-dependent way of reloading an illegitimate address + operand. If we find one, push the reload and return the new rtx. */ + +rtx +sw_64_legitimize_reload_address (rtx x, machine_mode mode ATTRIBUTE_UNUSED, + int opnum, int type, + int ind_levels ATTRIBUTE_UNUSED) +{ + /* We must recognize output that we have already generated ourselves. */ + if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS + && REG_P (XEXP (XEXP (x, 0), 0)) && CONST_INT_P (XEXP (XEXP (x, 0), 1)) + && CONST_INT_P (XEXP (x, 1))) + { + push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, BASE_REG_CLASS, + GET_MODE (x), VOIDmode, 0, 0, opnum, + (enum reload_type) type); + return x; + } + + /* We wish to handle large displacements off a base register by + splitting the addend across an ldih and the mem insn. This + cuts number of extra insns needed from 3 to 1. 
*/ + if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) + && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER + && REGNO_OK_FOR_BASE_P (REGNO (XEXP (x, 0))) && CONST_INT_P (XEXP (x, 1))) + { + HOST_WIDE_INT val = INTVAL (XEXP (x, 1)); + HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000; + HOST_WIDE_INT high + = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000; + + /* Check for 32-bit overflow. */ + if (high + low != val) + return NULL_RTX; + + /* Reload the high part into a base reg; leave the low part + in the mem directly. */ + x = gen_rtx_PLUS (GET_MODE (x), + gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0), + GEN_INT (high)), + GEN_INT (low)); + + push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, BASE_REG_CLASS, + GET_MODE (x), VOIDmode, 0, 0, opnum, + (enum reload_type) type); + return x; + } + + return NULL_RTX; +} + +/* Return the cost of moving between registers of various classes. Moving + between FLOAT_REGS and anything else except float regs is expensive. + In fact, we make it quite expensive because we really don't want to + do these moves unless it is clearly worth it. Optimizations may + reduce the impact of not being able to allocate a pseudo to a + hard register. */ + +static int +sw_64_register_move_cost (machine_mode mode, reg_class_t from_i, + reg_class_t to_i) +{ + enum reg_class from = (enum reg_class) from_i; + enum reg_class to = (enum reg_class) to_i; + if (!flag_sw_rtx_cost) + { + if ((from == FLOAT_REGS) == (to == FLOAT_REGS)) + return 2; + if (TARGET_FIX) + return (from == FLOAT_REGS) ? 6 : 8; + return 4 + 2 * sw_64_memory_latency; + } + if (from == R0_REG || from == R24_REG || from == R25_REG || from == R27_REG) + from = GENERAL_REGS; + if (to == R0_REG || to == R24_REG || to == R25_REG || to == R27_REG) + to = GENERAL_REGS; + if (GET_MODE_SIZE (mode) == 32) + { + if (from == GENERAL_REGS && to == GENERAL_REGS) + return 1; + else if (from == GENERAL_REGS) + return 16; + else if (to == GENERAL_REGS) + return 16; + if (!TARGET_SW_SIMD) + return 34; + return 2; + } + if (from == GENERAL_REGS && to == GENERAL_REGS) + return 1; + else if (from == GENERAL_REGS) + return 4; + else if (to == GENERAL_REGS) + return 4; + return 2; +} + +/* Return the cost of moving data of MODE from a register to + or from memory. On the Sw_64, bump this up a bit. */ + +static int +sw_64_memory_move_cost (machine_mode /*mode. */, reg_class_t /*regclass. */, + bool /*in. */) +{ + if (flag_sw_rtx_cost) + return sw_64_memory_latency; + return 2 * sw_64_memory_latency; +} + +/* Compute a (partial) cost for rtx X. Return true if the complete + cost has been computed, and false if subexpressions should be + scanned. In either case, *TOTAL contains the cost result. */ + +static bool +sw_64_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno, int *total, + bool speed) +{ + int code = GET_CODE (x); + bool float_mode_p = FLOAT_MODE_P (mode); + const struct sw_64_rtx_cost_data *cost_data; + + if (!speed) + cost_data = &sw_64_rtx_cost_size; + else if (flag_sw_rtx_cost) + cost_data = &sw_64_rtx_cost_data[2]; + else + cost_data = &sw_64_rtx_cost_data[sw_64_tune]; + + switch (code) + { + case CONST_INT: + /* If this is an 8-bit constant, return zero since it can be used + nearly anywhere with no cost. If it is a valid operand for an + ADD or AND, likewise return 0 if we know it will be used in that + context. Otherwise, return 2 since it might be used there later. + All other constants take at least two insns. 
*/ + if (INTVAL (x) >= 0 && INTVAL (x) < 256) + { + *total = 0; + return true; + } + /* FALLTHRU */ + + case CONST_DOUBLE: + case CONST_WIDE_INT: + if (x == CONST0_RTX (mode)) + *total = 0; + else if ((outer_code == PLUS && add_operand (x, VOIDmode)) + || (outer_code == AND && and_operand (x, VOIDmode))) + *total = 0; + else if (add_operand (x, VOIDmode) || and_operand (x, VOIDmode)) + *total = 2; + else + *total = COSTS_N_INSNS (2); + return true; + + case CONST: + case SYMBOL_REF: + case LABEL_REF: + if (TARGET_EXPLICIT_RELOCS && small_symbolic_operand (x, VOIDmode)) + *total = COSTS_N_INSNS (outer_code != MEM); + else if (TARGET_EXPLICIT_RELOCS && local_symbolic_operand (x, VOIDmode)) + *total = COSTS_N_INSNS (1 + (outer_code != MEM)); + else if (tls_symbolic_operand_type (x)) + /* ??? How many insns do we emit here? More than one... */ + *total = COSTS_N_INSNS (15); + else + /* Otherwise we do a load from the GOT. */ + *total = COSTS_N_INSNS (!speed ? 1 : sw_64_memory_latency); + return true; + + case HIGH: + /* This is effectively an add_operand. */ + *total = 2; + return true; + + case PLUS: + case MINUS: + if (float_mode_p) + *total = cost_data->fp_add; + else if ((GET_CODE (XEXP (x, 0)) == ASHIFT) + || (GET_CODE (XEXP (x, 0)) == MULT) + && const48_operand (XEXP (XEXP (x, 0), 1), VOIDmode)) + { + *total = (rtx_cost (XEXP (XEXP (x, 0), 0), mode, + (enum rtx_code) outer_code, opno, speed) + + rtx_cost (XEXP (x, 1), mode, (enum rtx_code) outer_code, + opno, speed) + + COSTS_N_INSNS (1)); + return true; + } + return false; + + case MULT: + if (float_mode_p) + *total = cost_data->fp_mult; + else if (mode == DImode) + *total = cost_data->int_mult_di; + else + *total = cost_data->int_mult_si; + return false; + + case ASHIFT: + if (CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) <= 3) + { + *total = COSTS_N_INSNS (1); + return false; + } + /* FALLTHRU */ + + case ASHIFTRT: + case LSHIFTRT: + *total = cost_data->int_shift; + return false; + + case IF_THEN_ELSE: + if (float_mode_p) + *total = cost_data->fp_add; + else + *total = cost_data->int_cmov; + if (flag_sw_rtx_cost && float_mode_p) + *total = COSTS_N_INSNS (2); + return false; + + case DIV: + case UDIV: + case MOD: + case UMOD: + if (!float_mode_p) + *total = cost_data->int_div; + else if (mode == SFmode) + *total = cost_data->fp_div_sf; + else + *total = cost_data->fp_div_df; + return false; + + case MEM: + *total = COSTS_N_INSNS (!speed ? 1 : sw_64_memory_latency); + return true; + + case NEG: + if (!float_mode_p) + { + *total = COSTS_N_INSNS (1); + return false; + } + /* FALLTHRU */ + + case ABS: + if (!float_mode_p) + { + *total = COSTS_N_INSNS (1) + cost_data->int_cmov; + return false; + } + if (flag_sw_rtx_cost) + { + *total = COSTS_N_INSNS (2); + return false; + } + /* FALLTHRU */ + + case FLOAT: + case UNSIGNED_FLOAT: + case FIX: + case UNSIGNED_FIX: + if (flag_sw_rtx_cost) + { + *total = COSTS_N_INSNS (4); + return false; + } + case FLOAT_TRUNCATE: + *total = cost_data->fp_add; + return false; + + case FLOAT_EXTEND: + if (MEM_P (XEXP (x, 0))) + *total = 0; + else + *total = cost_data->fp_add; + return false; + + default: + return false; + } +} + +/* REF is an alignable memory location. Place an aligned SImode + reference into *PALIGNED_MEM and the number of bits to shift into + *PBITNUM. SCRATCH is a free register for use in reloading out + of range stack slots. 
*/ + +void +get_aligned_mem (rtx ref, rtx *paligned_mem, rtx *pbitnum) +{ + rtx base; + HOST_WIDE_INT disp, offset; + + gcc_assert (MEM_P (ref)); + + if (reload_in_progress) + { + base = find_replacement (&XEXP (ref, 0)); + gcc_assert (memory_address_p (GET_MODE (ref), base)); + } + else + base = XEXP (ref, 0); + + if (GET_CODE (base) == PLUS) + disp = INTVAL (XEXP (base, 1)), base = XEXP (base, 0); + else + disp = 0; + + /* Find the byte offset within an aligned word. If the memory itself is + claimed to be aligned, believe it. Otherwise, aligned_memory_operand + will have examined the base register and determined it is aligned, and + thus displacements from it are naturally alignable. */ + if (MEM_ALIGN (ref) >= 32) + offset = 0; + else + offset = disp & 3; + + /* The location should not cross aligned word boundary. */ + gcc_assert (offset + GET_MODE_SIZE (GET_MODE (ref)) + <= GET_MODE_SIZE (SImode)); + + /* Access the entire aligned word. */ + *paligned_mem = widen_memory_access (ref, SImode, -offset); + + /* Convert the byte offset within the word to a bit offset. */ + offset *= BITS_PER_UNIT; + *pbitnum = GEN_INT (offset); +} + +/* Similar, but just get the address. Handle the two reload cases. + Add EXTRA_OFFSET to the address we return. */ + +rtx +get_unaligned_address (rtx ref) +{ + rtx base; + HOST_WIDE_INT offset = 0; + + gcc_assert (MEM_P (ref)); + + if (reload_in_progress) + { + base = find_replacement (&XEXP (ref, 0)); + gcc_assert (memory_address_p (GET_MODE (ref), base)); + } + else + base = XEXP (ref, 0); + + if (GET_CODE (base) == PLUS) + offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0); + + return plus_constant (Pmode, base, offset); +} + +/* Compute a value X, such that X & 7 == (ADDR + OFS) & 7. + X is always returned in a register. */ + +rtx +get_unaligned_offset (rtx addr, HOST_WIDE_INT ofs) +{ + if (GET_CODE (addr) == PLUS) + { + ofs += INTVAL (XEXP (addr, 1)); + addr = XEXP (addr, 0); + } + + return expand_simple_binop (Pmode, PLUS, addr, GEN_INT (ofs & 7), NULL_RTX, 1, + OPTAB_LIB_WIDEN); +} + +/* On the Sw_64, all (non-symbolic) constants except zero go into + a floating-point register via memory. Note that we cannot + return anything that is not a subset of RCLASS, and that some + symbolic constants cannot be dropped to memory. */ + +enum reg_class +sw_64_preferred_reload_class (rtx x, enum reg_class rclass) +{ + /* Zero is present in any register class. */ + if (x == CONST0_RTX (GET_MODE (x))) + return rclass; + + /* These sorts of constants we can easily drop to memory. */ + if (CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) + || GET_CODE (x) == CONST_VECTOR) + { + if (rclass == FLOAT_REGS) + return NO_REGS; + if (rclass == ALL_REGS) + return GENERAL_REGS; + return rclass; + } + + /* All other kinds of constants should not (and in the case of HIGH + cannot) be dropped to memory -- instead we use a GENERAL_REGS + secondary reload. */ + if (CONSTANT_P (x)) + return (rclass == ALL_REGS ? GENERAL_REGS : rclass); + + return rclass; +} + +/* Inform reload about cases where moving X with a mode MODE to a register in + RCLASS requires an extra scratch or immediate register. Return the class + needed for the immediate register. */ + +static reg_class_t +sw_64_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i, + machine_mode mode, secondary_reload_info *sri) +{ + enum reg_class rclass = (enum reg_class) rclass_i; + + /* Loading and storing HImode or QImode values to and from memory + usually requires a scratch register. 
*/ + if (!TARGET_BWX && (mode == QImode || mode == HImode || mode == CQImode)) + { + if (any_memory_operand (x, mode)) + { + if (in_p) + { + if (!aligned_memory_operand (x, mode)) + sri->icode = direct_optab_handler (reload_in_optab, mode); + } + else + sri->icode = direct_optab_handler (reload_out_optab, mode); + return NO_REGS; + } + } + + /* We also cannot do integral arithmetic into FP regs, as might result + from register elimination into a DImode fp register. */ + if (rclass == FLOAT_REGS) + { + if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND) + return GENERAL_REGS; + if (in_p && INTEGRAL_MODE_P (mode) && !MEM_P (x) && !REG_P (x) + && !CONST_INT_P (x)) + return GENERAL_REGS; + } + + return NO_REGS; +} + +/* Implement TARGET_SECONDARY_MEMORY_NEEDED. + + If we are copying between general and FP registers, we need a memory + location unless the FIX extension is available. */ + +static bool +sw_64_secondary_memory_needed (machine_mode, reg_class_t class1, + reg_class_t class2) +{ + return (!TARGET_FIX + && ((class1 == FLOAT_REGS && class2 != FLOAT_REGS) + || (class2 == FLOAT_REGS && class1 != FLOAT_REGS))); +} + +/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. If MODE is + floating-point, use it. Otherwise, widen to a word like the default. + This is needed because we always store integers in FP registers in + quadword format. This whole area is very tricky! */ + +static machine_mode +sw_64_secondary_memory_needed_mode (machine_mode mode) +{ + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + return mode; + if (GET_MODE_SIZE (mode) >= 4) + return mode; + return mode_for_size (BITS_PER_WORD, GET_MODE_CLASS (mode), 0).require (); +} + +/* Given SEQ, which is an INSN list, look for any MEMs in either + a SET_DEST or a SET_SRC and copy the in-struct, unchanging, and + volatile flags from REF into each of the MEMs found. If REF is not + a MEM, don't do anything. */ + +void +sw_64_set_memflags (rtx seq, rtx ref) +{ + rtx_insn *insn; + + if (!MEM_P (ref)) + return; + + /* This is only called from sw_64.md, after having had something + generated from one of the insn patterns. So if everything is + zero, the pattern is already up-to-date. */ + if (!MEM_VOLATILE_P (ref) && !MEM_NOTRAP_P (ref) && !MEM_READONLY_P (ref)) + return; + + subrtx_var_iterator::array_type array; + for (insn = as_a <rtx_insn *> (seq); insn; insn = NEXT_INSN (insn)) + if (INSN_P (insn)) + FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), NONCONST) + { + rtx x = *iter; + if (MEM_P (x)) + { + MEM_VOLATILE_P (x) = MEM_VOLATILE_P (ref); + MEM_NOTRAP_P (x) = MEM_NOTRAP_P (ref); + MEM_READONLY_P (x) = MEM_READONLY_P (ref); + /* Sadly, we cannot use alias sets because the extra + aliasing produced by the AND interferes. Given that + two-byte quantities are the only thing we would be + able to differentiate anyway, there does not seem to + be any point in convoluting the early out of the + alias check. */ + iter.skip_subrtxes (); + } + } + else + gcc_unreachable (); +} + +static rtx +sw_64_emit_set_const (rtx, machine_mode, HOST_WIDE_INT, int, bool); + +/* Internal routine for sw_64_emit_set_const to check for N or below insns. + If NO_OUTPUT is true, then we only check to see if N insns are possible, + and return pc_rtx if successful. */ + +static rtx +sw_64_emit_set_const_1 (rtx target, machine_mode mode, HOST_WIDE_INT c, int n, + bool no_output) +{ + HOST_WIDE_INT new_const; + int i, bits; + /* Use a pseudo if highly optimizing and still generating RTL. */ + rtx subtarget + = (flag_expensive_optimizations && can_create_pseudo_p () ?
0 : target); + rtx temp, insn; + + /* If this is a sign-extended 32-bit constant, we can do this in at most + three insns, so do it if we have enough insns left. */ + + if (c >> 31 == -1 || c >> 31 == 0) + { + HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000; + HOST_WIDE_INT tmp1 = c - low; + HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000; + HOST_WIDE_INT extra = 0; + + /* If HIGH will be interpreted as negative but the constant is + positive, we must adjust it to do two ldha insns. */ + + if ((high & 0x8000) != 0 && c >= 0) + { + extra = 0x4000; + tmp1 -= 0x40000000; + high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000); + } + + if (c == low || (low == 0 && extra == 0)) + { + /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode) + but that meant that we can't handle INT_MIN on 32-bit machines + (like NT/Sw_64), because we recurse indefinitely through + emit_move_insn to gen_movdi. So instead, since we know exactly + what we want, create it explicitly. */ + + if (no_output) + return pc_rtx; + if (target == NULL) + target = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (target, GEN_INT (c))); + return target; + } + else if (n >= 2 + (extra != 0)) + { + if (no_output) + return pc_rtx; + if (!can_create_pseudo_p ()) + { + emit_insn (gen_rtx_SET (target, GEN_INT (high << 16))); + temp = target; + } + else + temp + = copy_to_suggested_reg (GEN_INT (high << 16), subtarget, mode); + + /* As of 2002-02-23, addsi3 is only available when not optimizing. + This means that if we go through expand_binop, we'll try to + generate extensions, etc, which will require new pseudos, which + will fail during some split phases. The SImode add patterns + still exist, but are not named. So build the insns by hand. */ + + if (extra != 0) + { + if (!subtarget) + subtarget = gen_reg_rtx (mode); + insn = gen_rtx_PLUS (mode, temp, GEN_INT (extra << 16)); + insn = gen_rtx_SET (subtarget, insn); + emit_insn (insn); + temp = subtarget; + } + + if (target == NULL) + target = gen_reg_rtx (mode); + insn = gen_rtx_PLUS (mode, temp, GEN_INT (low)); + insn = gen_rtx_SET (target, insn); + emit_insn (insn); + return target; + } + } + + /* If we couldn't do it that way, try some other methods. But if we have + no instructions left, don't bother. Likewise, if this is SImode and + we can't make pseudos, we can't do anything since the expand_binop + and expand_unop calls will widen and try to make pseudos. */ + + if (n == 1 || (mode == SImode && !can_create_pseudo_p ())) + return 0; + + /* Next, see if we can load a related constant and then shift and possibly + negate it to get the constant we want. Try this once each increasing + numbers of insns. */ + + for (i = 1; i < n; i++) + { + /* First, see if minus some low bits, we've an easy load of + high bits. */ + + new_const = ((c & 0xffff) ^ 0x8000) - 0x8000; + if (new_const != 0) + { + temp = sw_64_emit_set_const (subtarget, mode, c - new_const, i, + no_output); + if (temp) + { + if (no_output) + return temp; + return expand_binop (mode, add_optab, temp, GEN_INT (new_const), + target, 0, OPTAB_WIDEN); + } + } + + /* Next try complementing. */ + temp = sw_64_emit_set_const (subtarget, mode, ~c, i, no_output); + if (temp) + { + if (no_output) + return temp; + return expand_unop (mode, one_cmpl_optab, temp, target, 0); + } + + /* Next try to form a constant and do a left shift. We can do this + if some low-order bits are zero; the exact_log2 call below tells + us that information. 
The bits we are shifting out could be any + value, but here we'll just try the 0- and sign-extended forms of + the constant. To try to increase the chance of having the same + constant in more than one insn, start at the highest number of + bits to shift, but try all possibilities in case a ZAPNOT will + be useful. */ + + bits = exact_log2 (c & -c); + if (bits > 0) + for (; bits > 0; bits--) + { + new_const = c >> bits; + temp + = sw_64_emit_set_const (subtarget, mode, new_const, i, no_output); + if (!temp && c < 0) + { + new_const = (unsigned HOST_WIDE_INT) c >> bits; + temp = sw_64_emit_set_const (subtarget, mode, new_const, i, + no_output); + } + if (temp) + { + if (no_output) + return temp; + return expand_binop (mode, ashl_optab, temp, GEN_INT (bits), + target, 0, OPTAB_WIDEN); + } + } + + /* Now try high-order zero bits. Here we try the shifted-in bits as + all zero and all ones. Be careful to avoid shifting outside the + mode and to avoid shifting outside the host wide int size. */ + + bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8) + - floor_log2 (c) - 1); + if (bits > 0) + for (; bits > 0; bits--) + { + new_const = c << bits; + temp + = sw_64_emit_set_const (subtarget, mode, new_const, i, no_output); + if (!temp) + { + new_const = (c << bits) | ((HOST_WIDE_INT_1U << bits) - 1); + temp = sw_64_emit_set_const (subtarget, mode, new_const, i, + no_output); + } + if (temp) + { + if (no_output) + return temp; + return expand_binop (mode, lshr_optab, temp, GEN_INT (bits), + target, 1, OPTAB_WIDEN); + } + } + + /* Now try high-order 1 bits. We get that with a sign-extension. + But one bit isn't enough here. Be careful to avoid shifting outside + the mode and to avoid shifting outside the host wide int size. */ + + bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8) + - floor_log2 (~c) - 2); + if (bits > 0) + for (; bits > 0; bits--) + { + new_const = c << bits; + temp + = sw_64_emit_set_const (subtarget, mode, new_const, i, no_output); + if (!temp) + { + new_const = (c << bits) | ((HOST_WIDE_INT_1U << bits) - 1); + temp = sw_64_emit_set_const (subtarget, mode, new_const, i, + no_output); + } + if (temp) + { + if (no_output) + return temp; + return expand_binop (mode, ashr_optab, temp, GEN_INT (bits), + target, 0, OPTAB_WIDEN); + } + } + } + + /* Finally, see if can load a value into the target that is the same as the + constant except that all bytes that are 0 are changed to be 0xff. If we + can, then we can do a ZAPNOT to obtain the desired constant. */ + + new_const = c; + for (i = 0; i < 64; i += 8) + if ((new_const & ((HOST_WIDE_INT) 0xff << i)) == 0) + new_const |= (HOST_WIDE_INT) 0xff << i; + + /* We are only called for SImode and DImode. If this is SImode, ensure that + we are sign extended to a full word. */ + + if (mode == SImode) + new_const = ((new_const & 0xffffffff) ^ 0x80000000) - 0x80000000; + + if (new_const != c) + { + temp + = sw_64_emit_set_const (subtarget, mode, new_const, n - 1, no_output); + if (temp) + { + if (no_output) + return temp; + return expand_binop (mode, and_optab, temp, GEN_INT (c | ~new_const), + target, 0, OPTAB_WIDEN); + } + } + + return 0; +} + +/* Try to output insns to set TARGET equal to the constant C if it can be + done in less than N insns. Do all computations in MODE. Returns the place + where the output has been placed if it can be done and the insns have been + emitted. If it would take more than N insns, zero is returned and no + insns and emitted. 
*/ + +static rtx +sw_64_emit_set_const (rtx target, machine_mode mode, HOST_WIDE_INT c, int n, + bool no_output) +{ + machine_mode orig_mode = mode; + rtx orig_target = target; + rtx result = 0; + int i; + + /* If we can't make any pseudos, TARGET is an SImode hard register, we + can't load this constant in one insn, do this in DImode. */ + if (!can_create_pseudo_p () && mode == SImode && REG_P (target) + && REGNO (target) < FIRST_PSEUDO_REGISTER) + { + result = sw_64_emit_set_const_1 (target, mode, c, 1, no_output); + if (result) + return result; + + target = no_output ? NULL : gen_lowpart (DImode, target); + mode = DImode; + } + else if (mode == V8QImode || mode == V4HImode || mode == V2SImode) + { + target = no_output ? NULL : gen_lowpart (DImode, target); + mode = DImode; + } + + /* Try 1 insn, then 2, then up to N. */ + for (i = 1; i <= n; i++) + { + result = sw_64_emit_set_const_1 (target, mode, c, i, no_output); + if (result) + { + rtx_insn *insn; + rtx set; + + if (no_output) + return result; + + insn = get_last_insn (); + set = single_set (insn); + if (!CONSTANT_P (SET_SRC (set))) + set_unique_reg_note (get_last_insn (), REG_EQUAL, GEN_INT (c)); + break; + } + } + + /* Allow for the case where we changed the mode of TARGET. */ + if (result) + { + if (result == target) + result = orig_target; + else if (mode != orig_mode) + result = gen_lowpart (orig_mode, result); + } + + return result; +} + +/* Having failed to find a 3 insn sequence in sw_64_emit_set_const, + fall back to a straight forward decomposition. We do this to avoid + exponential run times encountered when looking for longer sequences + with sw_64_emit_set_const. */ + +static rtx +sw_64_emit_set_long_const (rtx target, HOST_WIDE_INT c1) +{ + HOST_WIDE_INT d1, d2, d3, d4; + + /* Decompose the entire word. */ + + d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000; + c1 -= d1; + d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000; + c1 = (c1 - d2) >> 32; + d3 = ((c1 & 0xffff) ^ 0x8000) - 0x8000; + c1 -= d3; + d4 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000; + gcc_assert (c1 == d4); + + /* Construct the high word. */ + if (d4) + { + emit_move_insn (target, GEN_INT (d4)); + if (d3) + emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d3))); + } + else + emit_move_insn (target, GEN_INT (d3)); + + /* Shift it into place. */ + emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32))); + + /* Add in the low bits. */ + if (d2) + emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d2))); + if (d1) + emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d1))); + + return target; +} + +/* Given an integral CONST_INT or CONST_VECTOR, return the low 64 bits. */ + +static HOST_WIDE_INT +sw_64_extract_integer (rtx x) +{ + if (GET_CODE (x) == CONST_VECTOR) + x = simplify_subreg (DImode, x, GET_MODE (x), 0); + + gcc_assert (CONST_INT_P (x)); + + return INTVAL (x); +} + +/* Implement TARGET_LEGITIMATE_CONSTANT_P. This is all constants for which + we are willing to load the value into a register via a move pattern. + Normally this is all symbolic constants, integral constants that + take three or fewer instructions, and floating-point zero. 
*/ + +bool +sw_64_legitimate_constant_p (machine_mode mode, rtx x) +{ + HOST_WIDE_INT i0; + + switch (GET_CODE (x)) + { + case LABEL_REF: + case HIGH: + return true; + + case CONST: + if (GET_CODE (XEXP (x, 0)) == PLUS && CONST_INT_P (XEXP (XEXP (x, 0), 1))) + x = XEXP (XEXP (x, 0), 0); + else + return true; + + if (GET_CODE (x) != SYMBOL_REF) + return true; + /* FALLTHRU */ + + case SYMBOL_REF: + /* TLS symbols are never valid. */ + return SYMBOL_REF_TLS_MODEL (x) == 0; + + case CONST_WIDE_INT: + if (TARGET_BUILD_CONSTANTS) + return true; + if (x == CONST0_RTX (mode)) + return true; + mode = DImode; + gcc_assert (CONST_WIDE_INT_NUNITS (x) == 2); + i0 = CONST_WIDE_INT_ELT (x, 1); + if (sw_64_emit_set_const_1 (NULL_RTX, mode, i0, 3, true) == NULL) + return false; + i0 = CONST_WIDE_INT_ELT (x, 0); + goto do_integer; + + case CONST_DOUBLE: + if (x == CONST0_RTX (mode)) + return true; + return false; + + case CONST_VECTOR: + if (x == CONST0_RTX (mode)) + return true; + if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT) + return false; + if (GET_MODE_SIZE (mode) != 8) + return false; + /* FALLTHRU */ + + case CONST_INT: + if (TARGET_BUILD_CONSTANTS) + return true; + i0 = sw_64_extract_integer (x); + do_integer: + return sw_64_emit_set_const_1 (NULL_RTX, mode, i0, 3, true) != NULL; + + default: + return false; + } +} + +/* Operand 1 is known to be a constant, and should require more than one + instruction to load. Emit that multi-part load. */ + +bool +sw_64_split_const_mov (machine_mode mode, rtx *operands) +{ + HOST_WIDE_INT i0; + rtx temp = NULL_RTX; + + i0 = sw_64_extract_integer (operands[1]); + + temp = sw_64_emit_set_const (operands[0], mode, i0, 3, false); + + if (!temp && TARGET_BUILD_CONSTANTS) + temp = sw_64_emit_set_long_const (operands[0], i0); + + if (temp) + { + if (!rtx_equal_p (operands[0], temp)) + emit_move_insn (operands[0], temp); + return true; + } + + return false; +} + +/* Expand a move instruction; return true if all work is done. + We don't handle non-bwx subword loads here. */ + +bool +sw_64_expand_mov (machine_mode mode, rtx *operands) +{ + rtx tmp; + + /* If the output is not a register, the input must be. */ + if (MEM_P (operands[0]) && !reg_or_0_operand (operands[1], mode)) + operands[1] = force_reg (mode, operands[1]); + + /* Allow legitimize_address to perform some simplifications. */ + if (mode == Pmode && symbolic_operand (operands[1], mode)) + { + tmp = sw_64_legitimize_address_1 (operands[1], operands[0], mode); + if (tmp) + { + if (tmp == operands[0]) + return true; + operands[1] = tmp; + return false; + } + } + + /* Early out for non-constants and valid constants. */ + if (!CONSTANT_P (operands[1]) || input_operand (operands[1], mode)) + return false; + + /* Split large integers. */ + if (CONST_INT_P (operands[1]) || GET_CODE (operands[1]) == CONST_VECTOR) + { + if (sw_64_split_const_mov (mode, operands)) + return true; + } + + /* Otherwise we've nothing left but to drop the thing to memory. */ + tmp = force_const_mem (mode, operands[1]); + + if (tmp == NULL_RTX) + return false; + + if (reload_in_progress) + { + emit_move_insn (operands[0], XEXP (tmp, 0)); + operands[1] = replace_equiv_address (tmp, operands[0]); + } + else + operands[1] = validize_mem (tmp); + return false; +} + +/* Expand a non-bwx QImode or HImode move instruction; + return true if all work is done. */ + +bool +sw_64_expand_mov_nobwx (machine_mode mode, rtx *operands) +{ + rtx seq; + + /* If the output is not a register, the input must be. 
*/ + if (MEM_P (operands[0])) + operands[1] = force_reg (mode, operands[1]); + + /* Handle four memory cases, unaligned and aligned for either the input + or the output. The only case where we can be called during reload is + for aligned loads; all other cases require temporaries. */ + + if (any_memory_operand (operands[1], mode)) + { + if (aligned_memory_operand (operands[1], mode)) + { + if (reload_in_progress) + { + seq = gen_reload_in_aligned (mode, operands[0], operands[1]); + emit_insn (seq); + } + else + { + rtx aligned_mem, bitnum; + rtx scratch = gen_reg_rtx (SImode); + rtx subtarget; + bool copyout; + + get_aligned_mem (operands[1], &aligned_mem, &bitnum); + + subtarget = operands[0]; + if (REG_P (subtarget)) + subtarget = gen_lowpart (DImode, subtarget), copyout = false; + else + subtarget = gen_reg_rtx (DImode), copyout = true; + + if (mode == QImode) + seq = gen_aligned_loadqi (subtarget, aligned_mem, bitnum, + scratch); + else + seq = gen_aligned_loadhi (subtarget, aligned_mem, bitnum, + scratch); + emit_insn (seq); + + if (copyout) + emit_move_insn (operands[0], gen_lowpart (mode, subtarget)); + } + } + else + { + /* Don't pass these as parameters since that makes the generated + code depend on parameter evaluation order which will cause + bootstrap failures. */ + + rtx temp1, temp2, subtarget, ua; + bool copyout; + + temp1 = gen_reg_rtx (DImode); + temp2 = gen_reg_rtx (DImode); + + subtarget = operands[0]; + if (REG_P (subtarget)) + subtarget = gen_lowpart (DImode, subtarget), copyout = false; + else + subtarget = gen_reg_rtx (DImode), copyout = true; + + ua = get_unaligned_address (operands[1]); + if (mode == QImode) + seq = gen_unaligned_loadqi (subtarget, ua, temp1, temp2); + else + seq = gen_unaligned_loadhi (subtarget, ua, temp1, temp2); + + sw_64_set_memflags (seq, operands[1]); + emit_insn (seq); + + if (copyout) + emit_move_insn (operands[0], gen_lowpart (mode, subtarget)); + } + return true; + } + + if (any_memory_operand (operands[0], mode)) + { + if (aligned_memory_operand (operands[0], mode)) + { + rtx aligned_mem, bitnum; + rtx temp1 = gen_reg_rtx (SImode); + rtx temp2 = gen_reg_rtx (SImode); + + get_aligned_mem (operands[0], &aligned_mem, &bitnum); + + emit_insn ( + gen_aligned_store (aligned_mem, operands[1], bitnum, temp1, temp2)); + } + else + { + rtx temp1 = gen_reg_rtx (DImode); + rtx temp2 = gen_reg_rtx (DImode); + rtx temp3 = gen_reg_rtx (DImode); + rtx ua = get_unaligned_address (operands[0]); + + seq + = gen_unaligned_store (mode, ua, operands[1], temp1, temp2, temp3); + + sw_64_set_memflags (seq, operands[0]); + emit_insn (seq); + } + return true; + } + + return false; +} + +/* Implement the movmisalign patterns. One of the operands is a memory + that is not naturally aligned. Emit instructions to load it. */ + +void +sw_64_expand_movmisalign (machine_mode mode, rtx *operands) +{ + /* Honor misaligned loads, for those we promised to do so. */ + if (MEM_P (operands[1])) + { + rtx tmp; + + if (register_operand (operands[0], mode)) + tmp = operands[0]; + else + tmp = gen_reg_rtx (mode); + + sw_64_expand_unaligned_load (tmp, operands[1], 8, 0, 0); + if (tmp != operands[0]) + emit_move_insn (operands[0], tmp); + } + else if (MEM_P (operands[0])) + { + if (!reg_or_0_operand (operands[1], mode)) + operands[1] = force_reg (mode, operands[1]); + sw_64_expand_unaligned_store (operands[0], operands[1], 8, 0); + } + else + gcc_unreachable (); +} + +/* Generate an unsigned DImode to FP conversion. 
This is the same code + optabs would emit if we didn't have TFmode patterns. + + For SFmode, this is the only construction I've found that can pass + gcc.c-torture/execute/ieee/rbug.c. No scenario that uses DFmode + intermediates will work, because you'll get intermediate rounding + that ruins the end result. Some of this could be fixed by turning + on round-to-positive-infinity, but that requires diddling the fpsr, + which kills performance. I tried turning this around and converting + to a negative number, so that I could turn on /m, but either I did + it wrong or there's something else cause I wound up with the exact + same single-bit error. There is a branch-less form of this same code: + + srl $16,1,$1 + and $16,1,$2 + cmplt $16,0,$3 + or $1,$2,$2 + selge $16,$16,$2 + ifmovd $3,$f10 + ifmovd $2,$f11 + fcvtlf $f11,$f11 + fadds $f11,$f11,$f0 + fseleq $f10,$f11,$f0 + + I'm not using it because it's the same number of instructions as + this branch-full form, and it has more serialized long latency + instructions on the critical path. + + For DFmode, we can avoid rounding errors by breaking up the word + into two pieces, converting them separately, and adding them back: + + LC0: .long 0,0x5f800000 + + ifmovd $16,$f11 + ldi $2,LC0 + cmplt $16,0,$1 + fcpyse $f11,$f31,$f10 + fcpyse $f31,$f11,$f11 + s4addw $1,$2,$1 + lds $f12,0($1) + fcvtls $f10,$f10 + fcvtls $f11,$f11 + faddd $f12,$f10,$f0 + faddd $f0,$f11,$f0 + + This doesn't seem to be a clear-cut win over the optabs form. + It probably all depends on the distribution of numbers being + converted -- in the optabs form, all but high-bit-set has a + much lower minimum execution time. */ + +void +sw_64_emit_floatuns (rtx operands[2]) +{ + rtx neglab, donelab, i0, i1, f0, in, out; + machine_mode mode; + + out = operands[0]; + in = force_reg (DImode, operands[1]); + mode = GET_MODE (out); + neglab = gen_label_rtx (); + donelab = gen_label_rtx (); + i0 = gen_reg_rtx (DImode); + i1 = gen_reg_rtx (DImode); + f0 = gen_reg_rtx (mode); + + emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab); + + emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in))); + emit_jump_insn (gen_jump (donelab)); + emit_barrier (); + + emit_label (neglab); + + emit_insn (gen_lshrdi3 (i0, in, const1_rtx)); + emit_insn (gen_anddi3 (i1, in, const1_rtx)); + emit_insn (gen_iordi3 (i0, i0, i1)); + emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0))); + emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0))); + + emit_label (donelab); +} + +/* Generate the comparison for a conditional branch. */ + +void +sw_64_emit_conditional_branch (rtx operands[], machine_mode cmp_mode) +{ + enum rtx_code cmp_code, branch_code; + machine_mode branch_mode = VOIDmode; + enum rtx_code code = GET_CODE (operands[0]); + rtx op0 = operands[1], op1 = operands[2]; + rtx tem; + + if (cmp_mode == TFmode) + { + op0 = sw_64_emit_xfloating_compare (&code, op0, op1); + op1 = const0_rtx; + cmp_mode = DImode; + } + + /* The general case: fold the comparison code to the types of compares + that we have, choosing the branch as necessary. */ + switch (code) + { + case EQ: + case LE: + case LT: + case LEU: + case LTU: + case UNORDERED: + /* We have these compares. */ + cmp_code = code, branch_code = NE; + break; + + case NE: + case ORDERED: + /* These must be reversed. */ + cmp_code = reverse_condition (code), branch_code = EQ; + break; + + case GE: + case GT: + case GEU: + case GTU: + /* For FP, we swap them, for INT, we reverse them. 
*/ + if (cmp_mode == DFmode || (cmp_mode == SFmode && flag_sw_sf_cmpsel)) + { + cmp_code = swap_condition (code); + branch_code = NE; + std::swap (op0, op1); + } + else + { + cmp_code = reverse_condition (code); + branch_code = EQ; + } + break; + + default: + gcc_unreachable (); + } + + if (cmp_mode == DFmode) + { + if (flag_unsafe_math_optimizations && cmp_code != UNORDERED) + { + /* When we are not as concerned about non-finite values, and we + are comparing against zero, we can branch directly. */ + if (op1 == CONST0_RTX (DFmode)) + cmp_code = UNKNOWN, branch_code = code; + else if (op0 == CONST0_RTX (DFmode)) + { + /* Undo the swap we probably did just above. */ + std::swap (op0, op1); + branch_code = swap_condition (cmp_code); + cmp_code = UNKNOWN; + } + } + else + { + /* ??? We mark the branch mode to be CCmode to prevent the + compare and branch from being combined, since the compare + insn follows IEEE rules that the branch does not. */ + branch_mode = CCmode; + } + } + else if (cmp_mode == SFmode && flag_sw_sf_cmpsel) + { + if (flag_unsafe_math_optimizations && cmp_code != UNORDERED) + { + /* When we are not as concerned about non-finite values, and we + are comparing against zero, we can branch directly. */ + if (op1 == CONST0_RTX (SFmode)) + cmp_code = UNKNOWN, branch_code = code; + else if (op0 == CONST0_RTX (SFmode)) + { + /* Undo the swap we probably did just above. */ + std::swap (op0, op1); + branch_code = swap_condition (cmp_code); + cmp_code = UNKNOWN; + } + } + else + { + /* ??? We mark the branch mode to be CCmode to prevent the + compare and branch from being combined, since the compare + insn follows IEEE rules that the branch does not. */ + branch_mode = CCmode; + } + } + else + { + /* The following optimizations are only for signed compares. */ + if (code != LEU && code != LTU && code != GEU && code != GTU) + { + /* Whee. Compare and branch against 0 directly. */ + if (op1 == const0_rtx) + cmp_code = UNKNOWN, branch_code = code; + + /* If the constants doesn't fit into an immediate, but can + be generated by ldi/ldih, we adjust the argument and + compare against zero, so we can use beq/bne directly. */ + /* ??? Don't do this when comparing against symbols, otherwise + we'll reduce (&x == 0x1234) to (&x-0x1234 == 0), which will + be declared false out of hand (at least for non-weak). */ + else if (CONST_INT_P (op1) && (code == EQ || code == NE) + && !(symbolic_operand (op0, VOIDmode) + || (REG_P (op0) && REG_POINTER (op0)))) + { + rtx n_op1 = GEN_INT (-INTVAL (op1)); + + if (!satisfies_constraint_I (op1) + && (satisfies_constraint_K (n_op1) + || satisfies_constraint_L (n_op1))) + cmp_code = PLUS, branch_code = code, op1 = n_op1; + } + } + + if (!reg_or_0_operand (op0, DImode)) + op0 = force_reg (DImode, op0); + if (cmp_code != PLUS && !reg_or_8bit_operand (op1, DImode)) + op1 = force_reg (DImode, op1); + } + + /* Emit an initial compare instruction, if necessary. */ + tem = op0; + if (cmp_code != UNKNOWN) + { + tem = gen_reg_rtx (cmp_mode); + emit_move_insn (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1)); + } + + /* Emit the branch instruction. */ + tem = gen_rtx_SET ( + pc_rtx, + gen_rtx_IF_THEN_ELSE (VOIDmode, + gen_rtx_fmt_ee (branch_code, branch_mode, tem, + CONST0_RTX (cmp_mode)), + gen_rtx_LABEL_REF (VOIDmode, operands[3]), pc_rtx)); + emit_jump_insn (tem); +} + +/* Certain simplifications can be done to make invalid setcc operations + valid. Return the final comparison, or NULL if we can't work. 
*/ + +bool +sw_64_emit_setcc (rtx operands[], machine_mode cmp_mode) +{ + enum rtx_code cmp_code; + enum rtx_code code = GET_CODE (operands[1]); + rtx op0 = operands[2], op1 = operands[3]; + rtx tmp; + + if (cmp_mode == TFmode) + { + op0 = sw_64_emit_xfloating_compare (&code, op0, op1); + op1 = const0_rtx; + cmp_mode = DImode; + } + + if (cmp_mode == DFmode && !TARGET_FIX) + return 0; + + /* The general case: fold the comparison code to the types of compares + that we have, choosing the branch as necessary. */ + + cmp_code = UNKNOWN; + switch (code) + { + case EQ: + case LE: + case LT: + case LEU: + case LTU: + case UNORDERED: + /* We have these compares. */ + if (cmp_mode == DFmode) + cmp_code = code, code = NE; + break; + + case NE: + if (cmp_mode == DImode && op1 == const0_rtx) + break; + /* FALLTHRU */ + + case ORDERED: + cmp_code = reverse_condition (code); + code = EQ; + break; + + case GE: + case GT: + case GEU: + case GTU: + /* These normally need swapping, but for integer zero we have + special patterns that recognize swapped operands. */ + if (cmp_mode == DImode && op1 == const0_rtx) + break; + code = swap_condition (code); + if (cmp_mode == DFmode) + cmp_code = code, code = NE; + std::swap (op0, op1); + break; + + default: + gcc_unreachable (); + } + + if (cmp_mode == DImode) + { + if (!register_operand (op0, DImode)) + op0 = force_reg (DImode, op0); + if (!reg_or_8bit_operand (op1, DImode)) + op1 = force_reg (DImode, op1); + } + + /* Emit an initial compare instruction, if necessary. */ + if (cmp_code != UNKNOWN) + { + tmp = gen_reg_rtx (cmp_mode); + emit_insn ( + gen_rtx_SET (tmp, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1))); + + op0 = cmp_mode != DImode ? gen_lowpart (DImode, tmp) : tmp; + op1 = const0_rtx; + } + + /* Emit the setcc instruction. */ + emit_insn ( + gen_rtx_SET (operands[0], gen_rtx_fmt_ee (code, DImode, op0, op1))); + return true; +} + +/* Rewrite a comparison against zero CMP of the form + (CODE (cc0) (const_int 0)) so it can be written validly in + a conditional move (if_then_else CMP ...). + If both of the operands that set cc0 are nonzero we must emit + an insn to perform the compare (it can't be done within + the conditional move). */ + +rtx +sw_64_emit_conditional_move (rtx cmp, machine_mode mode) +{ + enum rtx_code code = GET_CODE (cmp); + enum rtx_code cmov_code = NE; + rtx op0 = XEXP (cmp, 0); + rtx op1 = XEXP (cmp, 1); + machine_mode cmp_mode + = (GET_MODE (op0) == VOIDmode ? DImode : GET_MODE (op0)); + machine_mode cmov_mode = VOIDmode; + int local_fast_math = flag_unsafe_math_optimizations; + rtx tem; + + if (cmp_mode == TFmode) + { + op0 = sw_64_emit_xfloating_compare (&code, op0, op1); + op1 = const0_rtx; + cmp_mode = DImode; + } + + gcc_assert (cmp_mode == DFmode || cmp_mode == DImode || cmp_mode == SFmode); + + if (FLOAT_MODE_P (cmp_mode) != FLOAT_MODE_P (mode)) + { + enum rtx_code cmp_code; + + if (!TARGET_FIX) + return 0; + + /* If we have fp<->int register move instructions, do a cmov by + performing the comparison in fp registers, and move the + zero/nonzero value to integer registers, where we can then + use a normal cmov, or vice-versa. */ + + switch (code) + { + case EQ: + case LE: + case LT: + case LEU: + case LTU: + case UNORDERED: + /* We have these compares. */ + cmp_code = code, code = NE; + break; + + case NE: + case ORDERED: + /* These must be reversed. 
*/ + cmp_code = reverse_condition (code), code = EQ; + break; + + case GE: + case GT: + case GEU: + case GTU: + /* These normally need swapping, but for integer zero we have + special patterns that recognize swapped operands. */ + if (cmp_mode == DImode && op1 == const0_rtx) + cmp_code = code, code = NE; + else + { + cmp_code = swap_condition (code); + code = NE; + std::swap (op0, op1); + } + break; + + default: + gcc_unreachable (); + } + + if (cmp_mode == DImode) + { + if (!reg_or_0_operand (op0, DImode)) + op0 = force_reg (DImode, op0); + if (!reg_or_8bit_operand (op1, DImode)) + op1 = force_reg (DImode, op1); + } + + tem = gen_reg_rtx (cmp_mode); + emit_insn ( + gen_rtx_SET (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1))); + + cmp_mode = cmp_mode == DImode ? E_DFmode : E_DImode; + op0 = gen_lowpart (cmp_mode, tem); + op1 = CONST0_RTX (cmp_mode); + cmp = gen_rtx_fmt_ee (code, VOIDmode, op0, op1); + local_fast_math = 1; + } + + if (cmp_mode == DImode) + { + if (!reg_or_0_operand (op0, DImode)) + op0 = force_reg (DImode, op0); + if (!reg_or_8bit_operand (op1, DImode)) + op1 = force_reg (DImode, op1); + } + + /* We may be able to use a conditional move directly. + This avoids emitting spurious compares. */ + if (signed_comparison_operator (cmp, VOIDmode) + && (cmp_mode == DImode || local_fast_math) + && (op0 == CONST0_RTX (cmp_mode) || op1 == CONST0_RTX (cmp_mode))) + return gen_rtx_fmt_ee (code, VOIDmode, op0, op1); + + /* We can't put the comparison inside the conditional move; + emit a compare instruction and put that inside the + conditional move. Make sure we emit only comparisons we have; + swap or reverse as necessary. */ + + if (!can_create_pseudo_p ()) + return NULL_RTX; + + switch (code) + { + case EQ: + case LE: + case LT: + case LEU: + case LTU: + case UNORDERED: + /* We have these compares: */ + break; + + case NE: + case ORDERED: + /* These must be reversed. */ + code = reverse_condition (code); + cmov_code = EQ; + break; + + case GE: + case GT: + case GEU: + case GTU: + /* These normally need swapping, but for integer zero we have + special patterns that recognize swapped operands. */ + if (cmp_mode == DImode && op1 == const0_rtx) + break; + code = swap_condition (code); + std::swap (op0, op1); + break; + + default: + gcc_unreachable (); + } + + if (cmp_mode == DImode) + { + if (!reg_or_0_operand (op0, DImode)) + op0 = force_reg (DImode, op0); + if (!reg_or_8bit_operand (op1, DImode)) + op1 = force_reg (DImode, op1); + } + + /* ??? We mark the branch mode to be CCmode to prevent the compare + and cmov from being combined, since the compare insn follows IEEE + rules that the cmov does not. */ + if (cmp_mode == DFmode && !local_fast_math) + cmov_mode = CCmode; + + tem = gen_reg_rtx (cmp_mode); + emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_mode, op0, op1)); + return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_mode)); +} + +/* Simplify a conditional move of two constants into a setcc with + arithmetic. This is done with a splitter since combine would + just undo the work if done during code generation. It also catches + cases we wouldn't have before cse. 
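+
+   As a sketch of the transformation (the constants are illustrative):
+
+     r = cond ? 8 : 0      ==>   t = (cond cmp 0);  r = t << 3;
+     r = cond ? -1 : 0     ==>   t = (cond cmp 0);  r = -t;
+     r = cond ? f+1 : f    ==>   t = (cond cmp 0);  r = t + f;
+
+   and for a difference of 4 or 8 between the two constants the
+   scaled-add form t * diff + f is used when F is a sext_add_operand.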
*/ + +int +sw_64_split_conditional_move (enum rtx_code code, rtx dest, rtx cond, rtx t_rtx, + rtx f_rtx) +{ + HOST_WIDE_INT t, f, diff; + machine_mode mode; + rtx target, subtarget, tmp; + + mode = GET_MODE (dest); + t = INTVAL (t_rtx); + f = INTVAL (f_rtx); + diff = t - f; + + if (((code == NE || code == EQ) && diff < 0) || (code == GE || code == GT)) + { + code = reverse_condition (code); + std::swap (t, f); + diff = -diff; + } + + subtarget = target = dest; + if (mode != DImode) + { + target = gen_lowpart (DImode, dest); + if (can_create_pseudo_p ()) + subtarget = gen_reg_rtx (DImode); + else + subtarget = target; + } + /* Below, we must be careful to use copy_rtx on target and subtarget + in intermediate insns, as they may be a subreg rtx, which may not + be shared. */ + + if (f == 0 + && exact_log2 (diff) > 0 + /* On SW6, we've got enough shifters to make non-arithmetic shifts + viable over a longer latency cmove. */ + && (diff <= 8 || sw_64_tune == PROCESSOR_SW6 + || sw_64_tune == PROCESSOR_SW8)) + { + tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx); + emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp)); + + tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget), + GEN_INT (exact_log2 (t))); + emit_insn (gen_rtx_SET (target, tmp)); + } + else if (f == 0 && t == -1) + { + tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx); + emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp)); + + emit_insn (gen_negdi2 (target, copy_rtx (subtarget))); + } + else if (diff == 1 || diff == 4 || diff == 8) + { + rtx add_op; + + tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx); + emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp)); + + if (diff == 1) + emit_insn (gen_adddi3 (target, copy_rtx (subtarget), GEN_INT (f))); + else + { + add_op = GEN_INT (f); + if (sext_add_operand (add_op, mode)) + { + // in sw_64 sxsubw is ra*x + rb; + tmp = gen_rtx_MULT (DImode, copy_rtx (subtarget), GEN_INT (diff)); + tmp = gen_rtx_PLUS (DImode, tmp, add_op); + emit_insn (gen_rtx_SET (target, tmp)); + } + else + return 0; + } + } + else + return 0; + + return 1; +} + +/* Look up the function X_floating library function name for the + given operation. */ + +struct GTY (()) xfloating_op +{ + const enum rtx_code code; + const char *const GTY ((skip)) osf_func; + const char *const GTY ((skip)) vms_func; + rtx libcall; +}; + +static GTY (()) struct xfloating_op xfloating_ops[] + = {{PLUS, "_OtsAddX", "OTS$ADD_X", 0}, + {MINUS, "_OtsSubX", "OTS$SUB_X", 0}, + {MULT, "_OtsMulX", "OTS$MUL_X", 0}, + {DIV, "_OtsDivX", "OTS$DIV_X", 0}, + {EQ, "_OtsEqlX", "OTS$EQL_X", 0}, + {NE, "_OtsNeqX", "OTS$NEQ_X", 0}, + {LT, "_OtsLssX", "OTS$LSS_X", 0}, + {LE, "_OtsLeqX", "OTS$LEQ_X", 0}, + {GT, "_OtsGtrX", "OTS$GTR_X", 0}, + {GE, "_OtsGeqX", "OTS$GEQ_X", 0}, + {FIX, "_OtsCvtXQ", "OTS$CVTXQ", 0}, + {FLOAT, "_OtsCvtQX", "OTS$CVTQX", 0}, + {UNSIGNED_FLOAT, "_OtsCvtQUX", "OTS$CVTQUX", 0}, + {FLOAT_EXTEND, "_OtsConvertFloatTX", "OTS$CVT_FLOAT_T_X", 0}, + {FLOAT_TRUNCATE, "_OtsConvertFloatXT", "OTS$CVT_FLOAT_X_T", 0}}; + +static GTY (()) struct xfloating_op vax_cvt_ops[] + = {{FLOAT_EXTEND, "_OtsConvertFloatGX", "OTS$CVT_FLOAT_G_X", 0}, + {FLOAT_TRUNCATE, "_OtsConvertFloatXG", "OTS$CVT_FLOAT_X_G", 0}}; + +static rtx +sw_64_lookup_xfloating_lib_func (enum rtx_code code) +{ + struct xfloating_op *ops = xfloating_ops; + long n = ARRAY_SIZE (xfloating_ops); + long i; + + gcc_assert (TARGET_HAS_XFLOATING_LIBS); + + /* How irritating. Nothing to key off for the main table. 
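+
+   The rtx code alone cannot distinguish the VAX-format conversions
+   from the IEEE ones, so when TARGET_FLOAT_VAX is set the
+   FLOAT_EXTEND/FLOAT_TRUNCATE codes are redirected to the small
+   vax_cvt_ops table; every other code is found by a linear search of
+   xfloating_ops, and the libfunc rtx is created lazily and cached in
+   the table entry.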
*/ + if (TARGET_FLOAT_VAX && (code == FLOAT_EXTEND || code == FLOAT_TRUNCATE)) + { + ops = vax_cvt_ops; + n = ARRAY_SIZE (vax_cvt_ops); + } + + for (i = 0; i < n; ++i, ++ops) + if (ops->code == code) + { + rtx func = ops->libcall; + if (!func) + { + func = init_one_libfunc (ops->osf_func); + ops->libcall = func; + } + return func; + } + + gcc_unreachable (); +} + +/* Most X_floating operations take the rounding mode as an argument. + Compute that here. */ + +static int +sw_64_compute_xfloating_mode_arg (enum rtx_code code, + enum sw_64_fp_rounding_mode round) +{ + int mode; + + switch (round) + { + case SW_64_FPRM_NORM: + mode = 2; + break; + case SW_64_FPRM_MINF: + mode = 1; + break; + case SW_64_FPRM_CHOP: + mode = 0; + break; + case SW_64_FPRM_DYN: + mode = 4; + break; + default: + gcc_unreachable (); + + /* XXX For reference, round to +inf is mode = 3. */ + } + + if (code == FLOAT_TRUNCATE && sw_64_fptm == SW_64_FPTM_N) + mode |= 0x10000; + + return mode; +} + +/* Emit an X_floating library function call. + + Note that these functions do not follow normal calling conventions: + TFmode arguments are passed in two integer registers (as opposed to + indirect); TFmode return values appear in R16+R17. + + FUNC is the function to call. + TARGET is where the output belongs. + OPERANDS are the inputs. + NOPERANDS is the count of inputs. + EQUIV is the expression equivalent for the function. +*/ + +static void +sw_64_emit_xfloating_libcall (rtx func, rtx target, rtx operands[], + int noperands, rtx equiv) +{ + rtx usage = NULL_RTX, reg; + int regno = 16, i; + + start_sequence (); + + for (i = 0; i < noperands; ++i) + { + switch (GET_MODE (operands[i])) + { + case E_TFmode: + reg = gen_rtx_REG (TFmode, regno); + regno += 2; + break; + + case E_DFmode: + reg = gen_rtx_REG (DFmode, regno + 32); + regno += 1; + break; + + case E_VOIDmode: + gcc_assert (CONST_INT_P (operands[i])); + /* FALLTHRU */ + case E_DImode: + reg = gen_rtx_REG (DImode, regno); + regno += 1; + break; + + default: + gcc_unreachable (); + } + + emit_move_insn (reg, operands[i]); + use_reg (&usage, reg); + } + + switch (GET_MODE (target)) + { + case E_TFmode: + reg = gen_rtx_REG (TFmode, 16); + break; + case E_DFmode: + reg = gen_rtx_REG (DFmode, 32); + break; + case E_DImode: + reg = gen_rtx_REG (DImode, 0); + break; + default: + gcc_unreachable (); + } + + rtx mem = gen_rtx_MEM (QImode, func); + rtx_insn *tmp = emit_call_insn ( + gen_call_value (reg, mem, const0_rtx, const0_rtx, const0_rtx)); + CALL_INSN_FUNCTION_USAGE (tmp) = usage; + RTL_CONST_CALL_P (tmp) = 1; + + tmp = get_insns (); + end_sequence (); + + emit_libcall_block (tmp, target, reg, equiv); +} + +/* Emit an X_floating library function call for arithmetic (+,-,*,/). */ + +void +sw_64_emit_xfloating_arith (enum rtx_code code, rtx operands[]) +{ + rtx func; + int mode; + rtx out_operands[3]; + + func = sw_64_lookup_xfloating_lib_func (code); + mode = sw_64_compute_xfloating_mode_arg (code, sw_64_fprm); + + out_operands[0] = operands[1]; + out_operands[1] = operands[2]; + out_operands[2] = GEN_INT (mode); + sw_64_emit_xfloating_libcall (func, operands[0], out_operands, 3, + gen_rtx_fmt_ee (code, TFmode, operands[1], + operands[2])); +} + +/* Emit an X_floating library function call for a comparison. 
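+
+   For example, an ORDERED test is emitted as a call to the EQ helper
+   (_OtsEqlX) and the comparison handed back in *PCODE becomes GE
+   against zero, relying on the -1/0/1 return convention documented in
+   the function below.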
*/ + +static rtx +sw_64_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1) +{ + enum rtx_code cmp_code, res_code; + rtx func, out, operands[2], note; + + /* X_floating library comparison functions return + -1 unordered + 0 false + 1 true + Convert the compare against the raw return value. */ + + cmp_code = *pcode; + switch (cmp_code) + { + case UNORDERED: + cmp_code = EQ; + res_code = LT; + break; + case ORDERED: + cmp_code = EQ; + res_code = GE; + break; + case NE: + res_code = NE; + break; + case EQ: + case LT: + case GT: + case LE: + case GE: + res_code = GT; + break; + default: + gcc_unreachable (); + } + *pcode = res_code; + + func = sw_64_lookup_xfloating_lib_func (cmp_code); + + operands[0] = op0; + operands[1] = op1; + out = gen_reg_rtx (DImode); + + /* What's actually returned is -1,0,1, not a proper boolean value. */ + note = gen_rtx_fmt_ee (cmp_code, VOIDmode, op0, op1); + note = gen_rtx_UNSPEC (DImode, gen_rtvec (1, note), UNSPEC_XFLT_COMPARE); + sw_64_emit_xfloating_libcall (func, out, operands, 2, note); + + return out; +} + +/* Emit an X_floating library function call for a conversion. */ + +void +sw_64_emit_xfloating_cvt (enum rtx_code orig_code, rtx operands[]) +{ + int noperands = 1, mode; + rtx out_operands[2]; + rtx func; + enum rtx_code code = orig_code; + + if (code == UNSIGNED_FIX) + code = FIX; + + func = sw_64_lookup_xfloating_lib_func (code); + + out_operands[0] = operands[1]; + + switch (code) + { + case FIX: + mode = sw_64_compute_xfloating_mode_arg (code, SW_64_FPRM_CHOP); + out_operands[1] = GEN_INT (mode); + noperands = 2; + break; + case FLOAT_TRUNCATE: + mode = sw_64_compute_xfloating_mode_arg (code, sw_64_fprm); + out_operands[1] = GEN_INT (mode); + noperands = 2; + break; + default: + break; + } + + sw_64_emit_xfloating_libcall (func, operands[0], out_operands, noperands, + gen_rtx_fmt_e (orig_code, + GET_MODE (operands[0]), + operands[1])); +} + +/* Split a TImode or TFmode move from OP[1] to OP[0] into a pair of + DImode moves from OP[2,3] to OP[0,1]. If FIXUP_OVERLAP is true, + guarantee that the sequence + set (OP[0] OP[2]) + set (OP[1] OP[3]) + is valid. Naturally, output operand ordering is little-endian. + This is used by *movtf_internal and *movti_internal. */ + +void +sw_64_split_tmode_pair (rtx operands[4], machine_mode mode, bool fixup_overlap) +{ + switch (GET_CODE (operands[1])) + { + case REG: + operands[3] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1); + operands[2] = gen_rtx_REG (DImode, REGNO (operands[1])); + break; + + case MEM: + operands[3] = adjust_address (operands[1], DImode, 8); + operands[2] = adjust_address (operands[1], DImode, 0); + break; + + CASE_CONST_SCALAR_INT: + case CONST_DOUBLE: + gcc_assert (operands[1] == CONST0_RTX (mode)); + operands[2] = operands[3] = const0_rtx; + break; + + default: + gcc_unreachable (); + } + + switch (GET_CODE (operands[0])) + { + case REG: + operands[1] = gen_rtx_REG (DImode, REGNO (operands[0]) + 1); + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); + break; + + case MEM: + operands[1] = adjust_address (operands[0], DImode, 8); + operands[0] = adjust_address (operands[0], DImode, 0); + break; + + default: + gcc_unreachable (); + } + + if (fixup_overlap && reg_overlap_mentioned_p (operands[0], operands[3])) + { + std::swap (operands[0], operands[1]); + std::swap (operands[2], operands[3]); + } +} + +/* Implement negtf2 or abstf2. Op0 is destination, op1 is source, + op2 is a register containing the sign bit, operation is the + logical operation to be performed. 
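+
+   Only the high DImode half of the TFmode value is modified: the pair
+   is split below and OPERATION (presumably an xor for negtf2 and an
+   and-with-complement for abstf2, supplied by the caller) is applied
+   to the upper word against the sign-bit register, while the low word
+   is simply copied, with some care taken for overlapping register
+   pairs.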
*/ + +void +sw_64_split_tfmode_frobsign (rtx operands[3], rtx (*operation) (rtx, rtx, rtx)) +{ + rtx high_bit = operands[2]; + rtx scratch; + int move; + + sw_64_split_tmode_pair (operands, TFmode, false); + + /* Detect three flavors of operand overlap. */ + move = 1; + if (rtx_equal_p (operands[0], operands[2])) + move = 0; + else if (rtx_equal_p (operands[1], operands[2])) + { + if (rtx_equal_p (operands[0], high_bit)) + move = 2; + else + move = -1; + } + + if (move < 0) + emit_move_insn (operands[0], operands[2]); + + /* ??? If the destination overlaps both source tf and high_bit, then + assume source tf is dead in its entirety and use the other half + for a scratch register. Otherwise "scratch" is just the proper + destination register. */ + scratch = operands[move < 2 ? 1 : 3]; + + emit_insn ((*operation) (scratch, high_bit, operands[3])); + + if (move > 0) + { + emit_move_insn (operands[0], operands[2]); + if (move > 1) + emit_move_insn (operands[1], scratch); + } +} + +/* Use ext[wlq][lh] as the Architecture Handbook describes for extracting + unaligned data: + + unsigned: signed: + word: ldl_u r1,X(r11) ldl_u r1,X(r11) + ldl_u r2,X+1(r11) ldl_u r2,X+1(r11) + ldi r3,X(r11) ldi r3,X+2(r11) + exthl r1,r3,r1 extll r1,r3,r1 + exthh r2,r3,r2 extlh r2,r3,r2 + or r1.r2.r1 or r1,r2,r1 + sra r1,48,r1 + + long: ldl_u r1,X(r11) ldl_u r1,X(r11) + ldl_u r2,X+3(r11) ldl_u r2,X+3(r11) + ldi r3,X(r11) ldi r3,X(r11) + extll r1,r3,r1 extll r1,r3,r1 + extlh r2,r3,r2 extlh r2,r3,r2 + or r1.r2.r1 addl r1,r2,r1 + + quad: ldl_u r1,X(r11) + ldl_u r2,X+7(r11) + ldi r3,X(r11) + extll r1,r3,r1 + extlh r2,r3,r2 + or r1.r2.r1 +*/ + +void +sw_64_expand_unaligned_load (rtx tgt, rtx mem, HOST_WIDE_INT size, + HOST_WIDE_INT ofs, int sign) +{ + rtx meml, memh, addr, extl, exth, tmp, mema; + machine_mode mode; + + if (TARGET_BWX && size == 2) + { + meml = adjust_address (mem, QImode, ofs); + memh = adjust_address (mem, QImode, ofs + 1); + extl = gen_reg_rtx (DImode); + exth = gen_reg_rtx (DImode); + emit_insn (gen_zero_extendqidi2 (extl, meml)); + emit_insn (gen_zero_extendqidi2 (exth, memh)); + exth = expand_simple_binop (DImode, ASHIFT, exth, GEN_INT (8), NULL, 1, + OPTAB_LIB_WIDEN); + addr = expand_simple_binop (DImode, IOR, extl, exth, NULL, 1, + OPTAB_LIB_WIDEN); + + if (sign && GET_MODE (tgt) != HImode) + { + addr = gen_lowpart (HImode, addr); + emit_insn (gen_extend_insn (tgt, addr, GET_MODE (tgt), HImode, 0)); + } + else + { + if (GET_MODE (tgt) != DImode) + addr = gen_lowpart (GET_MODE (tgt), addr); + emit_move_insn (tgt, addr); + } + return; + } + + meml = gen_reg_rtx (Pmode); + memh = gen_reg_rtx (Pmode); + addr = gen_reg_rtx (Pmode); + extl = gen_reg_rtx (Pmode); + exth = gen_reg_rtx (Pmode); + + mema = XEXP (mem, 0); + rtx mema_const, mema_ptr; + if (GET_CODE (mema) == LO_SUM) + mema = force_reg (Pmode, mema); + + // TODO: split const ptr + if (GET_CODE (mema) == PLUS) + { + mema_ptr = XEXP (mema, 0); + mema_const = XEXP (mema, 1); + } + /* AND addresses cannot be in any alias set, since they may implicitly + alias surrounding code. Ideally we'd have some alias set that + covered all types except those with alignment 8 or higher. 
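+
+   As a C-level sketch of what the sequence below computes for an
+   unaligned 4-byte load at address P (illustrative only; the byte
+   shuffling is really done by the ext insns):
+
+     uint64_t lo = *(uint64_t *) (P & ~7);         // ldl_u
+     uint64_t hi = *(uint64_t *) ((P + 3) & ~7);   // ldl_u
+     unsigned  s = (P & 7) * 8;
+     uint32_t  v = (uint32_t) (lo >> s);           // extll
+     if (s != 0)
+       v |= (uint32_t) (hi << (64 - s));           // extlh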
*/ + + tmp = change_address (mem, Pmode, + gen_rtx_AND (Pmode, plus_constant (Pmode, mema, ofs), + GEN_INT (-8))); + set_mem_alias_set (tmp, 0); + emit_move_insn (meml, tmp); + + tmp + = change_address (mem, Pmode, + gen_rtx_AND (Pmode, + plus_constant (Pmode, mema, ofs + size - 1), + GEN_INT (-8))); + set_mem_alias_set (tmp, 0); + emit_move_insn (memh, tmp); + + if (sign && size == 2) + { + emit_move_insn (addr, plus_constant (Pmode, mema, ofs + 2)); + + emit_insn (gen_extql (extl, meml, addr)); + emit_insn (gen_extqh (exth, memh, addr)); + + /* We must use tgt here for the target. Sw_64 port fails if we use + addr for the target, because addr is marked as a pointer and combine + knows that pointers are always sign-extended 32-bit values. */ + addr = expand_binop (Pmode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN); + addr = expand_binop (Pmode, ashr_optab, addr, GEN_INT (48), addr, 1, + OPTAB_WIDEN); + } + else + { + if (GET_CODE (mema) == PLUS && CONST_INT_P (mema_const) && + // (INTVAL (mema_const) > 32767 || INTVAL (mema_const) < -32767)) + // { + (!add_operand (mema_const, VOIDmode))) + { + rtx tmpreg = gen_reg_rtx (DImode); + tmpreg = sw_64_emit_set_const ( + tmpreg, DImode, INTVAL (plus_constant (Pmode, mema_const, ofs)), 2, + false); + emit_insn (gen_adddi3 (addr, mema_ptr, tmpreg)); + } + else + { + emit_move_insn (addr, plus_constant (Pmode, mema, ofs)); + } + emit_insn (gen_extxl (extl, meml, GEN_INT (size * 8), addr)); + switch ((int) size) + { + case 2: + emit_insn (gen_extwh (exth, memh, addr)); + mode = HImode; + break; + case 4: + emit_insn (gen_extlh (exth, memh, addr)); + mode = SImode; + break; + case 8: + emit_insn (gen_extqh (exth, memh, addr)); + mode = DImode; + break; + default: + gcc_unreachable (); + } + + addr = expand_binop (mode, ior_optab, gen_lowpart (mode, extl), + gen_lowpart (mode, exth), gen_lowpart (mode, tgt), + sign, OPTAB_WIDEN); + } + + if (addr != tgt) + emit_move_insn (tgt, gen_lowpart (GET_MODE (tgt), addr)); +} + +/* Similarly, use ins and msk instructions to perform unaligned stores. */ + +void +sw_64_expand_unaligned_store (rtx dst, rtx src, HOST_WIDE_INT size, + HOST_WIDE_INT ofs) +{ + rtx dstl, dsth, addr, insl, insh, meml, memh, dsta; + + if (TARGET_BWX && size == 2) + { + if (src != const0_rtx) + { + dstl = gen_lowpart (QImode, src); + dsth = expand_simple_binop (DImode, LSHIFTRT, src, GEN_INT (8), NULL, + 1, OPTAB_LIB_WIDEN); + dsth = gen_lowpart (QImode, dsth); + } + else + dstl = dsth = const0_rtx; + + meml = adjust_address (dst, QImode, ofs); + memh = adjust_address (dst, QImode, ofs + 1); + + emit_move_insn (meml, dstl); + emit_move_insn (memh, dsth); + return; + } + + dstl = gen_reg_rtx (Pmode); + dsth = gen_reg_rtx (Pmode); + insl = gen_reg_rtx (Pmode); + insh = gen_reg_rtx (Pmode); + + dsta = XEXP (dst, 0); + if (GET_CODE (dsta) == LO_SUM) + dsta = force_reg (Pmode, dsta); + + /* AND addresses cannot be in any alias set, since they may implicitly + alias surrounding code. Ideally we'd have some alias set that + covered all types except those with alignment 8 or higher. 
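+
+   In outline, the unaligned store below is a read-modify-write of the
+   two aligned quadwords containing the destination: load both, shift
+   the source bytes into place with ins*l/ins*h, clear the destination
+   bytes with msk*l/msk*h, OR the halves together, and store the high
+   quadword before the low one (see the comment at the stores for why
+   that order matters in the fully aligned case).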
*/ + + meml = change_address (dst, Pmode, + gen_rtx_AND (Pmode, plus_constant (Pmode, dsta, ofs), + GEN_INT (-8))); + set_mem_alias_set (meml, 0); + + memh + = change_address (dst, Pmode, + gen_rtx_AND (Pmode, + plus_constant (Pmode, dsta, ofs + size - 1), + GEN_INT (-8))); + set_mem_alias_set (memh, 0); + + emit_move_insn (dsth, memh); + emit_move_insn (dstl, meml); + + addr = copy_addr_to_reg (plus_constant (Pmode, dsta, ofs)); + + if (src != CONST0_RTX (GET_MODE (src))) + { + emit_insn ( + gen_insxh (insh, gen_lowpart (DImode, src), GEN_INT (size * 8), addr)); + + switch ((int) size) + { + case 2: + emit_insn (gen_inswl (insl, gen_lowpart (HImode, src), addr)); + break; + case 4: + emit_insn (gen_insll (insl, gen_lowpart (SImode, src), addr)); + break; + case 8: + emit_insn (gen_insql (insl, gen_lowpart (DImode, src), addr)); + break; + default: + gcc_unreachable (); + } + } + + emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size * 8), addr)); + + switch ((int) size) + { + case 2: + emit_insn (gen_mskwl (dstl, dstl, addr)); + break; + case 4: + emit_insn (gen_mskll (dstl, dstl, addr)); + break; + case 8: + emit_insn (gen_mskql (dstl, dstl, addr)); + break; + default: + gcc_unreachable (); + } + + if (src != CONST0_RTX (GET_MODE (src))) + { + dsth = expand_binop (DImode, ior_optab, insh, dsth, dsth, 0, OPTAB_WIDEN); + dstl = expand_binop (DImode, ior_optab, insl, dstl, dstl, 0, OPTAB_WIDEN); + } + + /* Must store high before low for degenerate case of aligned. */ + emit_move_insn (memh, dsth); + emit_move_insn (meml, dstl); +} + +/* The block move code tries to maximize speed by separating loads and + stores at the expense of register pressure: we load all of the data + before we store it back out. There are two secondary effects worth + mentioning, that this speeds copying to/from aligned and unaligned + buffers, and that it makes the code significantly easier to write. */ + +#define MAX_MOVE_WORDS 8 + +/* Load an integral number of consecutive unaligned quadwords. */ + +static void +sw_64_expand_unaligned_load_words (rtx *out_regs, rtx smem, HOST_WIDE_INT words, + HOST_WIDE_INT ofs) +{ + rtx const im8 = GEN_INT (-8); + rtx ext_tmps[MAX_MOVE_WORDS], data_regs[MAX_MOVE_WORDS + 1]; + rtx sreg, areg, tmp, smema; + HOST_WIDE_INT i; + + smema = XEXP (smem, 0); + if (GET_CODE (smema) == LO_SUM) + smema = force_reg (Pmode, smema); + + /* Generate all the tmp registers we need. */ + for (i = 0; i < words; ++i) + { + data_regs[i] = out_regs[i]; + ext_tmps[i] = gen_reg_rtx (DImode); + } + data_regs[words] = gen_reg_rtx (DImode); + + if (ofs != 0) + smem = adjust_address (smem, GET_MODE (smem), ofs); + + /* Load up all of the source data. */ + for (i = 0; i < words; ++i) + { + tmp = change_address (smem, Pmode, + gen_rtx_AND (Pmode, + plus_constant (Pmode, smema, 8 * i), + im8)); + set_mem_alias_set (tmp, 0); + emit_move_insn (data_regs[i], tmp); + } + + tmp = change_address ( + smem, Pmode, + gen_rtx_AND (Pmode, plus_constant (Pmode, smema, 8 * words - 1), im8)); + set_mem_alias_set (tmp, 0); + emit_move_insn (data_regs[words], tmp); + + /* Extract the half-word fragments. Unfortunately decided to make + extxh with offset zero a noop instead of zeroing the register, so + we must take care of that edge condition ourselves with cmov. 
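+     That is, when the source is 8-byte aligned (areg == 0 below) each
+     extqh passes the following quadword through unchanged instead of
+     producing zero, which would corrupt the OR that merges the
+     fragments; the conditional move forces ext_tmps[i] to zero in
+     that case.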
*/ + + sreg = copy_addr_to_reg (smema); + areg + = expand_binop (DImode, and_optab, sreg, GEN_INT (7), NULL, 1, OPTAB_WIDEN); + for (i = 0; i < words; ++i) + { + emit_insn (gen_extql (data_regs[i], data_regs[i], sreg)); + emit_insn (gen_extqh (ext_tmps[i], data_regs[i + 1], sreg)); + emit_insn (gen_rtx_SET ( + ext_tmps[i], + gen_rtx_IF_THEN_ELSE (DImode, gen_rtx_EQ (DImode, areg, const0_rtx), + const0_rtx, ext_tmps[i]))); + } + + /* Merge the half-words into whole words. */ + for (i = 0; i < words; ++i) + { + out_regs[i] = expand_binop (DImode, ior_optab, data_regs[i], ext_tmps[i], + data_regs[i], 1, OPTAB_WIDEN); + } +} + +/* Store an integral number of consecutive unaligned quadwords. DATA_REGS + may be NULL to store zeros. */ + +static void +sw_64_expand_unaligned_store_words (rtx *data_regs, rtx dmem, + HOST_WIDE_INT words, HOST_WIDE_INT ofs) +{ + rtx const im8 = GEN_INT (-8); + rtx ins_tmps[MAX_MOVE_WORDS]; + rtx st_tmp_1, st_tmp_2, dreg; + rtx st_addr_1, st_addr_2, dmema; + HOST_WIDE_INT i; + + dmema = XEXP (dmem, 0); + if (GET_CODE (dmema) == LO_SUM) + dmema = force_reg (Pmode, dmema); + + /* Generate all the tmp registers we need. */ + if (data_regs != NULL) + for (i = 0; i < words; ++i) + ins_tmps[i] = gen_reg_rtx (DImode); + st_tmp_1 = gen_reg_rtx (DImode); + st_tmp_2 = gen_reg_rtx (DImode); + + if (ofs != 0) + dmem = adjust_address (dmem, GET_MODE (dmem), ofs); + + st_addr_2 = change_address ( + dmem, Pmode, + gen_rtx_AND (Pmode, plus_constant (Pmode, dmema, words * 8 - 1), im8)); + set_mem_alias_set (st_addr_2, 0); + + st_addr_1 = change_address (dmem, Pmode, gen_rtx_AND (Pmode, dmema, im8)); + set_mem_alias_set (st_addr_1, 0); + + /* Load up the destination end bits. */ + emit_move_insn (st_tmp_2, st_addr_2); + emit_move_insn (st_tmp_1, st_addr_1); + + /* Shift the input data into place. */ + dreg = copy_addr_to_reg (dmema); + if (data_regs != NULL) + { + for (i = words - 1; i >= 0; --i) + { + emit_insn (gen_insqh (ins_tmps[i], data_regs[i], dreg)); + emit_insn (gen_insql (data_regs[i], data_regs[i], dreg)); + } + for (i = words - 1; i > 0; --i) + { + ins_tmps[i - 1] + = expand_binop (DImode, ior_optab, data_regs[i], ins_tmps[i - 1], + ins_tmps[i - 1], 1, OPTAB_WIDEN); + } + } + + /* Split and merge the ends with the destination data. */ + emit_insn (gen_mskqh (st_tmp_2, st_tmp_2, dreg)); + emit_insn (gen_mskql (st_tmp_1, st_tmp_1, dreg)); + + if (data_regs != NULL) + { + st_tmp_2 = expand_binop (DImode, ior_optab, st_tmp_2, ins_tmps[words - 1], + st_tmp_2, 1, OPTAB_WIDEN); + st_tmp_1 = expand_binop (DImode, ior_optab, st_tmp_1, data_regs[0], + st_tmp_1, 1, OPTAB_WIDEN); + } + + /* Store it all. */ + emit_move_insn (st_addr_2, st_tmp_2); + for (i = words - 1; i > 0; --i) + { + rtx tmp = change_address ( + dmem, Pmode, + gen_rtx_AND (Pmode, plus_constant (Pmode, dmema, i * 8), im8)); + set_mem_alias_set (tmp, 0); + emit_move_insn (tmp, data_regs ? ins_tmps[i - 1] : const0_rtx); + } + emit_move_insn (st_addr_1, st_tmp_1); +} + +/* Expand string/block move operations. + + operands[0] is the pointer to the destination. + operands[1] is the pointer to the source. + operands[2] is the number of bytes to move. + operands[3] is the alignment. 
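+
+   The alignment operand is in bytes (it is scaled to bits below), and
+   the expansion is attempted inline only for small copies: anything
+   larger than MAX_MOVE_WORDS * UNITS_PER_WORD bytes makes the
+   expander return 0 so the caller can fall back to a library call,
+   while a zero-length copy trivially succeeds.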
*/ + +int +sw_64_expand_block_move (rtx operands[]) +{ + rtx bytes_rtx = operands[2]; + rtx align_rtx = operands[3]; + HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx); + HOST_WIDE_INT bytes = orig_bytes; + HOST_WIDE_INT src_align = INTVAL (align_rtx) * BITS_PER_UNIT; + HOST_WIDE_INT dst_align = src_align; + rtx orig_src = operands[1]; + rtx orig_dst = operands[0]; + rtx data_regs[2 * MAX_MOVE_WORDS + 16]; + rtx tmp; + unsigned int i, words, ofs, nregs = 0; + + if (orig_bytes <= 0) + return 1; + else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD) + return 0; + + /* Look for additional alignment information from recorded register info. */ + + tmp = XEXP (orig_src, 0); + if (REG_P (tmp)) + src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp))); + else if (GET_CODE (tmp) == PLUS && REG_P (XEXP (tmp, 0)) + && CONST_INT_P (XEXP (tmp, 1))) + { + unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1)); + unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0))); + + if (a > src_align) + { + if (a >= 64 && c % 8 == 0) + src_align = 64; + else if (a >= 32 && c % 4 == 0) + src_align = 32; + else if (a >= 16 && c % 2 == 0) + src_align = 16; + } + } + + tmp = XEXP (orig_dst, 0); + if (REG_P (tmp)) + dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp))); + else if (GET_CODE (tmp) == PLUS && REG_P (XEXP (tmp, 0)) + && CONST_INT_P (XEXP (tmp, 1))) + { + unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1)); + unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0))); + + if (a > dst_align) + { + if (a >= 64 && c % 8 == 0) + dst_align = 64; + else if (a >= 32 && c % 4 == 0) + dst_align = 32; + else if (a >= 16 && c % 2 == 0) + dst_align = 16; + } + } + + ofs = 0; + if (src_align >= 64 && bytes >= 8) + { + words = bytes / 8; + + for (i = 0; i < words; ++i) + data_regs[nregs + i] = gen_reg_rtx (DImode); + + for (i = 0; i < words; ++i) + emit_move_insn (data_regs[nregs + i], + adjust_address (orig_src, DImode, ofs + i * 8)); + + nregs += words; + bytes -= words * 8; + ofs += words * 8; + } + + if (src_align >= 32 && bytes >= 4) + { + words = bytes / 4; + + for (i = 0; i < words; ++i) + data_regs[nregs + i] = gen_reg_rtx (SImode); + + for (i = 0; i < words; ++i) + emit_move_insn (data_regs[nregs + i], + adjust_address (orig_src, SImode, ofs + i * 4)); + + nregs += words; + bytes -= words * 4; + ofs += words * 4; + } + + if (bytes >= 8) + { + words = bytes / 8; + + for (i = 0; i < words + 1; ++i) + data_regs[nregs + i] = gen_reg_rtx (DImode); + + sw_64_expand_unaligned_load_words (data_regs + nregs, orig_src, words, + ofs); + + nregs += words; + bytes -= words * 8; + ofs += words * 8; + } + + if (!TARGET_BWX && bytes >= 4) + { + data_regs[nregs++] = tmp = gen_reg_rtx (SImode); + sw_64_expand_unaligned_load (tmp, orig_src, 4, ofs, 0); + bytes -= 4; + ofs += 4; + } + + if (bytes >= 2) + { + if (src_align >= 16) + { + do + { + data_regs[nregs++] = tmp = gen_reg_rtx (HImode); + emit_move_insn (tmp, adjust_address (orig_src, HImode, ofs)); + bytes -= 2; + ofs += 2; + } + while (bytes >= 2); + } + else if (!TARGET_BWX) + { + data_regs[nregs++] = tmp = gen_reg_rtx (HImode); + sw_64_expand_unaligned_load (tmp, orig_src, 2, ofs, 0); + bytes -= 2; + ofs += 2; + } + } + + while (bytes > 0) + { + data_regs[nregs++] = tmp = gen_reg_rtx (QImode); + emit_move_insn (tmp, adjust_address (orig_src, QImode, ofs)); + bytes -= 1; + ofs += 1; + } + + gcc_assert (nregs <= ARRAY_SIZE (data_regs)); + + /* Now save it back out again. 
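+     The stores mirror the loads: whole DImode words while the
+     destination is 8-byte aligned, DImode values split into two
+     SImode halves at 4-byte alignment, unaligned store-words for the
+     remaining DImode data, then SImode/HImode/QImode stragglers.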
*/ + + i = 0, ofs = 0; + + /* Write out the data in whatever chunks reading the source allowed. */ + if (dst_align >= 64) + { + while (i < nregs && GET_MODE (data_regs[i]) == DImode) + { + emit_move_insn (adjust_address (orig_dst, DImode, ofs), data_regs[i]); + ofs += 8; + i++; + } + } + + if (dst_align >= 32) + { + /* If the source has remaining DImode regs, write them out in + two pieces. */ + while (i < nregs && GET_MODE (data_regs[i]) == DImode) + { + tmp = expand_binop (DImode, lshr_optab, data_regs[i], GEN_INT (32), + NULL_RTX, 1, OPTAB_WIDEN); + + emit_move_insn (adjust_address (orig_dst, SImode, ofs), + gen_lowpart (SImode, data_regs[i])); + emit_move_insn (adjust_address (orig_dst, SImode, ofs + 4), + gen_lowpart (SImode, tmp)); + ofs += 8; + i++; + } + + while (i < nregs && GET_MODE (data_regs[i]) == SImode) + { + emit_move_insn (adjust_address (orig_dst, SImode, ofs), data_regs[i]); + ofs += 4; + i++; + } + } + + if (i < nregs && GET_MODE (data_regs[i]) == DImode) + { + /* Write out a remaining block of words using unaligned methods. */ + + for (words = 1; i + words < nregs; words++) + if (GET_MODE (data_regs[i + words]) != DImode) + break; + + if (words == 1) + sw_64_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs); + else + sw_64_expand_unaligned_store_words (data_regs + i, orig_dst, words, + ofs); + + i += words; + ofs += words * 8; + } + + /* Due to the above, this won't be aligned. */ + /* ??? If we have more than one of these, consider constructing full + words in registers and using sw_64_expand_unaligned_store_words. */ + while (i < nregs && GET_MODE (data_regs[i]) == SImode) + { + sw_64_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs); + ofs += 4; + i++; + } + + if (dst_align >= 16) + while (i < nregs && GET_MODE (data_regs[i]) == HImode) + { + emit_move_insn (adjust_address (orig_dst, HImode, ofs), data_regs[i]); + i++; + ofs += 2; + } + else + while (i < nregs && GET_MODE (data_regs[i]) == HImode) + { + sw_64_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs); + i++; + ofs += 2; + } + + /* The remainder must be byte copies. */ + while (i < nregs) + { + gcc_assert (GET_MODE (data_regs[i]) == QImode); + emit_move_insn (adjust_address (orig_dst, QImode, ofs), data_regs[i]); + i++; + ofs += 1; + } + + return 1; +} + +int +sw_64_expand_block_clear (rtx operands[]) +{ + rtx bytes_rtx = operands[1]; + rtx align_rtx = operands[3]; + HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx); + HOST_WIDE_INT bytes = orig_bytes; + HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT; + HOST_WIDE_INT alignofs = 0; + rtx orig_dst = operands[0]; + rtx tmp; + int i, words, ofs = 0; + + if (orig_bytes <= 0) + return 1; + if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD) + return 0; + + /* Look for stricter alignment. */ + tmp = XEXP (orig_dst, 0); + if (REG_P (tmp)) + align = MAX (align, REGNO_POINTER_ALIGN (REGNO (tmp))); + else if (GET_CODE (tmp) == PLUS && REG_P (XEXP (tmp, 0)) + && CONST_INT_P (XEXP (tmp, 1))) + { + HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1)); + int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0))); + + if (a > align) + { + if (a >= 64) + align = a, alignofs = 8 - c % 8; + else if (a >= 32) + align = a, alignofs = 4 - c % 4; + else if (a >= 16) + align = a, alignofs = 2 - c % 2; + } + } + + /* Handle an unaligned prefix first. */ + + if (alignofs > 0) + { + /* Given that alignofs is bounded by align, the only time BWX could + generate three stores is for a 7 byte fill. Prefer two individual + stores over a load/mask/store sequence. 
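+	 In other words: with byte/word stores available, a prefix of
+	 one to six bytes needs at most two plain stores and is handled
+	 by the individual stores further down, so the masked
+	 read-modify-write below is used only for the seven-byte prefix
+	 (or, given sufficient alignment, whenever BWX is not available
+	 at all).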
*/ + if ((!TARGET_BWX || alignofs == 7) && align >= 32 + && !(alignofs == 4 && bytes >= 4)) + { + machine_mode mode = (align >= 64 ? DImode : SImode); + int inv_alignofs = (align >= 64 ? 8 : 4) - alignofs; + rtx mem, tmp; + HOST_WIDE_INT mask; + + mem = adjust_address (orig_dst, mode, ofs - inv_alignofs); + set_mem_alias_set (mem, 0); + + mask = ~(HOST_WIDE_INT_M1U << (inv_alignofs * 8)); + if (bytes < alignofs) + { + mask |= HOST_WIDE_INT_M1U << ((inv_alignofs + bytes) * 8); + ofs += bytes; + bytes = 0; + } + else + { + bytes -= alignofs; + ofs += alignofs; + } + alignofs = 0; + + tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask), NULL_RTX, 1, + OPTAB_WIDEN); + + emit_move_insn (mem, tmp); + } + + if (TARGET_BWX && (alignofs & 1) && bytes >= 1) + { + emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx); + bytes -= 1; + ofs += 1; + alignofs -= 1; + } + if (TARGET_BWX && align >= 16 && (alignofs & 3) == 2 && bytes >= 2) + { + emit_move_insn (adjust_address (orig_dst, HImode, ofs), const0_rtx); + bytes -= 2; + ofs += 2; + alignofs -= 2; + } + if (alignofs == 4 && bytes >= 4) + { + emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx); + bytes -= 4; + ofs += 4; + alignofs = 0; + } + + /* If we've not used the extra lead alignment information by now, + we won't be able to. Downgrade align to match what's left over. */ + if (alignofs > 0) + { + alignofs = alignofs & -alignofs; + align = MIN (align, alignofs * BITS_PER_UNIT); + } + } + + /* Handle a block of contiguous long-words. */ + + if (align >= 64 && bytes >= 8) + { + words = bytes / 8; + + for (i = 0; i < words; ++i) + emit_move_insn (adjust_address (orig_dst, DImode, ofs + i * 8), + const0_rtx); + + bytes -= words * 8; + ofs += words * 8; + } + + /* If the block is large and appropriately aligned, emit a single + store followed by a sequence of stl_u insns. */ + + if (align >= 32 && bytes > 16) + { + rtx orig_dsta; + + emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx); + bytes -= 4; + ofs += 4; + + orig_dsta = XEXP (orig_dst, 0); + if (GET_CODE (orig_dsta) == LO_SUM) + orig_dsta = force_reg (Pmode, orig_dsta); + + words = bytes / 8; + for (i = 0; i < words; ++i) + { + rtx mem = change_address ( + orig_dst, Pmode, + gen_rtx_AND (Pmode, plus_constant (Pmode, orig_dsta, ofs + i * 8), + GEN_INT (-8))); + set_mem_alias_set (mem, 0); + emit_move_insn (mem, const0_rtx); + } + + /* Depending on the alignment, the first stl_u may have overlapped + with the initial stl, which means that the last stl_u didn't + write as much as it would appear. Leave those questionable bytes + unaccounted for. */ + bytes -= words * 8 - 4; + ofs += words * 8 - 4; + } + + /* Handle a smaller block of aligned words. */ + + if ((align >= 64 && bytes == 4) || (align == 32 && bytes >= 4)) + { + words = bytes / 4; + + for (i = 0; i < words; ++i) + emit_move_insn (adjust_address (orig_dst, SImode, ofs + i * 4), + const0_rtx); + + bytes -= words * 4; + ofs += words * 4; + } + + /* An unaligned block uses stl_u stores for as many as possible. */ + + if (bytes >= 8) + { + words = bytes / 8; + + sw_64_expand_unaligned_store_words (NULL, orig_dst, words, ofs); + + bytes -= words * 8; + ofs += words * 8; + } + + /* Next clean up any trailing pieces. */ + + /* Count the number of bits in BYTES for which aligned stores could + be emitted. */ + words = 0; + for (i = (TARGET_BWX ? 
1 : 4); i * BITS_PER_UNIT <= align; i <<= 1) + if (bytes & i) + words += 1; + + /* If we have appropriate alignment (and it wouldn't take too many + instructions otherwise), mask out the bytes we need. */ + if (TARGET_BWX ? words > 2 : bytes > 0) + { + if (align >= 64) + { + rtx mem, tmp; + HOST_WIDE_INT mask; + + mem = adjust_address (orig_dst, DImode, ofs); + set_mem_alias_set (mem, 0); + + mask = HOST_WIDE_INT_M1U << (bytes * 8); + + tmp = expand_binop (DImode, and_optab, mem, GEN_INT (mask), NULL_RTX, + 1, OPTAB_WIDEN); + + emit_move_insn (mem, tmp); + return 1; + } + else if (align >= 32 && bytes < 4) + { + rtx mem, tmp; + HOST_WIDE_INT mask; + + mem = adjust_address (orig_dst, SImode, ofs); + set_mem_alias_set (mem, 0); + + mask = HOST_WIDE_INT_M1U << (bytes * 8); + + tmp = expand_binop (SImode, and_optab, mem, GEN_INT (mask), NULL_RTX, + 1, OPTAB_WIDEN); + + emit_move_insn (mem, tmp); + return 1; + } + } + + if (!TARGET_BWX && bytes >= 4) + { + sw_64_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs); + bytes -= 4; + ofs += 4; + } + + if (bytes >= 2) + { + if (align >= 16) + { + do + { + emit_move_insn (adjust_address (orig_dst, HImode, ofs), + const0_rtx); + bytes -= 2; + ofs += 2; + } + while (bytes >= 2); + } + else if (!TARGET_BWX) + { + sw_64_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs); + bytes -= 2; + ofs += 2; + } + } + + while (bytes > 0) + { + emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx); + bytes -= 1; + ofs += 1; + } + + return 1; +} + +/* Returns a mask so that zap(x, value) == x & mask. */ + +rtx +sw_64_expand_zap_mask (HOST_WIDE_INT value) +{ + rtx result; + int i; + HOST_WIDE_INT mask = 0; + + for (i = 7; i >= 0; --i) + { + mask <<= 8; + if (!((value >> i) & 1)) + mask |= 0xff; + } + + result = gen_int_mode (mask, DImode); + return result; +} + +void +sw_64_expand_builtin_vector_binop (rtx (*gen) (rtx, rtx, rtx), + machine_mode mode, rtx op0, rtx op1, rtx op2) +{ + op0 = gen_lowpart (mode, op0); + + if (op1 == const0_rtx) + op1 = CONST0_RTX (mode); + else + op1 = gen_lowpart (mode, op1); + + if (op2 == const0_rtx) + op2 = CONST0_RTX (mode); + else + op2 = gen_lowpart (mode, op2); + + emit_insn ((*gen) (op0, op1, op2)); +} + +/* A subroutine of the atomic operation splitters. Jump to LABEL if + COND is true. Mark the jump as unlikely to be taken. */ + +static void +emit_unlikely_jump (rtx cond, rtx label) +{ + rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx); + rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x)); + add_reg_br_prob_note (insn, profile_probability::very_unlikely ()); +} + +/* Subroutines of the atomic operation splitters. Emit barriers + as needed for the memory MODEL. */ + +static void +sw_64_pre_atomic_barrier (enum memmodel model) +{ + if (need_atomic_barrier_p (model, true)) + emit_insn (gen_memory_barrier ()); +} + +static void +sw_64_post_atomic_barrier (enum memmodel model) +{ + if (need_atomic_barrier_p (model, false)) + emit_insn (gen_memory_barrier ()); +} + +/* A subroutine of the atomic operation splitters. Emit an insxl + instruction in MODE. 
*/ + +static rtx +emit_insxl (machine_mode mode, rtx op1, rtx op2) +{ + rtx ret = gen_reg_rtx (DImode); + rtx (*fn) (rtx, rtx, rtx); + + switch (mode) + { + case E_QImode: + fn = gen_insbl; + break; + case E_HImode: + fn = gen_inswl; + break; + case E_SImode: + fn = gen_insll; + break; + case E_DImode: + fn = gen_insql; + break; + default: + gcc_unreachable (); + } + + op1 = force_reg (mode, op1); + emit_insn (fn (ret, op1, op2)); + + return ret; +} + +/* Expand an atomic fetch-and-operate pattern. CODE is the binary operation + to perform. MEM is the memory on which to operate. VAL is the second + operand of the binary operator. BEFORE and AFTER are optional locations to + return the value of MEM either before of after the operation. SCRATCH is + a scratch register. */ + +void +sw_64_split_atomic_op (enum rtx_code code, rtx mem, rtx val, rtx before, + rtx after, rtx scratch, enum memmodel model) +{ + machine_mode mode = GET_MODE (mem); + rtx label, x, cond = gen_rtx_REG (DImode, REGNO (scratch)); + + label = gen_label_rtx (); + emit_label (label); + label = gen_rtx_LABEL_REF (DImode, label); + + if (before == NULL) + before = scratch; + emit_insn (gen_load_locked (mode, before, mem)); + + if (!TARGET_SW8A) + { + if (after) + { + rtx cond1 = gen_rtx_REG (DImode, REGNO (after)); + emit_insn (gen_rtx_SET (cond1, const1_rtx)); + emit_insn (gen_builtin_wr_f (cond1)); + } + else + { + rtx cond2 = gen_rtx_REG (DImode, 28); + emit_insn (gen_rtx_SET (cond2, const1_rtx)); + emit_insn (gen_builtin_wr_f (cond2)); + } + } + if (code == NOT) + { + x = gen_rtx_AND (mode, before, val); + emit_insn (gen_rtx_SET (val, x)); + + x = gen_rtx_NOT (mode, val); + } + else + x = gen_rtx_fmt_ee (code, mode, before, val); + if (after) + emit_insn (gen_rtx_SET (after, copy_rtx (x))); + emit_insn (gen_rtx_SET (scratch, x)); + + emit_insn (gen_store_conditional (mode, cond, mem, scratch)); + if (!TARGET_SW8A) + emit_insn (gen_builtin_rd_f (cond)); + + x = gen_rtx_EQ (DImode, cond, const0_rtx); + emit_unlikely_jump (x, label); +} + +/* Expand a compare and swap operation. 
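+
+   In outline the expansion below is the usual LL/SC retry loop:
+
+     label1:  load_locked    retval, [mem]
+              compare retval with oldval
+                (SW8A: branch to label2 on mismatch; otherwise the
+                 comparison result is written to the lock flag with
+                 builtin_wr_f)
+              store_conditional  newval -> [mem]
+                (non-SW8A: builtin_rd_f, then branch to label2 if the
+                 flag shows the compare failed)
+              strong form only: branch back to label1 if the
+              store_conditional itself failed
+     label2: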
*/ + +void +sw_64_split_compare_and_swap (rtx operands[]) +{ + rtx cond, retval, mem, oldval, newval; + bool is_weak; + enum memmodel mod_s, mod_f; + machine_mode mode; + rtx label1, label2, x; + + rtx imust = operands[8]; + cond = operands[0]; + retval = operands[1]; + mem = operands[2]; + oldval = operands[3]; + newval = operands[4]; + is_weak = (operands[5] != const0_rtx); + mod_s = memmodel_from_int (INTVAL (operands[6])); + mod_f = memmodel_from_int (INTVAL (operands[7])); + mode = GET_MODE (mem); + + label1 = NULL_RTX; + if (!is_weak) + { + label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + emit_label (XEXP (label1, 0)); + } + label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + + emit_insn (gen_load_locked (mode, retval, mem)); + x = gen_lowpart (DImode, retval); + + rtx imust1; + if (TARGET_SW8A) + { + if (oldval == const0_rtx) + { + emit_move_insn (cond, const0_rtx); + x = gen_rtx_NE (DImode, x, const0_rtx); + } + else + { + x = gen_rtx_EQ (DImode, x, oldval); + emit_insn (gen_rtx_SET (cond, x)); + x = gen_rtx_EQ (DImode, cond, const0_rtx); + } + emit_unlikely_jump (x, label2); + } + else + { + x = gen_rtx_EQ (DImode, x, oldval); + imust1 = gen_lowpart (DImode, imust); + emit_insn (gen_rtx_SET (imust1, x)); + emit_insn (gen_builtin_wr_f (imust1)); + } + + emit_move_insn (cond, newval); + emit_insn (gen_store_conditional (mode, cond, mem, gen_lowpart (mode, cond))); + + if (!TARGET_SW8A) + { + emit_insn (gen_builtin_rd_f (cond)); + imust1 = gen_rtx_EQ (DImode, imust1, const0_rtx); + emit_unlikely_jump (imust1, label2); + } + if (!is_weak) + { + x = gen_rtx_EQ (DImode, cond, const0_rtx); + emit_unlikely_jump (x, label1); + } + + if (!is_mm_relaxed (mod_f)) + emit_label (XEXP (label2, 0)); + + if (is_mm_relaxed (mod_f)) + emit_label (XEXP (label2, 0)); +} + +void +sw_64_expand_compare_and_swap_12 (rtx operands[]) +{ + rtx cond, dst, mem, oldval, newval, is_weak, mod_s, mod_f; + machine_mode mode; + rtx addr, align, wdst; + rtx imust; + + cond = operands[0]; + dst = operands[1]; + mem = operands[2]; + oldval = operands[3]; + newval = operands[4]; + is_weak = operands[5]; + mod_s = operands[6]; + mod_f = operands[7]; + mode = GET_MODE (mem); + bool use_cas = GET_MODE_SIZE (mode) >= 32 && flag_sw_use_cas; + if (!use_cas) + imust = operands[8]; + + /* We forced the address into a register via mem_noofs_operand. 
*/ + addr = XEXP (mem, 0); + gcc_assert (register_operand (addr, DImode)); + + align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8), NULL_RTX, 1, + OPTAB_DIRECT); + if (oldval != const0_rtx && TARGET_SW8A && use_cas) + oldval = emit_insxl (mode, oldval, addr); + oldval = convert_modes (DImode, mode, oldval, 1); + + if (newval != const0_rtx) + newval = emit_insxl (mode, newval, addr); + + wdst = gen_reg_rtx (DImode); + if (TARGET_SW8A && use_cas) + emit_insn (gen_atomic_compare_and_swap_1_target_sw8a ( + mode, cond, wdst, mem, oldval, newval, align, is_weak, mod_s, mod_f)); + else + emit_insn (gen_atomic_compare_and_swap_1 (mode, cond, wdst, mem, oldval, + newval, align, is_weak, mod_s, + mod_f, imust)); + emit_move_insn (dst, gen_lowpart (mode, wdst)); +} + +void +sw_64_split_compare_and_swap_12 (rtx operands[]) +{ + rtx cond, dest, orig_mem, oldval, newval, align, scratch; + machine_mode mode; + bool is_weak; + enum memmodel mod_s, mod_f; + rtx label1, label2, mem, addr, width, mask, x; + rtx imust; + + cond = operands[0]; + dest = operands[1]; + orig_mem = operands[2]; + oldval = operands[3]; + newval = operands[4]; + align = operands[5]; + is_weak = (operands[6] != const0_rtx); + mod_s = memmodel_from_int (INTVAL (operands[7])); + mod_f = memmodel_from_int (INTVAL (operands[8])); + imust = operands[9]; + scratch = operands[10]; + mode = GET_MODE (orig_mem); + addr = XEXP (orig_mem, 0); + + mem = gen_rtx_MEM (DImode, align); + MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem); + if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER) + set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER); + + label1 = NULL_RTX; + if (!is_weak) + { + label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + emit_label (XEXP (label1, 0)); + } + label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + + emit_insn (gen_load_locked (DImode, scratch, mem)); + + width = GEN_INT (GET_MODE_BITSIZE (mode)); + mask = GEN_INT (mode == QImode ? 0xff : 0xffff); + emit_insn (gen_extxl (dest, scratch, width, addr)); + + rtx imust1; + if (TARGET_SW8A) + { + if (oldval == const0_rtx) + { + emit_move_insn (cond, const0_rtx); + x = gen_rtx_NE (DImode, dest, const0_rtx); + } + else + { + x = gen_rtx_EQ (DImode, dest, oldval); + emit_insn (gen_rtx_SET (cond, x)); + x = gen_rtx_EQ (DImode, cond, const0_rtx); + } + emit_unlikely_jump (x, label2); + } + else + { + x = gen_rtx_EQ (DImode, dest, oldval); + imust1 = gen_lowpart (DImode, imust); + emit_insn (gen_rtx_SET (imust1, x)); + emit_insn (gen_builtin_wr_f (imust1)); + } + + emit_insn (gen_mskxl (cond, scratch, mask, addr)); + + if (newval != const0_rtx) + emit_insn (gen_iordi3 (cond, cond, newval)); + + emit_insn (gen_store_conditional (DImode, cond, mem, cond)); + if (!TARGET_SW8A) + { + emit_insn (gen_builtin_rd_f (cond)); + imust1 = gen_rtx_EQ (DImode, imust1, const0_rtx); + emit_unlikely_jump (imust1, label2); + } + + if (!is_weak) + { + x = gen_rtx_EQ (DImode, cond, const0_rtx); + emit_unlikely_jump (x, label1); + } + + if (!is_mm_relaxed (mod_f)) + emit_label (XEXP (label2, 0)); + + if (is_mm_relaxed (mod_f)) + emit_label (XEXP (label2, 0)); +} + +/* Expand an atomic exchange operation. 
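+
+   Again an LL/SC retry loop: load_locked the old value into RETVAL,
+   copy VAL into the scratch register, store_conditional it back, and
+   branch back to the top if the store failed; on non-SW8A parts the
+   lock flag is seeded with builtin_wr_f and read back with
+   builtin_rd_f around the store_conditional.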
*/ + +void +sw_64_split_atomic_exchange (rtx operands[]) +{ + rtx retval, mem, val, scratch; + enum memmodel model; + machine_mode mode; + rtx label, x, cond; + + retval = operands[0]; + mem = operands[1]; + val = operands[2]; + model = (enum memmodel) INTVAL (operands[3]); + scratch = operands[4]; + mode = GET_MODE (mem); + cond = gen_lowpart (DImode, scratch); + + label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + emit_label (XEXP (label, 0)); + + emit_insn (gen_load_locked (mode, retval, mem)); + if (!TARGET_SW8A) + { + emit_insn (gen_rtx_SET (cond, const1_rtx)); + emit_insn (gen_builtin_wr_f (cond)); + } + emit_move_insn (scratch, val); + emit_insn (gen_store_conditional (mode, cond, mem, scratch)); + if (!TARGET_SW8A) + emit_insn (gen_builtin_rd_f (cond)); + + x = gen_rtx_EQ (DImode, cond, const0_rtx); + emit_unlikely_jump (x, label); +} + +void +sw_64_expand_atomic_exchange_12 (rtx operands[]) +{ + rtx dst, mem, val, model; + machine_mode mode; + rtx addr, align, wdst; + + dst = operands[0]; + mem = operands[1]; + val = operands[2]; + model = operands[3]; + mode = GET_MODE (mem); + + /* We forced the address into a register via mem_noofs_operand. */ + addr = XEXP (mem, 0); + gcc_assert (register_operand (addr, DImode)); + + align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8), NULL_RTX, 1, + OPTAB_DIRECT); + + /* Insert val into the correct byte location within the word. */ + if (val != const0_rtx) + val = emit_insxl (mode, val, addr); + + wdst = gen_reg_rtx (DImode); + emit_insn (gen_atomic_exchange_1 (mode, wdst, mem, val, align, model)); + emit_move_insn (dst, gen_lowpart (mode, wdst)); +} + +void +sw_64_split_atomic_exchange_12 (rtx operands[]) +{ + rtx dest, orig_mem, addr, val, align, scratch; + rtx label, mem, width, mask, x; + machine_mode mode; + enum memmodel model; + + dest = operands[0]; + orig_mem = operands[1]; + val = operands[2]; + align = operands[3]; + model = (enum memmodel) INTVAL (operands[4]); + scratch = operands[5]; + mode = GET_MODE (orig_mem); + addr = XEXP (orig_mem, 0); + + mem = gen_rtx_MEM (DImode, align); + MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem); + if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER) + set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER); + + label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + emit_label (XEXP (label, 0)); + + emit_insn (gen_load_locked (DImode, scratch, mem)); + if (!TARGET_SW8A) + { + emit_insn (gen_rtx_SET (dest, const1_rtx)); + emit_insn (gen_builtin_wr_f (dest)); + } + + width = GEN_INT (GET_MODE_BITSIZE (mode)); + mask = GEN_INT (mode == QImode ? 0xff : 0xffff); + emit_insn (gen_extxl (dest, scratch, width, addr)); + emit_insn (gen_mskxl (scratch, scratch, mask, addr)); + if (val != const0_rtx) + emit_insn (gen_iordi3 (scratch, scratch, val)); + + emit_insn (gen_store_conditional (DImode, scratch, mem, scratch)); + if (!TARGET_SW8A) + emit_insn (gen_builtin_rd_f (scratch)); + + x = gen_rtx_EQ (DImode, scratch, const0_rtx); + emit_unlikely_jump (x, label); +} + +/* Emit an atomic compare-and-swap operation. SI and larger modes. 
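+
+   Unlike the LL/SC splitters above, this form goes through the
+   sw_64_atomic_cassi/casdi patterns: RETVAL is preloaded with NEWVAL,
+   the cas pattern is emitted on (OLDVAL, MEM, RETVAL), and success is
+   recovered afterwards by testing whether RETVAL and OLDVAL compare
+   equal.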
*/ + +void +sw_64_split_atomic_cas (rtx operands[]) +{ + rtx cond, retval, mem, oldval, newval; + rtx (*gen) (rtx, rtx, rtx); + enum memmodel mod_s; + machine_mode mode; + + cond = operands[0]; + retval = operands[1]; + mem = operands[2]; + oldval = operands[3]; + newval = operands[4]; + + mod_s = memmodel_from_int (INTVAL (operands[6])); + mode = GET_MODE (mem); + + if (GET_MODE (mem) == SImode && GET_MODE (oldval) == DImode + && GET_MODE (newval) == DImode) + { + oldval = gen_rtx_REG (SImode, REGNO (oldval)); + newval = gen_rtx_REG (SImode, REGNO (newval)); + } + + switch (mode) + { + case E_SImode: + gen = gen_sw_64_atomic_cassi; + break; + case E_DImode: + gen = gen_sw_64_atomic_casdi; + break; + default: + gcc_unreachable (); + } + + emit_insn (gen_rtx_SET (retval, newval)); + emit_insn (gen (oldval, mem, retval)); + + rtx x = gen_lowpart (DImode, retval); + rtx x1 = gen_lowpart (DImode, oldval); + x = gen_rtx_EQ (DImode, x, x1); + emit_insn (gen_rtx_SET (cond, x)); +} + +/* Emit an atomic compare-and-swap operation. HI and smaller modes. */ + +void +sw_64_split_atomic_cas_12 (rtx operands[]) +{ + rtx cond, dest, orig_mem, oldval, newval, align, scratch; + machine_mode mode; + bool is_weak; + enum memmodel mod_s, mod_f; + rtx label1, label2, mem, addr, width, mask, x; + + cond = operands[0]; + dest = operands[1]; + orig_mem = operands[2]; + oldval = operands[3]; + newval = operands[4]; + align = operands[5]; + is_weak = (operands[6] != const0_rtx); + mod_s = memmodel_from_int (INTVAL (operands[7])); + mod_f = memmodel_from_int (INTVAL (operands[8])); + scratch = operands[9]; + mode = GET_MODE (orig_mem); + addr = XEXP (orig_mem, 0); + + mem = gen_rtx_MEM (DImode, align); + MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem); + if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER) + set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER); + + emit_move_insn (scratch, mem); + + width = GEN_INT (GET_MODE_BITSIZE (mode)); + mask = GEN_INT (mode == QImode ? 0xff : 0xffff); + emit_insn (gen_extxl (dest, scratch, width, addr)); + emit_insn (gen_mskxl (cond, scratch, mask, addr)); + + rtx scratch2 = operands[10]; + if (newval != const0_rtx) + emit_insn (gen_iordi3 (scratch2, cond, newval)); + if (oldval == const0_rtx) + { + emit_move_insn (cond, const0_rtx); + x = gen_rtx_NE (DImode, dest, const0_rtx); + } + else + { + emit_insn (gen_iordi3 (scratch, cond, oldval)); + emit_insn (gen_sw_64_atomic_casdi (scratch, mem, scratch2)); + + x = gen_rtx_EQ (DImode, scratch2, scratch); + emit_insn (gen_rtx_SET (cond, x)); + x = gen_rtx_EQ (DImode, cond, const0_rtx); + } +} + +/* Adjust the cost of a scheduling dependency. Return the new cost of + a dependency LINK or INSN on DEP_INSN. COST is the current cost. */ + +static int +sw_64_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost, + unsigned int) +{ + enum attr_type dep_insn_type; + + /* If the dependence is an anti-dependence, there is no cost. For an + output dependence, there is sometimes a cost, but it doesn't seem + worth handling those few cases. */ + if (dep_type != 0) + return cost; + + /* If we can't recognize the insns, we can't really do anything. */ + if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0) + return cost; + + dep_insn_type = get_attr_type (dep_insn); + + /* Bring in the user-defined memory latency. */ + if (dep_insn_type == TYPE_ILD || dep_insn_type == TYPE_FLD + || dep_insn_type == TYPE_LDSYM) + cost += sw_64_memory_latency - 1; + + /* Everything else handled in DFA bypasses now. 
*/ + + return cost; +} + +/* The number of instructions that can be issued per cycle. */ + +static int +sw_64_issue_rate (void) +{ + return ((sw_64_tune == PROCESSOR_SW6 || sw_64_tune == PROCESSOR_SW8) ? 4 : 2); +} + +/* How many alternative schedules to try. This should be as wide as the + scheduling freedom in the DFA, but no wider. Making this value too + large results extra work for the scheduler. */ + +static int +sw_64_multipass_dfa_lookahead (void) +{ + return ((sw_64_tune == PROCESSOR_SW6 || sw_64_tune == PROCESSOR_SW8) ? 4 : 2); +} + +/* Machine-specific function data. */ + +struct GTY (()) sw_64_links; + +/* Information about a function's frame layout. */ +struct GTY (()) sw_64_frame_info +{ + /* The size of the frame in bytes. */ + HOST_WIDE_INT frame_size; + + /* Bit X is set if the function saves or restores GPR X. */ + unsigned HOST_WIDE_INT sa_mask; + + /* The size of the saved callee-save int/FP registers. */ + HOST_WIDE_INT saved_regs_size; + + /* The number of extra stack bytes taken up by register varargs. */ + HOST_WIDE_INT saved_varargs_size; + + /* Offset of virtual frame pointer from stack pointer/frame bottom. */ + HOST_WIDE_INT callee_offset; + + /* Offset of hard frame pointer from stack pointer/frame bottom. */ + HOST_WIDE_INT hard_frame_pointer_offset; + + HOST_WIDE_INT local_offset; + + /* The offset of arg_pointer_rtx from the bottom of the frame. */ + HOST_WIDE_INT arg_pointer_offset; + + bool emit_frame_pointer; +}; + +struct GTY (()) machine_function +{ + unsigned HOST_WIDE_INT sa_mask; + HOST_WIDE_INT sa_size; + HOST_WIDE_INT frame_size; + + /* For flag_reorder_blocks_and_partition. */ + rtx gp_save_rtx; + + /* For VMS condition handlers. */ + bool uses_condition_handler; + + struct sw_64_frame_info frame; + + /* Linkage entries. */ + hash_map *links; +}; + +/* How to allocate a 'struct machine_function'. */ + +static struct machine_function * +sw_64_init_machine_status (void) +{ + return ggc_cleared_alloc (); +} + +/* Start the ball rolling with RETURN_ADDR_RTX. */ + +rtx +sw_64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED) +{ + if (count != 0) + return const0_rtx; + + return get_hard_reg_initial_val (Pmode, REG_RA); +} + +/* Return or create a memory slot containing the gp value for the current + function. Needed only if TARGET_LD_BUGGY_LDGP. */ + +rtx +sw_64_gp_save_rtx (void) +{ + rtx_insn *seq; + rtx m = cfun->machine->gp_save_rtx; + + if (m == NULL) + { + start_sequence (); + + m = assign_stack_local (Pmode, UNITS_PER_WORD, BITS_PER_WORD); + m = validize_mem (m); + emit_move_insn (m, pic_offset_table_rtx); + + seq = get_insns (); + end_sequence (); + + /* We used to simply emit the sequence after entry_of_function. + However this breaks the CFG if the first instruction in the + first block is not the NOTE_INSN_BASIC_BLOCK, for example a + label. Emit the sequence properly on the edge. We are only + invoked from dw2_build_landing_pads and finish_eh_generation + will call commit_edge_insertions thanks to a kludge. 
*/ + insert_insn_on_edge (seq, + single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))); + + cfun->machine->gp_save_rtx = m; + } + + return m; +} + +static void +sw_64_instantiate_decls (void) +{ + if (cfun->machine->gp_save_rtx != NULL_RTX) + instantiate_decl_rtl (cfun->machine->gp_save_rtx); +} + +static int +sw_64_ra_ever_killed (void) +{ + rtx_insn *top; + + if (!has_hard_reg_initial_val (Pmode, REG_RA)) + return (int) df_regs_ever_live_p (REG_RA); + + push_topmost_sequence (); + top = get_insns (); + pop_topmost_sequence (); + + return reg_set_between_p (gen_rtx_REG (Pmode, REG_RA), top, NULL); +} + +/* Return the trap mode suffix applicable to the current + instruction, or NULL. */ + +static const char * +get_trap_mode_suffix (void) +{ + enum attr_trap_suffix s = get_attr_trap_suffix (current_output_insn); + + switch (s) + { + case TRAP_SUFFIX_NONE: + return NULL; + + case TRAP_SUFFIX_SU: + if (sw_64_fptm >= SW_64_FPTM_SU) + return "su"; + return NULL; + + case TRAP_SUFFIX_SUI: + if (sw_64_fptm >= SW_64_FPTM_SUI) + return "sui"; + return NULL; + + case TRAP_SUFFIX_V_SV: + switch (sw_64_fptm) + { + case SW_64_FPTM_N: + return NULL; + case SW_64_FPTM_U: + return "v"; + case SW_64_FPTM_SU: + case SW_64_FPTM_SUI: + return "sv"; + default: + gcc_unreachable (); + } + + case TRAP_SUFFIX_V_SV_SVI: + switch (sw_64_fptm) + { + case SW_64_FPTM_N: + return NULL; + case SW_64_FPTM_U: + return "v"; + case SW_64_FPTM_SU: + return "sv"; + case SW_64_FPTM_SUI: + return "svi"; + default: + gcc_unreachable (); + } + break; + + case TRAP_SUFFIX_U_SU_SUI: + switch (sw_64_fptm) + { + case SW_64_FPTM_N: + return NULL; + case SW_64_FPTM_U: + return "u"; + case SW_64_FPTM_SU: + return "su"; + case SW_64_FPTM_SUI: + return "sui"; + default: + gcc_unreachable (); + } + break; + + default: + gcc_unreachable (); + } + gcc_unreachable (); +} + +/* Return the rounding mode suffix applicable to the current + instruction, or NULL. */ + +static const char * +get_round_mode_suffix (void) +{ + enum attr_round_suffix s = get_attr_round_suffix (current_output_insn); + + switch (s) + { + case ROUND_SUFFIX_NONE: + return NULL; + case ROUND_SUFFIX_NORMAL: + switch (sw_64_fprm) + { + case SW_64_FPRM_NORM: + return NULL; + case SW_64_FPRM_MINF: + return "m"; + case SW_64_FPRM_CHOP: + return "c"; + case SW_64_FPRM_DYN: + return "d"; + default: + gcc_unreachable (); + } + break; + + case ROUND_SUFFIX_C: + return "c"; + + default: + gcc_unreachable (); + } + gcc_unreachable (); +} + +/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */ + +static bool +sw_64_print_operand_punct_valid_p (unsigned char code) +{ + return (code == '/' || code == ',' || code == '-' || code == '~' + || code == '#' || code == '*' || code == '&'); +} + +/* Implement TARGET_PRINT_OPERAND. The sw_64-specific + operand codes are documented below. */ + +static const char * +get_round_mode_suffix_sw (void) +{ + enum attr_round_suffix s = get_attr_round_suffix (current_output_insn); + + switch (s) + { + case ROUND_SUFFIX_NONE: + return NULL; + case ROUND_SUFFIX_NORMAL: + switch (sw_64_fprm) + { + case SW_64_FPRM_NORM: + return "_g"; + case SW_64_FPRM_MINF: + return "_p"; + case SW_64_FPRM_CHOP: + return "_z"; + case SW_64_FPRM_DYN: + return "_n"; + default: + gcc_unreachable (); + } + break; + + case ROUND_SUFFIX_C: + return "_z"; + + default: + gcc_unreachable (); + } + gcc_unreachable (); +} +static void +sw_64_print_operand (FILE *file, rtx x, int code) +{ + int i; + + switch (code) + { + case '~': + /* Print the assembler name of the current function. 
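   This is the string recorded in sw_64_fnname.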
*/ + assemble_name (file, sw_64_fnname); + break; + + case '&': + if (const char *name = get_some_local_dynamic_name ()) + assemble_name (file, name); + else + output_operand_lossage ("'%%&' used without any " + "local dynamic TLS references"); + break; + + case '/': + /* Generates the instruction suffix. The TRAP_SUFFIX and ROUND_SUFFIX + attributes are examined to determine what is appropriate. */ + { + const char *trap = get_trap_mode_suffix (); + const char *round = get_round_mode_suffix (); + + break; + } + + case 'T': + { + const char *round_sw = get_round_mode_suffix_sw (); + + if (round_sw) + fprintf (file, "%s", (round_sw ? round_sw : "")); + break; + } + case ',': + /* Generates single precision suffix for floating point + instructions (s for IEEE, f for VAX). */ + fputc ((TARGET_FLOAT_VAX ? 'f' : 's'), file); + break; + + case '-': + /* Generates double precision suffix for floating point + instructions (t for IEEE, g for VAX). */ + fputc ((TARGET_FLOAT_VAX ? 'g' : 'd'), file); + break; + + case '#': + if (sw_64_this_literal_sequence_number == 0) + sw_64_this_literal_sequence_number = sw_64_next_sequence_number++; + fprintf (file, "%d", sw_64_this_literal_sequence_number); + break; + + case '*': + if (sw_64_this_gpdisp_sequence_number == 0) + sw_64_this_gpdisp_sequence_number = sw_64_next_sequence_number++; + fprintf (file, "%d", sw_64_this_gpdisp_sequence_number); + break; + + case 'J': + { + const char *lituse; + + if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD_CALL) + { + x = XVECEXP (x, 0, 0); + lituse = "lituse_tlsgd"; + } + else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM_CALL) + { + x = XVECEXP (x, 0, 0); + lituse = "lituse_tlsldm"; + } + else if (CONST_INT_P (x)) + lituse = "lituse_jsr"; + else + { + output_operand_lossage ("invalid %%J value"); + break; + } + + if (x != const0_rtx) + fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x)); + } + break; + + case 'j': + { + const char *lituse; + +#ifdef HAVE_AS_JSRDIRECT_RELOCS + lituse = "lituse_jsrdirect"; +#else + lituse = "lituse_jsr"; +#endif + + gcc_assert (INTVAL (x) != 0); + fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x)); + } + break; + case 'r': + /* If this operand is the constant zero, write it as "$31". */ + if (REG_P (x)) + fprintf (file, "%s", reg_names[REGNO (x)]); + else if (x == CONST0_RTX (GET_MODE (x))) + fprintf (file, "$31"); + else + output_operand_lossage ("invalid %%r value"); + break; + + case 'R': + /* Similar, but for floating-point. */ + if (REG_P (x)) + fprintf (file, "%s", reg_names[REGNO (x)]); + else if (x == CONST0_RTX (GET_MODE (x))) + fprintf (file, "$f31"); + else + output_operand_lossage ("invalid %%R value"); + break; + + case 'N': + /* Write the 1's complement of a constant. */ + if (!CONST_INT_P (x)) + output_operand_lossage ("invalid %%N value"); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x)); + break; + + case 'P': + /* Write 1 << C, for a constant C. */ + if (!CONST_INT_P (x)) + output_operand_lossage ("invalid %%P value"); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, HOST_WIDE_INT_1 << INTVAL (x)); + break; + + case 'h': + /* Write the high-order 16 bits of a constant, sign-extended. */ + if (!CONST_INT_P (x)) + output_operand_lossage ("invalid %%h value"); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) >> 16); + break; + + case 'L': + /* Write the low-order 16 bits of a constant, sign-extended. 
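	 The expression below subtracts 2 * (x & 0x8000) so that bit 15 acts
	 as the sign bit of the 16-bit value.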
*/ + if (!CONST_INT_P (x)) + output_operand_lossage ("invalid %%L value"); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, + (INTVAL (x) & 0xffff) - 2 * (INTVAL (x) & 0x8000)); + break; + + case 'm': + /* Write mask for ZAP insn. */ + if (CONST_INT_P (x)) + { + HOST_WIDE_INT mask = 0, value = INTVAL (x); + + for (i = 0; i < 8; i++, value >>= 8) + if (value & 0xff) + mask |= (1 << i); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask); + } + else + output_operand_lossage ("invalid %%m value"); + break; + + case 'M': + /* 'b', 'w', 'l', or 'q' as the value of the constant. */ + if (!mode_width_operand (x, VOIDmode)) + output_operand_lossage ("invalid %%M value"); + + fprintf (file, "%s", + (INTVAL (x) == 8 + ? "b" + : INTVAL (x) == 16 ? "w" : INTVAL (x) == 32 ? "l" : "q")); + break; + + case 'U': + /* Similar, except do it from the mask. */ + if (CONST_INT_P (x)) + { + HOST_WIDE_INT value = INTVAL (x); + + if (value == 0xff) + { + fputc ('b', file); + break; + } + if (value == 0xffff) + { + fputc ('w', file); + break; + } + if (value == 0xffffffff) + { + fputc ('l', file); + break; + } + if (value == -1) + { + fputc ('q', file); + break; + } + } + /* Write "_a" for AUTO_INC_DEC access. */ + if (MEM_P (x) + && (GET_CODE (XEXP (x, 0)) == POST_INC + || GET_CODE (XEXP (x, 0)) == POST_DEC + || GET_CODE (XEXP (x, 0)) == POST_MODIFY)) + { + fprintf (file, "_a"); + break; + } + break; + + case 's': + /* Write the constant value divided by 8. */ + if (!CONST_INT_P (x) || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64 + || (INTVAL (x) & 7) != 0) + output_operand_lossage ("invalid %%s value"); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) / 8); + break; + + case 'C': + case 'D': + case 'c': + case 'd': + /* Write out comparison name. */ + { + enum rtx_code c = GET_CODE (x); + + if (!COMPARISON_P (x)) + output_operand_lossage ("invalid %%C value"); + + else if (code == 'D') + c = reverse_condition (c); + else if (code == 'c') + c = swap_condition (c); + else if (code == 'd') + c = swap_condition (reverse_condition (c)); + + if (c == LEU) + fprintf (file, "ule"); + else if (c == LTU) + fprintf (file, "ult"); + else if (c == UNORDERED) + fprintf (file, "un"); + else + fprintf (file, "%s", GET_RTX_NAME (c)); + } + break; + + case 'E': + /* Write the divide or modulus operator. */ + switch (GET_CODE (x)) + { + case DIV: + fprintf (file, "div%s", GET_MODE (x) == SImode ? "w" : "l"); + break; + case UDIV: + fprintf (file, "div%su", GET_MODE (x) == SImode ? "w" : "l"); + break; + case MOD: + fprintf (file, "rem%s", GET_MODE (x) == SImode ? "w" : "l"); + break; + case UMOD: + fprintf (file, "rem%su", GET_MODE (x) == SImode ? "w" : "l"); + break; + default: + output_operand_lossage ("invalid %%E value"); + break; + } + break; + + case 'A': + /* Write "_u" for unaligned access. 
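	 The unaligned-access memory patterns wrap the address in an AND,
	 which is what is checked for below.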
*/ + if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND) + fprintf (file, "_u"); + break; + + case 0: + if (REG_P (x)) + fprintf (file, "%s", reg_names[REGNO (x)]); + else if (MEM_P (x)) + { + if (GET_CODE (XEXP (x, 0)) == POST_INC) + fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)), + reg_names[REGNO (XEXP (XEXP (x, 0), 0))]); + else if (GET_CODE (XEXP (x, 0)) == POST_DEC) + fprintf (file, "%d(%s)", -GET_MODE_SIZE (GET_MODE (x)), + reg_names[REGNO (XEXP (XEXP (x, 0), 0))]); + else if (GET_CODE (XEXP (x, 0)) == POST_MODIFY) + output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1)); + else + output_address (GET_MODE (x), XEXP (x, 0)); + } + else if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == UNSPEC) + { + switch (XINT (XEXP (x, 0), 1)) + { + case UNSPEC_DTPREL: + case UNSPEC_TPREL: + output_addr_const (file, XVECEXP (XEXP (x, 0), 0, 0)); + break; + default: + output_operand_lossage ("unknown relocation unspec"); + break; + } + } + else + output_addr_const (file, x); + break; + + default: + output_operand_lossage ("invalid %%xn code"); + } +} + +/* Implement TARGET_PRINT_OPERAND_ADDRESS. */ + +static void +sw_64_print_operand_address (FILE *file, machine_mode /*mode. */, rtx addr) +{ + int basereg = 31; + HOST_WIDE_INT offset = 0; + + if (GET_CODE (addr) == AND) + addr = XEXP (addr, 0); + + if (GET_CODE (addr) == PLUS && CONST_INT_P (XEXP (addr, 1))) + { + offset = INTVAL (XEXP (addr, 1)); + addr = XEXP (addr, 0); + } + + if (GET_CODE (addr) == LO_SUM) + { + const char *reloc16, *reloclo; + rtx op1 = XEXP (addr, 1); + + if (GET_CODE (op1) == CONST && GET_CODE (XEXP (op1, 0)) == UNSPEC) + { + op1 = XEXP (op1, 0); + switch (XINT (op1, 1)) + { + case UNSPEC_DTPREL: + reloc16 = NULL; + reloclo = (sw_64_tls_size == 16 ? "dtprel" : "dtprello"); + break; + case UNSPEC_TPREL: + reloc16 = NULL; + reloclo = (sw_64_tls_size == 16 ? "tprel" : "tprello"); + break; + default: + output_operand_lossage ("unknown relocation unspec"); + return; + } + + output_addr_const (file, XVECEXP (op1, 0, 0)); + } + else + { + reloc16 = "gprel"; + reloclo = "gprellow"; + output_addr_const (file, op1); + } + + if (offset) + fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset); + + addr = XEXP (addr, 0); + switch (GET_CODE (addr)) + { + case REG: + basereg = REGNO (addr); + break; + + case SUBREG: + basereg = subreg_regno (addr); + break; + + default: + gcc_unreachable (); + } + + fprintf (file, "($%d)\t\t!%s", basereg, + (basereg == 29 ? reloc16 : reloclo)); + return; + } + + switch (GET_CODE (addr)) + { + case REG: + basereg = REGNO (addr); + break; + + case SUBREG: + basereg = subreg_regno (addr); + break; + + case CONST_INT: + offset = INTVAL (addr); + break; + + case SYMBOL_REF: + gcc_assert (this_is_asm_operands); + fprintf (file, "%s", XSTR (addr, 0)); + return; + + case CONST: + gcc_assert (this_is_asm_operands); + gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS + && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF); + fprintf (file, "%s+" HOST_WIDE_INT_PRINT_DEC, + XSTR (XEXP (XEXP (addr, 0), 0), 0), + INTVAL (XEXP (XEXP (addr, 0), 1))); + return; + + default: + output_operand_lossage ("invalid operand address"); + return; + } + + fprintf (file, HOST_WIDE_INT_PRINT_DEC "($%d)", offset, basereg); +} + +/* Emit RTL insns to initialize the variable parts of a trampoline at + M_TRAMP. FNDECL is target function's decl. CHAIN_VALUE is an rtx + for the static chain value for the function. 
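   The trampoline is 32 bytes: the two instruction words stored at offsets 0
   and 8, the target function address at offset 16, and the static chain
   value at offset 24.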
*/ + +static void +sw_64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) +{ + rtx fnaddr, mem, word1, word2; + + fnaddr = XEXP (DECL_RTL (fndecl), 0); + +#ifdef POINTERS_EXTEND_UNSIGNED + fnaddr = convert_memory_address (Pmode, fnaddr); + chain_value = convert_memory_address (Pmode, chain_value); +#endif + + /* These 4 instructions are: + ldq $1,24($27) + ldq $27,16($27) + jmp $31,($27),0 + nop + We don't bother setting the HINT field of the jump; the nop + is merely there for padding. */ + word1 = GEN_INT (HOST_WIDE_INT_C (0x8f7b00108c3b0018)); + word2 = GEN_INT (HOST_WIDE_INT_C (0x43ff075f0ffb0000)); + + /* Store the first two words, as computed above. */ + mem = adjust_address (m_tramp, DImode, 0); + emit_move_insn (mem, word1); + mem = adjust_address (m_tramp, DImode, 8); + emit_move_insn (mem, word2); + + /* Store function address and static chain value. */ + mem = adjust_address (m_tramp, Pmode, 16); + emit_move_insn (mem, fnaddr); + mem = adjust_address (m_tramp, Pmode, 24); + emit_move_insn (mem, chain_value); + + emit_insn (gen_imb ()); +#ifdef HAVE_ENABLE_EXECUTE_STACK + emit_library_call (init_one_libfunc ("__enable_execute_stack"), + LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode); +#endif +} + +/* Determine where to put an argument to a function. + Value is zero to push the argument on the stack, + or a hard register in which to store the argument. + + CUM is a variable of type CUMULATIVE_ARGS which gives info about + the preceding args and about the function being called. + + ARG is a description of the argument. + On Sw_64 the first 6 words of args are normally in registers + and the rest are pushed. */ + +static rtx +sw_64_function_arg (cumulative_args_t cum_v, const function_arg_info &arg) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + int basereg; + int num_args; + + /* Don't get confused and pass small structures in FP registers. */ + if (arg.aggregate_type_p ()) + basereg = 16; + else + { + /* With sw_64_split_complex_arg, we shouldn't see any raw complex + values here. */ + gcc_checking_assert (!COMPLEX_MODE_P (arg.mode)); + + /* Set up defaults for FP operands passed in FP registers, and + integral operands passed in integer registers. */ + if (TARGET_FPREGS && GET_MODE_CLASS (arg.mode) == MODE_FLOAT) + basereg = 32 + 16; + else + basereg = 16; + } + + /* ??? Irritatingly, the definition of CUMULATIVE_ARGS is different for + the two platforms, so we can't avoid conditional compilation. */ + { + if (*cum >= 6) + return NULL_RTX; + num_args = *cum; + + if (arg.end_marker_p ()) + basereg = 16; + else if (targetm.calls.must_pass_in_stack (arg)) + return NULL_RTX; + } + + return gen_rtx_REG (arg.mode, num_args + basereg); +} + +/* Update the data in CUM to advance over an argument ARG. */ + +static void +sw_64_function_arg_advance (cumulative_args_t cum_v, + const function_arg_info &arg) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + bool onstack = targetm.calls.must_pass_in_stack (arg); + int increment = onstack ? 6 : SW_64_ARG_SIZE (arg.mode, arg.type); + + *cum += increment; +} + +static int +sw_64_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg) +{ + int words = 0; + CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED = get_cumulative_args (cum_v); + + if (*cum < 6 && 6 < *cum + SW_64_ARG_SIZE (arg.mode, arg.type)) + words = 6 - *cum; + + return words * UNITS_PER_WORD; +} + +/* Return true if ARG must be returned in memory, instead of in registers. 
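   Aggregates and float vectors are always returned in memory; scalar,
   complex and integer-vector types are returned in memory only when they
   exceed one register (complex floats are judged by their element size).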
*/ + +static bool +sw_64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED) +{ + machine_mode mode = VOIDmode; + int size; + + if (type) + { + mode = TYPE_MODE (type); + + /* All aggregates are returned in memory, except on OpenVMS where + records that fit 64 bits should be returned by immediate value + as required by section 3.8.7.1 of the OpenVMS Calling Standard. */ + if (AGGREGATE_TYPE_P (type)) + return true; + } + + size = GET_MODE_SIZE (mode); + switch (GET_MODE_CLASS (mode)) + { + case MODE_VECTOR_FLOAT: + /* Pass all float vectors in memory, like an aggregate. */ + return true; + + case MODE_COMPLEX_FLOAT: + /* We judge complex floats on the size of their element, + not the size of the whole type. */ + size = GET_MODE_UNIT_SIZE (mode); + break; + + case MODE_INT: + case MODE_FLOAT: + case MODE_COMPLEX_INT: + case MODE_VECTOR_INT: + break; + + default: + /* ??? We get called on all sorts of random stuff from + aggregate_value_p. We must return something, but it's not + clear what's safe to return. Pretend it's a struct I + guess. */ + return true; + } + + /* Otherwise types must fit in one register. */ + return size > UNITS_PER_WORD; +} + +/* Return true if TYPE should be passed by invisible reference. */ + +static bool +sw_64_pass_by_reference (cumulative_args_t, const function_arg_info &arg) +{ + /* Pass float and _Complex float variable arguments by reference. + This avoids 64-bit store from a FP register to a pretend args save area + and subsequent 32-bit load from the saved location to a FP register. + + Note that 32-bit loads and stores to/from a FP register on sw_64 reorder + bits to form a canonical 64-bit value in the FP register. This fact + invalidates compiler assumption that 32-bit FP value lives in the lower + 32-bits of the passed 64-bit FP value, so loading the 32-bit value from + the stored 64-bit location using 32-bit FP load is invalid on sw_64. + + This introduces sort of ABI incompatibility, but until _Float32 was + introduced, C-family languages promoted 32-bit float variable arg to + a 64-bit double, and it was not allowed to pass float as a varible + argument. Passing _Complex float as a variable argument never + worked on sw_64. Thus, we have no backward compatibility issues + to worry about, and passing unpromoted _Float32 and _Complex float + as a variable argument will actually work in the future. */ + + if (arg.mode == SFmode || arg.mode == SCmode) + return !arg.named; + + return arg.mode == TFmode || arg.mode == TCmode; +} + +/* Define how to find the value returned by a function. VALTYPE is the + data type of the value (as a tree). If the precise function being + called is known, FUNC is its FUNCTION_DECL; otherwise, FUNC is 0. + MODE is set instead of VALTYPE for libcalls. + + On Sw_64 the value is found in $0 for integer functions and + $f0 for floating-point functions. */ + +static rtx +sw_64_function_value_1 (const_tree valtype, const_tree func ATTRIBUTE_UNUSED, + machine_mode mode) +{ + unsigned int regnum, dummy ATTRIBUTE_UNUSED; + enum mode_class mclass; + + gcc_assert (!valtype || !sw_64_return_in_memory (valtype, func)); + + if (valtype) + mode = TYPE_MODE (valtype); + + mclass = GET_MODE_CLASS (mode); + switch (mclass) + { + case MODE_INT: + /* Do the same thing as PROMOTE_MODE except for libcalls on VMS, + where we have them returning both SImode and DImode. 
*/ + PROMOTE_MODE (mode, dummy, valtype); + /* FALLTHRU */ + + case MODE_COMPLEX_INT: + case MODE_VECTOR_INT: + regnum = 0; + break; + + case MODE_FLOAT: + regnum = 32; + break; + + case MODE_COMPLEX_FLOAT: + { + machine_mode cmode = GET_MODE_INNER (mode); + + return gen_rtx_PARALLEL ( + VOIDmode, + gen_rtvec (2, + gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 32), + const0_rtx), + gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 33), + GEN_INT (GET_MODE_SIZE (cmode))))); + } + + case MODE_RANDOM: + default: + gcc_unreachable (); + } + + return gen_rtx_REG (mode, regnum); +} + +/* Implement TARGET_FUNCTION_VALUE. */ + +static rtx +sw_64_function_value (const_tree valtype, const_tree fn_decl_or_type, + bool /* outgoing */) +{ + return sw_64_function_value_1 (valtype, fn_decl_or_type, VOIDmode); +} + +/* Implement TARGET_LIBCALL_VALUE. */ + +static rtx +sw_64_libcall_value (machine_mode mode, const_rtx /* fun */) +{ + return sw_64_function_value_1 (NULL_TREE, NULL_TREE, mode); +} + +/* Implement TARGET_FUNCTION_VALUE_REGNO_P. + + On the Sw_64, $0 $1 and $f0 $f1 are the only register thus used. */ + +static bool +sw_64_function_value_regno_p (const unsigned int regno) +{ + return (regno == 0 || regno == 1 || regno == 32 || regno == 33); +} + +/* TCmode complex values are passed by invisible reference. We + should not split these values. */ + +static bool +sw_64_split_complex_arg (const_tree type) +{ + return TYPE_MODE (type) != TCmode; +} + +static tree +sw_64_build_builtin_va_list (void) +{ + tree base, ofs, space, record, type_decl; + + record = (*lang_hooks.types.make_type) (RECORD_TYPE); + type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL, + get_identifier ("__va_list_tag"), record); + TYPE_STUB_DECL (record) = type_decl; + TYPE_NAME (record) = type_decl; + + /* C++? SET_IS_AGGR_TYPE (record, 1); */ + + /* Dummy field to prevent alignment warnings. */ + space + = build_decl (BUILTINS_LOCATION, FIELD_DECL, NULL_TREE, integer_type_node); + DECL_FIELD_CONTEXT (space) = record; + DECL_ARTIFICIAL (space) = 1; + DECL_IGNORED_P (space) = 1; + + ofs = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("__offset"), + integer_type_node); + DECL_FIELD_CONTEXT (ofs) = record; + DECL_CHAIN (ofs) = space; + + base = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("__base"), + ptr_type_node); + DECL_FIELD_CONTEXT (base) = record; + DECL_CHAIN (base) = ofs; + + TYPE_FIELDS (record) = base; + layout_type (record); + + va_list_gpr_counter_field = ofs; + return record; +} + +/* Helper function for sw_64_stdarg_optimize_hook. Skip over casts + and constant additions. */ + +static gimple * +va_list_skip_additions (tree lhs) +{ + gimple *stmt; + + for (;;) + { + enum tree_code code; + + stmt = SSA_NAME_DEF_STMT (lhs); + + if (gimple_code (stmt) == GIMPLE_PHI) + return stmt; + + if (!is_gimple_assign (stmt) || gimple_assign_lhs (stmt) != lhs) + return NULL; + + if (TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME) + return stmt; + code = gimple_assign_rhs_code (stmt); + if (!CONVERT_EXPR_CODE_P (code) + && ((code != PLUS_EXPR && code != POINTER_PLUS_EXPR) + || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST + || !tree_fits_uhwi_p (gimple_assign_rhs2 (stmt)))) + return stmt; + + lhs = gimple_assign_rhs1 (stmt); + } +} + +/* Check if LHS = RHS statement is + LHS = *(ap.__base + ap.__offset + cst) + or + LHS = *(ap.__base + + ((ap.__offset + cst <= 47) + ? ap.__offset + cst - 48 : ap.__offset + cst) + cst2). 
+ If the former, indicate that GPR registers are needed, + if the latter, indicate that FPR registers are needed. + + Also look for LHS = (*ptr).field, where ptr is one of the forms + listed above. + + On sw_64, cfun->va_list_gpr_size is used as size of the needed + regs and cfun->va_list_fpr_size is a bitmask, bit 0 set if GPR + registers are needed and bit 1 set if FPR registers are needed. + Return true if va_list references should not be scanned for the + current statement. */ + +static bool +sw_64_stdarg_optimize_hook (struct stdarg_info *si, const gimple *stmt) +{ + tree base, offset, rhs; + int offset_arg = 1; + gimple *base_stmt; + + if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)) != GIMPLE_SINGLE_RHS) + return false; + + rhs = gimple_assign_rhs1 (stmt); + while (handled_component_p (rhs)) + rhs = TREE_OPERAND (rhs, 0); + if (TREE_CODE (rhs) != MEM_REF + || TREE_CODE (TREE_OPERAND (rhs, 0)) != SSA_NAME) + return false; + + stmt = va_list_skip_additions (TREE_OPERAND (rhs, 0)); + if (stmt == NULL || !is_gimple_assign (stmt) + || gimple_assign_rhs_code (stmt) != POINTER_PLUS_EXPR) + return false; + + base = gimple_assign_rhs1 (stmt); + if (TREE_CODE (base) == SSA_NAME) + { + base_stmt = va_list_skip_additions (base); + if (base_stmt && is_gimple_assign (base_stmt) + && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF) + base = gimple_assign_rhs1 (base_stmt); + } + + if (TREE_CODE (base) != COMPONENT_REF + || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node)) + { + base = gimple_assign_rhs2 (stmt); + if (TREE_CODE (base) == SSA_NAME) + { + base_stmt = va_list_skip_additions (base); + if (base_stmt && is_gimple_assign (base_stmt) + && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF) + base = gimple_assign_rhs1 (base_stmt); + } + + if (TREE_CODE (base) != COMPONENT_REF + || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node)) + return false; + + offset_arg = 0; + } + + base = get_base_address (base); + if (TREE_CODE (base) != VAR_DECL + || !bitmap_bit_p (si->va_list_vars, DECL_UID (base) + num_ssa_names)) + return false; + + offset = gimple_op (stmt, 1 + offset_arg); + if (TREE_CODE (offset) == SSA_NAME) + { + gimple *offset_stmt = va_list_skip_additions (offset); + + if (offset_stmt && gimple_code (offset_stmt) == GIMPLE_PHI) + { + HOST_WIDE_INT sub; + gimple *arg1_stmt, *arg2_stmt; + tree arg1, arg2; + enum tree_code code1, code2; + + if (gimple_phi_num_args (offset_stmt) != 2) + goto escapes; + + arg1_stmt + = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 0)); + arg2_stmt + = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 1)); + if (arg1_stmt == NULL || !is_gimple_assign (arg1_stmt) + || arg2_stmt == NULL || !is_gimple_assign (arg2_stmt)) + goto escapes; + + code1 = gimple_assign_rhs_code (arg1_stmt); + code2 = gimple_assign_rhs_code (arg2_stmt); + if (code1 == COMPONENT_REF + && (code2 == MINUS_EXPR || code2 == PLUS_EXPR)) + /* Do nothing. 
*/; + else if (code2 == COMPONENT_REF + && (code1 == MINUS_EXPR || code1 == PLUS_EXPR)) + { + std::swap (arg1_stmt, arg2_stmt); + code2 = code1; + } + else + goto escapes; + + if (!tree_fits_shwi_p (gimple_assign_rhs2 (arg2_stmt))) + goto escapes; + + sub = tree_to_shwi (gimple_assign_rhs2 (arg2_stmt)); + if (code2 == MINUS_EXPR) + sub = -sub; + if (sub < -48 || sub > -32) + goto escapes; + + arg1 = gimple_assign_rhs1 (arg1_stmt); + arg2 = gimple_assign_rhs1 (arg2_stmt); + if (TREE_CODE (arg2) == SSA_NAME) + { + arg2_stmt = va_list_skip_additions (arg2); + if (arg2_stmt == NULL || !is_gimple_assign (arg2_stmt) + || gimple_assign_rhs_code (arg2_stmt) != COMPONENT_REF) + goto escapes; + arg2 = gimple_assign_rhs1 (arg2_stmt); + } + if (arg1 != arg2) + goto escapes; + + if (TREE_CODE (arg1) != COMPONENT_REF + || TREE_OPERAND (arg1, 1) != va_list_gpr_counter_field + || get_base_address (arg1) != base) + goto escapes; + + /* Need floating point regs. */ + cfun->va_list_fpr_size |= 2; + return false; + } + if (offset_stmt && is_gimple_assign (offset_stmt) + && gimple_assign_rhs_code (offset_stmt) == COMPONENT_REF) + offset = gimple_assign_rhs1 (offset_stmt); + } + if (TREE_CODE (offset) != COMPONENT_REF + || TREE_OPERAND (offset, 1) != va_list_gpr_counter_field + || get_base_address (offset) != base) + goto escapes; + else + /* Need general regs. */ + cfun->va_list_fpr_size |= 1; + return false; + +escapes: + si->va_list_escapes = true; + return false; +} + +/* Perform any needed actions needed for a function that is receiving a + variable number of arguments. */ + +static void +sw_64_setup_incoming_varargs (cumulative_args_t pcum, + const function_arg_info &arg, int *pretend_size, + int no_rtl) +{ + CUMULATIVE_ARGS cum = *get_cumulative_args (pcum); + + /* Skip the current argument. */ + targetm.calls.function_arg_advance (pack_cumulative_args (&cum), arg); + + /* On SYSV and friends, we allocate space for all 12 arg registers, but + only push those that are remaining. However, if NO registers need to + be saved, don't allocate any space. This is not only because we won't + need the space, but because AP includes the current_pretend_args_size + and we don't want to mess up any ap-relative addresses already made. + + If we are not to use the floating-point registers, save the integer + registers where we would put the floating-point registers. This is + not the most efficient way to implement varargs with just one register + class, but it isn't worth doing anything more efficient in this rare + case. */ + if (cum >= 6) + return; + + if (!no_rtl) + { + int count; + alias_set_type set = get_varargs_alias_set (); + rtx tmp; + + count = cfun->va_list_gpr_size / UNITS_PER_WORD; + if (count > 6 - cum) + count = 6 - cum; + + /* Detect whether integer registers or floating-point registers + are needed by the detected va_arg statements. See above for + how these values are computed. Note that the "escape" value + is VA_LIST_MAX_FPR_SIZE, which is 255, which has both of + these bits set. 
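	 Bit 0 asks for the block of integer argument registers and bit 1 for
	 the floating-point block, matching the encoding set up by
	 sw_64_stdarg_optimize_hook above.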
*/ + gcc_assert ((VA_LIST_MAX_FPR_SIZE & 3) == 3); + + if (cfun->va_list_fpr_size & 1) + { + tmp = gen_rtx_MEM (BLKmode, + plus_constant (Pmode, virtual_incoming_args_rtx, + (cum + 6) * UNITS_PER_WORD)); + MEM_NOTRAP_P (tmp) = 1; + set_mem_alias_set (tmp, set); + move_block_from_reg (16 + cum, tmp, count); + } + + if (cfun->va_list_fpr_size & 2) + { + tmp = gen_rtx_MEM (BLKmode, + plus_constant (Pmode, virtual_incoming_args_rtx, + cum * UNITS_PER_WORD)); + MEM_NOTRAP_P (tmp) = 1; + set_mem_alias_set (tmp, set); + move_block_from_reg (16 + cum + TARGET_FPREGS * 32, tmp, count); + } + } +#ifdef SW_64_ENABLE_FULL_ASAN + cfun->machine->frame.saved_varargs_size = 12 * UNITS_PER_WORD; +#else + *pretend_size = 12 * UNITS_PER_WORD; +#endif +} + +static void +sw_64_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED) +{ + HOST_WIDE_INT offset; + tree t, offset_field, base_field; + + if (TREE_CODE (TREE_TYPE (valist)) == ERROR_MARK) + return; + + /* For Unix, TARGET_SETUP_INCOMING_VARARGS moves the starting address base + up by 48, storing fp arg registers in the first 48 bytes, and the + integer arg registers in the next 48 bytes. This is only done, + however, if any integer registers need to be stored. + + If no integer registers need be stored, then we must subtract 48 + in order to account for the integer arg registers which are counted + in argsize above, but which are not actually stored on the stack. + Must further be careful here about structures straddling the last + integer argument register; that futzes with pretend_args_size, + which changes the meaning of AP. */ + + if (NUM_ARGS < 6) + offset = 6 * UNITS_PER_WORD; + else +#ifdef SW_64_ENABLE_FULL_ASAN + offset = -6 * UNITS_PER_WORD + cfun->machine->frame.saved_varargs_size + + crtl->args.pretend_args_size; +#else + offset = -6 * UNITS_PER_WORD + crtl->args.pretend_args_size; +#endif + + base_field = TYPE_FIELDS (TREE_TYPE (valist)); + offset_field = DECL_CHAIN (base_field); + + base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field), valist, + base_field, NULL_TREE); + offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field), valist, + offset_field, NULL_TREE); + + t = make_tree (ptr_type_node, virtual_incoming_args_rtx); + t = fold_build_pointer_plus_hwi (t, offset); + t = build2 (MODIFY_EXPR, TREE_TYPE (base_field), base_field, t); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + + t = build_int_cst (NULL_TREE, NUM_ARGS * UNITS_PER_WORD); + t = build2 (MODIFY_EXPR, TREE_TYPE (offset_field), offset_field, t); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); +} + +static tree +sw_64_gimplify_va_arg_1 (tree type, tree base, tree offset, gimple_seq *pre_p) +{ + tree type_size, ptr_type, addend, t, addr; + gimple_seq internal_post; + + /* If the type could not be passed in registers, skip the block + reserved for the registers. */ + if (must_pass_va_arg_in_stack (type)) + { + t = build_int_cst (TREE_TYPE (offset), 6 * 8); + gimplify_assign (offset, build2 (MAX_EXPR, TREE_TYPE (offset), offset, t), + pre_p); + } + + addend = offset; + ptr_type = build_pointer_type_for_mode (type, ptr_mode, true); + + if (TREE_CODE (type) == COMPLEX_TYPE) + { + tree real_part, imag_part, real_temp; + + real_part + = sw_64_gimplify_va_arg_1 (TREE_TYPE (type), base, offset, pre_p); + + /* Copy the value into a new temporary, lest the formal temporary + be reused out from under us. 
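	 Gimplifying the imaginary part below advances OFFSET again, so the
	 real part has to be forced into its own temporary first.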
*/ + real_temp = get_initialized_tmp_var (real_part, pre_p, NULL); + + imag_part + = sw_64_gimplify_va_arg_1 (TREE_TYPE (type), base, offset, pre_p); + + return build2 (COMPLEX_EXPR, type, real_temp, imag_part); + } + else if (TREE_CODE (type) == REAL_TYPE) + { + tree fpaddend, cond, fourtyeight; + + fourtyeight = build_int_cst (TREE_TYPE (addend), 6 * 8); + fpaddend + = fold_build2 (MINUS_EXPR, TREE_TYPE (addend), addend, fourtyeight); + cond = fold_build2 (LT_EXPR, boolean_type_node, addend, fourtyeight); + addend + = fold_build3 (COND_EXPR, TREE_TYPE (addend), cond, fpaddend, addend); + } + + /* Build the final address and force that value into a temporary. */ + addr = fold_build_pointer_plus (fold_convert (ptr_type, base), addend); + internal_post = NULL; + gimplify_expr (&addr, pre_p, &internal_post, is_gimple_val, fb_rvalue); + gimple_seq_add_seq (pre_p, internal_post); + + /* Update the offset field. */ + type_size = TYPE_SIZE_UNIT (TYPE_MAIN_VARIANT (type)); + if (type_size == NULL || TREE_OVERFLOW (type_size)) + t = size_zero_node; + else + { + t = size_binop (PLUS_EXPR, type_size, size_int (7)); + t = size_binop (TRUNC_DIV_EXPR, t, size_int (8)); + t = size_binop (MULT_EXPR, t, size_int (8)); + } + t = fold_convert (TREE_TYPE (offset), t); + gimplify_assign (offset, build2 (PLUS_EXPR, TREE_TYPE (offset), offset, t), + pre_p); + + return build_va_arg_indirect_ref (addr); +} + +static tree +sw_64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, + gimple_seq *post_p) +{ + tree offset_field, base_field, offset, base, t, r; + bool indirect; + + base_field = TYPE_FIELDS (va_list_type_node); + offset_field = DECL_CHAIN (base_field); + base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field), valist, + base_field, NULL_TREE); + offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field), valist, + offset_field, NULL_TREE); + + /* Pull the fields of the structure out into temporaries. Since we never + modify the base field, we can use a formal temporary. Sign-extend the + offset field so that it's the proper width for pointer arithmetic. */ + base = get_formal_tmp_var (base_field, pre_p); + + t = fold_convert (build_nonstandard_integer_type (64, 0), offset_field); + offset = get_initialized_tmp_var (t, pre_p, NULL); + + indirect = pass_va_arg_by_reference (type); + + if (indirect) + { + if (TREE_CODE (type) == COMPLEX_TYPE + && targetm.calls.split_complex_arg (type)) + { + tree real_part, imag_part, real_temp; + + tree ptr_type + = build_pointer_type_for_mode (TREE_TYPE (type), ptr_mode, true); + + real_part = sw_64_gimplify_va_arg_1 (ptr_type, base, offset, pre_p); + real_part = build_va_arg_indirect_ref (real_part); + + /* Copy the value into a new temporary, lest the formal temporary + be reused out from under us. */ + real_temp = get_initialized_tmp_var (real_part, pre_p, NULL); + + imag_part = sw_64_gimplify_va_arg_1 (ptr_type, base, offset, pre_p); + imag_part = build_va_arg_indirect_ref (imag_part); + + r = build2 (COMPLEX_EXPR, type, real_temp, imag_part); + + /* Stuff the offset temporary back into its field. */ + gimplify_assign (unshare_expr (offset_field), + fold_convert (TREE_TYPE (offset_field), offset), + pre_p); + return r; + } + else + type = build_pointer_type_for_mode (type, ptr_mode, true); + } + + /* Find the value. Note that this will be a stable indirection, or + a composite of stable indirections in the case of complex. */ + r = sw_64_gimplify_va_arg_1 (type, base, offset, pre_p); + + /* Stuff the offset temporary back into its field. 
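     Subsequent va_arg expansions read __offset from the va_list, so the
     advanced value has to be written back before returning.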
*/ + gimplify_assign (unshare_expr (offset_field), + fold_convert (TREE_TYPE (offset_field), offset), pre_p); + + if (indirect) + r = build_va_arg_indirect_ref (r); + + return r; +} + +/* Builtins. */ + +enum sw_64_builtin +{ + SW_64_BUILTIN_CMPBGE, + SW_64_BUILTIN_EXTBL, + SW_64_BUILTIN_EXTWL, + SW_64_BUILTIN_EXTLL, + SW_64_BUILTIN_EXTQL, + SW_64_BUILTIN_EXTWH, + SW_64_BUILTIN_EXTLH, + SW_64_BUILTIN_EXTQH, + SW_64_BUILTIN_INSBL, + SW_64_BUILTIN_INSWL, + SW_64_BUILTIN_INSLL, + SW_64_BUILTIN_INSQL, + SW_64_BUILTIN_INSWH, + SW_64_BUILTIN_INSLH, + SW_64_BUILTIN_INSQH, + SW_64_BUILTIN_MSKBL, + SW_64_BUILTIN_MSKWL, + SW_64_BUILTIN_MSKLL, + SW_64_BUILTIN_MSKQL, + SW_64_BUILTIN_MSKWH, + SW_64_BUILTIN_MSKLH, + SW_64_BUILTIN_MSKQH, + SW_64_BUILTIN_UMULH, + SW_64_BUILTIN_ZAP, + SW_64_BUILTIN_ZAPNOT, + SW_64_BUILTIN_AMASK, + SW_64_BUILTIN_IMPLVER, + SW_64_BUILTIN_RPCC, + + /* TARGET_MAX. */ + SW_64_BUILTIN_MINUB8, + SW_64_BUILTIN_MINSB8, + SW_64_BUILTIN_MINUW4, + SW_64_BUILTIN_MINSW4, + SW_64_BUILTIN_MAXUB8, + SW_64_BUILTIN_MAXSB8, + SW_64_BUILTIN_MAXUW4, + SW_64_BUILTIN_MAXSW4, + SW_64_BUILTIN_PERR, + SW_64_BUILTIN_PKLB, + SW_64_BUILTIN_PKWB, + SW_64_BUILTIN_UNPKBL, + SW_64_BUILTIN_UNPKBW, + + /* TARGET_CIX. */ + SW_64_BUILTIN_CTTZ, + SW_64_BUILTIN_CTLZ, + SW_64_BUILTIN_CTPOP, + SW_64_BUILTIN_SBT, + SW_64_BUILTIN_CBT, + + SW_64_BUILTIN_max +}; + +static enum insn_code const code_for_builtin[SW_64_BUILTIN_max] + = {CODE_FOR_builtin_cmpbge, CODE_FOR_extbl, CODE_FOR_extwl, CODE_FOR_extll, + CODE_FOR_extql, CODE_FOR_extwh, CODE_FOR_extlh, CODE_FOR_extqh, + CODE_FOR_builtin_insbl, CODE_FOR_builtin_inswl, CODE_FOR_builtin_insll, + CODE_FOR_insql, CODE_FOR_inswh, CODE_FOR_inslh, CODE_FOR_insqh, + CODE_FOR_mskbl, CODE_FOR_mskwl, CODE_FOR_mskll, CODE_FOR_mskql, + CODE_FOR_mskwh, CODE_FOR_msklh, CODE_FOR_mskqh, CODE_FOR_umuldi3_highpart, + CODE_FOR_builtin_zap, CODE_FOR_builtin_zapnot, CODE_FOR_builtin_amask, + CODE_FOR_builtin_implver, CODE_FOR_builtin_rpcc, + + + /* TARGET_MAX */ + CODE_FOR_builtin_minub8, CODE_FOR_builtin_minsb8, CODE_FOR_builtin_minuw4, + CODE_FOR_builtin_minsw4, CODE_FOR_builtin_maxub8, CODE_FOR_builtin_maxsb8, + CODE_FOR_builtin_maxuw4, CODE_FOR_builtin_maxsw4, CODE_FOR_builtin_perr, + CODE_FOR_builtin_pklb, CODE_FOR_builtin_pkwb, CODE_FOR_builtin_unpkbl, + CODE_FOR_builtin_unpkbw, + + /* TARGET_CIX */ + CODE_FOR_ctzdi2, CODE_FOR_clzdi2, CODE_FOR_popcountdi2, + + CODE_FOR_builtin_sbt, CODE_FOR_builtin_cbt}; + +struct sw_64_builtin_def +{ + const char *name; + enum sw_64_builtin code; + unsigned int target_mask; + bool is_const; +}; + +static struct sw_64_builtin_def const zero_arg_builtins[] + = {{"__builtin_sw_64_implver", SW_64_BUILTIN_IMPLVER, 0, true}, + {"__builtin_sw_64_rpcc", SW_64_BUILTIN_RPCC, 0, false}}; + +static struct sw_64_builtin_def const one_arg_builtins[] + = {{"__builtin_sw_64_amask", SW_64_BUILTIN_AMASK, 0, true}, + {"__builtin_sw_64_pklb", SW_64_BUILTIN_PKLB, MASK_MAX, true}, + {"__builtin_sw_64_pkwb", SW_64_BUILTIN_PKWB, MASK_MAX, true}, + {"__builtin_sw_64_unpkbl", SW_64_BUILTIN_UNPKBL, MASK_MAX, true}, + {"__builtin_sw_64_unpkbw", SW_64_BUILTIN_UNPKBW, MASK_MAX, true}, + {"__builtin_sw_64_cttz", SW_64_BUILTIN_CTTZ, MASK_CIX, true}, + {"__builtin_sw_64_ctlz", SW_64_BUILTIN_CTLZ, MASK_CIX, true}, + {"__builtin_sw_64_ctpop", SW_64_BUILTIN_CTPOP, MASK_CIX, true}}; + +static struct sw_64_builtin_def const two_arg_builtins[] + = {{"__builtin_sw_64_cmpbge", SW_64_BUILTIN_CMPBGE, 0, true}, + {"__builtin_sw_64_extbl", SW_64_BUILTIN_EXTBL, 0, true}, + 
{"__builtin_sw_64_extwl", SW_64_BUILTIN_EXTWL, 0, true}, + {"__builtin_sw_64_extll", SW_64_BUILTIN_EXTLL, 0, true}, + {"__builtin_sw_64_extql", SW_64_BUILTIN_EXTQL, 0, true}, + {"__builtin_sw_64_extwh", SW_64_BUILTIN_EXTWH, 0, true}, + {"__builtin_sw_64_extlh", SW_64_BUILTIN_EXTLH, 0, true}, + {"__builtin_sw_64_extqh", SW_64_BUILTIN_EXTQH, 0, true}, + {"__builtin_sw_64_insbl", SW_64_BUILTIN_INSBL, 0, true}, + {"__builtin_sw_64_inswl", SW_64_BUILTIN_INSWL, 0, true}, + {"__builtin_sw_64_insll", SW_64_BUILTIN_INSLL, 0, true}, + {"__builtin_sw_64_insql", SW_64_BUILTIN_INSQL, 0, true}, + {"__builtin_sw_64_inswh", SW_64_BUILTIN_INSWH, 0, true}, + {"__builtin_sw_64_inslh", SW_64_BUILTIN_INSLH, 0, true}, + {"__builtin_sw_64_insqh", SW_64_BUILTIN_INSQH, 0, true}, + {"__builtin_sw_64_mskbl", SW_64_BUILTIN_MSKBL, 0, true}, + {"__builtin_sw_64_mskwl", SW_64_BUILTIN_MSKWL, 0, true}, + {"__builtin_sw_64_mskll", SW_64_BUILTIN_MSKLL, 0, true}, + {"__builtin_sw_64_mskql", SW_64_BUILTIN_MSKQL, 0, true}, + {"__builtin_sw_64_mskwh", SW_64_BUILTIN_MSKWH, 0, true}, + {"__builtin_sw_64_msklh", SW_64_BUILTIN_MSKLH, 0, true}, + {"__builtin_sw_64_mskqh", SW_64_BUILTIN_MSKQH, 0, true}, + {"__builtin_sw_64_umulh", SW_64_BUILTIN_UMULH, 0, true}, + {"__builtin_sw_64_zap", SW_64_BUILTIN_ZAP, 0, true}, + {"__builtin_sw_64_zapnot", SW_64_BUILTIN_ZAPNOT, 0, true}, + {"__builtin_sw_64_minub8", SW_64_BUILTIN_MINUB8, MASK_MAX, true}, + {"__builtin_sw_64_minsb8", SW_64_BUILTIN_MINSB8, MASK_MAX, true}, + {"__builtin_sw_64_minuw4", SW_64_BUILTIN_MINUW4, MASK_MAX, true}, + {"__builtin_sw_64_minsw4", SW_64_BUILTIN_MINSW4, MASK_MAX, true}, + {"__builtin_sw_64_maxub8", SW_64_BUILTIN_MAXUB8, MASK_MAX, true}, + {"__builtin_sw_64_maxsb8", SW_64_BUILTIN_MAXSB8, MASK_MAX, true}, + {"__builtin_sw_64_maxuw4", SW_64_BUILTIN_MAXUW4, MASK_MAX, true}, + {"__builtin_sw_64_maxsw4", SW_64_BUILTIN_MAXSW4, MASK_MAX, true}, + {"__builtin_sw_64_perr", SW_64_BUILTIN_PERR, MASK_MAX, true}, + {"__builtin_sw_64_sbt", SW_64_BUILTIN_SBT, MASK_SW8A, true}, + {"__builtin_sw_64_cbt", SW_64_BUILTIN_CBT, MASK_SW8A, true}}; + +static GTY (()) tree sw_64_dimode_u; +static GTY (()) tree sw_64_v8qi_u; +static GTY (()) tree sw_64_v8qi_s; +static GTY (()) tree sw_64_v4hi_u; +static GTY (()) tree sw_64_v4hi_s; + +static GTY (()) tree sw_64_builtins[(int) SW_64_BUILTIN_max]; + +/* Return the sw_64 builtin for CODE. */ + +static tree +sw_64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) +{ + if (code >= SW_64_BUILTIN_max) + return error_mark_node; + return sw_64_builtins[code]; +} + +/* Helper function of sw_64_init_builtins. Add the built-in specified + by NAME, TYPE, CODE, and ECF. */ + +static void +sw_64_builtin_function (const char *name, tree ftype, enum sw_64_builtin code, + unsigned ecf) +{ + tree decl = add_builtin_function (name, ftype, (int) code, BUILT_IN_MD, NULL, + NULL_TREE); + + if (ecf & ECF_CONST) + TREE_READONLY (decl) = 1; + if (ecf & ECF_NOTHROW) + TREE_NOTHROW (decl) = 1; + + sw_64_builtins[(int) code] = decl; +} + +/* Helper function of sw_64_init_builtins. Add the COUNT built-in + functions pointed to by P, with function type FTYPE. */ + +static void +sw_64_add_builtins (const struct sw_64_builtin_def *p, size_t count, tree ftype) +{ + size_t i; + + for (i = 0; i < count; ++i, ++p) + if ((target_flags & p->target_mask) == p->target_mask) + sw_64_builtin_function (p->name, ftype, p->code, + (p->is_const ? 
ECF_CONST : 0) | ECF_NOTHROW); +} + +static void +sw_64_init_builtins (void) +{ + tree ftype; + + sw_64_dimode_u = lang_hooks.types.type_for_mode (DImode, 1); + sw_64_v8qi_u = build_vector_type (unsigned_intQI_type_node, 8); + sw_64_v8qi_s = build_vector_type (intQI_type_node, 8); + sw_64_v4hi_u = build_vector_type (unsigned_intHI_type_node, 4); + sw_64_v4hi_s = build_vector_type (intHI_type_node, 4); + + ftype = build_function_type_list (sw_64_dimode_u, NULL_TREE); + sw_64_add_builtins (zero_arg_builtins, ARRAY_SIZE (zero_arg_builtins), ftype); + + ftype = build_function_type_list (sw_64_dimode_u, sw_64_dimode_u, NULL_TREE); + sw_64_add_builtins (one_arg_builtins, ARRAY_SIZE (one_arg_builtins), ftype); + + ftype = build_function_type_list (sw_64_dimode_u, sw_64_dimode_u, + sw_64_dimode_u, NULL_TREE); + sw_64_add_builtins (two_arg_builtins, ARRAY_SIZE (two_arg_builtins), ftype); +} + +/* Expand an expression EXP that calls a built-in function, + with result going to TARGET if that's convenient + (and in mode MODE if that's convenient). + SUBTARGET may be used as the target for computing one of EXP's operands. + IGNORE is nonzero if the value is to be ignored. */ + +static rtx +sw_64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, + machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) +{ +#define MAX_ARGS 2 + + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl); + tree arg; + call_expr_arg_iterator iter; + enum insn_code icode; + rtx op[MAX_ARGS], pat; + int arity; + bool nonvoid; + + if (fcode >= SW_64_BUILTIN_max) + internal_error ("bad builtin fcode"); + icode = code_for_builtin[fcode]; + if (icode == 0) + internal_error ("bad builtin fcode"); + + nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node; + + arity = 0; + FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) + { + const struct insn_operand_data *insn_op; + + if (arg == error_mark_node) + return NULL_RTX; + if (arity > MAX_ARGS) + return NULL_RTX; + + insn_op = &insn_data[icode].operand[arity + nonvoid]; + + op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL); + + if (!(*insn_op->predicate) (op[arity], insn_op->mode)) + op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]); + arity++; + } + + if (nonvoid) + { + machine_mode tmode = insn_data[icode].operand[0].mode; + if (!target || GET_MODE (target) != tmode + || !(*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + } + + switch (arity) + { + case 0: + pat = GEN_FCN (icode) (target); + break; + case 1: + if (nonvoid) + pat = GEN_FCN (icode) (target, op[0]); + else + pat = GEN_FCN (icode) (op[0]); + break; + case 2: + pat = GEN_FCN (icode) (target, op[0], op[1]); + break; + default: + gcc_unreachable (); + } + if (!pat) + return NULL_RTX; + emit_insn (pat); + + if (nonvoid) + return target; + else + return const0_rtx; +} + +/* Fold the builtin for the CMPBGE instruction. This is a vector comparison + with an 8-bit output vector. OPINT contains the integer operands; bit N + of OP_CONST is set if OPINT[N] is valid. 
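   When both operands are constant the comparison is evaluated bytewise at
   compile time; a constant zero second operand folds to 0xff, since every
   unsigned byte compares >= 0.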
*/ + +static tree +sw_64_fold_builtin_cmpbge (unsigned HOST_WIDE_INT opint[], long op_const) +{ + if (op_const == 3) + { + int i, val; + for (i = 0, val = 0; i < 8; ++i) + { + unsigned HOST_WIDE_INT c0 = (opint[0] >> (i * 8)) & 0xff; + unsigned HOST_WIDE_INT c1 = (opint[1] >> (i * 8)) & 0xff; + if (c0 >= c1) + val |= 1 << i; + } + return build_int_cst (sw_64_dimode_u, val); + } + else if (op_const == 2 && opint[1] == 0) + return build_int_cst (sw_64_dimode_u, 0xff); + return NULL; +} + +/* Fold the builtin for the ZAPNOT instruction. This is essentially a + specialized form of an AND operation. Other byte manipulation instructions + are defined in terms of this instruction, so this is also used as a + subroutine for other builtins. + + OP contains the tree operands; OPINT contains the extracted integer values. + Bit N of OP_CONST it set if OPINT[N] is valid. OP may be null if only + OPINT may be considered. */ + +static tree +sw_64_fold_builtin_zapnot (tree *op, unsigned HOST_WIDE_INT opint[], + long op_const) +{ + if (op_const & 2) + { + unsigned HOST_WIDE_INT mask = 0; + int i; + + for (i = 0; i < 8; ++i) + if ((opint[1] >> i) & 1) + mask |= (unsigned HOST_WIDE_INT) 0xff << (i * 8); + + if (op_const & 1) + return build_int_cst (sw_64_dimode_u, opint[0] & mask); + + if (op) + return fold_build2 (BIT_AND_EXPR, sw_64_dimode_u, op[0], + build_int_cst (sw_64_dimode_u, mask)); + } + else if ((op_const & 1) && opint[0] == 0) + return build_int_cst (sw_64_dimode_u, 0); + return NULL; +} + +/* Fold the builtins for the EXT family of instructions. */ + +static tree +sw_64_fold_builtin_extxx (tree op[], unsigned HOST_WIDE_INT opint[], + long op_const, unsigned HOST_WIDE_INT bytemask, + bool is_high) +{ + long zap_const = 2; + tree *zap_op = NULL; + + if (op_const & 2) + { + unsigned HOST_WIDE_INT loc; + + loc = opint[1] & 7; + loc *= BITS_PER_UNIT; + + if (loc != 0) + { + if (op_const & 1) + { + unsigned HOST_WIDE_INT temp = opint[0]; + if (is_high) + temp <<= loc; + else + temp >>= loc; + opint[0] = temp; + zap_const = 3; + } + } + else + zap_op = op; + } + + opint[1] = bytemask; + return sw_64_fold_builtin_zapnot (zap_op, opint, zap_const); +} + +/* Fold the builtins for the INS family of instructions. 
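   A constant zero first operand folds straight to zero; otherwise the value
   is shifted into position and the result is completed by the ZAPNOT folder
   above.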
*/ + +static tree +sw_64_fold_builtin_insxx (tree op[], unsigned HOST_WIDE_INT opint[], + long op_const, unsigned HOST_WIDE_INT bytemask, + bool is_high) +{ + if ((op_const & 1) && opint[0] == 0) + return build_int_cst (sw_64_dimode_u, 0); + + if (op_const & 2) + { + unsigned HOST_WIDE_INT temp, loc, byteloc; + tree *zap_op = NULL; + + loc = opint[1] & 7; + bytemask <<= loc; + + temp = opint[0]; + if (is_high) + { + byteloc = (64 - (loc * 8)) & 0x3f; + if (byteloc == 0) + zap_op = op; + else + temp >>= byteloc; + bytemask >>= 8; + } + else + { + byteloc = loc * 8; + if (byteloc == 0) + zap_op = op; + else + temp <<= byteloc; + } + + opint[0] = temp; + opint[1] = bytemask; + return sw_64_fold_builtin_zapnot (zap_op, opint, op_const); + } + + return NULL; +} + +static tree +sw_64_fold_builtin_mskxx (tree op[], unsigned HOST_WIDE_INT opint[], + long op_const, unsigned HOST_WIDE_INT bytemask, + bool is_high) +{ + if (op_const & 2) + { + unsigned HOST_WIDE_INT loc; + + loc = opint[1] & 7; + bytemask <<= loc; + + if (is_high) + bytemask >>= 8; + + opint[1] = bytemask ^ 0xff; + } + + return sw_64_fold_builtin_zapnot (op, opint, op_const); +} + +static tree +sw_64_fold_vector_minmax (enum tree_code code, tree op[], tree vtype) +{ + tree op0 = fold_convert (vtype, op[0]); + tree op1 = fold_convert (vtype, op[1]); + tree val = fold_build2 (code, vtype, op0, op1); + return fold_build1 (VIEW_CONVERT_EXPR, sw_64_dimode_u, val); +} + +static tree +sw_64_fold_builtin_perr (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp = 0; + int i; + + if (op_const != 3) + return NULL; + + for (i = 0; i < 8; ++i) + { + unsigned HOST_WIDE_INT a = (opint[0] >> (i * 8)) & 0xff; + unsigned HOST_WIDE_INT b = (opint[1] >> (i * 8)) & 0xff; + if (a >= b) + temp += a - b; + else + temp += b - a; + } + + return build_int_cst (sw_64_dimode_u, temp); +} + +static tree +sw_64_fold_builtin_pklb (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp; + + if (op_const == 0) + return NULL; + + temp = opint[0] & 0xff; + temp |= (opint[0] >> 24) & 0xff00; + + return build_int_cst (sw_64_dimode_u, temp); +} + +static tree +sw_64_fold_builtin_pkwb (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp; + + if (op_const == 0) + return NULL; + + temp = opint[0] & 0xff; + temp |= (opint[0] >> 8) & 0xff00; + temp |= (opint[0] >> 16) & 0xff0000; + temp |= (opint[0] >> 24) & 0xff000000; + + return build_int_cst (sw_64_dimode_u, temp); +} + +static tree +sw_64_fold_builtin_unpkbl (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp; + + if (op_const == 0) + return NULL; + + temp = opint[0] & 0xff; + temp |= (opint[0] & 0xff00) << 24; + + return build_int_cst (sw_64_dimode_u, temp); +} + +static tree +sw_64_fold_builtin_unpkbw (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp; + + if (op_const == 0) + return NULL; + + temp = opint[0] & 0xff; + temp |= (opint[0] & 0x0000ff00) << 8; + temp |= (opint[0] & 0x00ff0000) << 16; + temp |= (opint[0] & 0xff000000) << 24; + + return build_int_cst (sw_64_dimode_u, temp); +} + +static tree +sw_64_fold_builtin_cttz (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp; + + if (op_const == 0) + return NULL; + + if (opint[0] == 0) + temp = 64; + else + temp = exact_log2 (opint[0] & -opint[0]); + + return build_int_cst (sw_64_dimode_u, temp); +} + +static tree +sw_64_fold_builtin_ctlz (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned 
HOST_WIDE_INT temp; + + if (op_const == 0) + return NULL; + + if (opint[0] == 0) + temp = 64; + else + temp = 64 - floor_log2 (opint[0]) - 1; + + return build_int_cst (sw_64_dimode_u, temp); +} + +static tree +sw_64_fold_builtin_ctpop (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp, op; + + if (op_const == 0) + return NULL; + + op = opint[0]; + temp = 0; + while (op) + temp++, op &= op - 1; + + return build_int_cst (sw_64_dimode_u, temp); +} + +static tree +sw_64_builtin_sbt (int n_args, tree *op, unsigned HOST_WIDE_INT opint[], + long op_const) +{ + int i; + if (op_const == 0) + return NULL; + + if (TREE_CODE (op[0]) == INTEGER_CST) + { + error ("The first parameter cannot be a constant!"); + gcc_unreachable (); + } + + if ((opint[1] >> 63) & 0x1 & (warning_sbt_num == 1)) + warning (0, "The second parameter is negative [enabled by default]"); + + warning_sbt_num++; + return NULL; +} + +static tree +sw_64_builtin_cbt (int n_args, tree *op, unsigned HOST_WIDE_INT opint[], + long op_const) +{ + int i; + if (op_const == 0) + return NULL; + + if (TREE_CODE (op[0]) == INTEGER_CST) + { + error ("The first parameter cannot be a constant!"); + gcc_unreachable (); + } + + if ((opint[1] >> 63) & 0x1 & (warning_cbt_num == 1)) + warning (0, "The second parameter is negative [enabled by default]"); + + warning_cbt_num++; + return NULL; +} + +/* Fold one of our builtin functions. */ + +static tree +sw_64_fold_builtin (tree fndecl, int n_args, tree *op, + bool ignore ATTRIBUTE_UNUSED) +{ + unsigned HOST_WIDE_INT opint[MAX_ARGS]; + long op_const = 0; + int i; + + if (n_args > MAX_ARGS) + return NULL; + + for (i = 0; i < n_args; i++) + { + tree arg = op[i]; + if (arg == error_mark_node) + return NULL; + + opint[i] = 0; + if (TREE_CODE (arg) == INTEGER_CST) + { + op_const |= 1L << i; + opint[i] = int_cst_value (arg); + } + } + + switch (DECL_MD_FUNCTION_CODE (fndecl)) + { + case SW_64_BUILTIN_CMPBGE: + return sw_64_fold_builtin_cmpbge (opint, op_const); + + case SW_64_BUILTIN_EXTBL: + return sw_64_fold_builtin_extxx (op, opint, op_const, 0x01, false); + case SW_64_BUILTIN_EXTWL: + return sw_64_fold_builtin_extxx (op, opint, op_const, 0x03, false); + case SW_64_BUILTIN_EXTLL: + return sw_64_fold_builtin_extxx (op, opint, op_const, 0x0f, false); + case SW_64_BUILTIN_EXTQL: + return sw_64_fold_builtin_extxx (op, opint, op_const, 0xff, false); + case SW_64_BUILTIN_EXTWH: + return sw_64_fold_builtin_extxx (op, opint, op_const, 0x03, true); + case SW_64_BUILTIN_EXTLH: + return sw_64_fold_builtin_extxx (op, opint, op_const, 0x0f, true); + case SW_64_BUILTIN_EXTQH: + return sw_64_fold_builtin_extxx (op, opint, op_const, 0xff, true); + + case SW_64_BUILTIN_INSBL: + return sw_64_fold_builtin_insxx (op, opint, op_const, 0x01, false); + case SW_64_BUILTIN_INSWL: + return sw_64_fold_builtin_insxx (op, opint, op_const, 0x03, false); + case SW_64_BUILTIN_INSLL: + return sw_64_fold_builtin_insxx (op, opint, op_const, 0x0f, false); + case SW_64_BUILTIN_INSQL: + return sw_64_fold_builtin_insxx (op, opint, op_const, 0xff, false); + case SW_64_BUILTIN_INSWH: + return sw_64_fold_builtin_insxx (op, opint, op_const, 0x03, true); + case SW_64_BUILTIN_INSLH: + return sw_64_fold_builtin_insxx (op, opint, op_const, 0x0f, true); + case SW_64_BUILTIN_INSQH: + return sw_64_fold_builtin_insxx (op, opint, op_const, 0xff, true); + + case SW_64_BUILTIN_MSKBL: + return sw_64_fold_builtin_mskxx (op, opint, op_const, 0x01, false); + case SW_64_BUILTIN_MSKWL: + return sw_64_fold_builtin_mskxx (op, opint, 
op_const, 0x03, false); + case SW_64_BUILTIN_MSKLL: + return sw_64_fold_builtin_mskxx (op, opint, op_const, 0x0f, false); + case SW_64_BUILTIN_MSKQL: + return sw_64_fold_builtin_mskxx (op, opint, op_const, 0xff, false); + case SW_64_BUILTIN_MSKWH: + return sw_64_fold_builtin_mskxx (op, opint, op_const, 0x03, true); + case SW_64_BUILTIN_MSKLH: + return sw_64_fold_builtin_mskxx (op, opint, op_const, 0x0f, true); + case SW_64_BUILTIN_MSKQH: + return sw_64_fold_builtin_mskxx (op, opint, op_const, 0xff, true); + + case SW_64_BUILTIN_ZAP: + opint[1] ^= 0xff; + /* FALLTHRU */ + case SW_64_BUILTIN_ZAPNOT: + return sw_64_fold_builtin_zapnot (op, opint, op_const); + + case SW_64_BUILTIN_MINUB8: + return sw_64_fold_vector_minmax (MIN_EXPR, op, sw_64_v8qi_u); + case SW_64_BUILTIN_MINSB8: + return sw_64_fold_vector_minmax (MIN_EXPR, op, sw_64_v8qi_s); + case SW_64_BUILTIN_MINUW4: + return sw_64_fold_vector_minmax (MIN_EXPR, op, sw_64_v4hi_u); + case SW_64_BUILTIN_MINSW4: + return sw_64_fold_vector_minmax (MIN_EXPR, op, sw_64_v4hi_s); + case SW_64_BUILTIN_MAXUB8: + return sw_64_fold_vector_minmax (MAX_EXPR, op, sw_64_v8qi_u); + case SW_64_BUILTIN_MAXSB8: + return sw_64_fold_vector_minmax (MAX_EXPR, op, sw_64_v8qi_s); + case SW_64_BUILTIN_MAXUW4: + return sw_64_fold_vector_minmax (MAX_EXPR, op, sw_64_v4hi_u); + case SW_64_BUILTIN_MAXSW4: + return sw_64_fold_vector_minmax (MAX_EXPR, op, sw_64_v4hi_s); + + case SW_64_BUILTIN_PERR: + return sw_64_fold_builtin_perr (opint, op_const); + case SW_64_BUILTIN_PKLB: + return sw_64_fold_builtin_pklb (opint, op_const); + case SW_64_BUILTIN_PKWB: + return sw_64_fold_builtin_pkwb (opint, op_const); + case SW_64_BUILTIN_UNPKBL: + return sw_64_fold_builtin_unpkbl (opint, op_const); + case SW_64_BUILTIN_UNPKBW: + return sw_64_fold_builtin_unpkbw (opint, op_const); + + case SW_64_BUILTIN_CTTZ: + return sw_64_fold_builtin_cttz (opint, op_const); + case SW_64_BUILTIN_CTLZ: + return sw_64_fold_builtin_ctlz (opint, op_const); + case SW_64_BUILTIN_CTPOP: + return sw_64_fold_builtin_ctpop (opint, op_const); + case SW_64_BUILTIN_SBT: + return sw_64_builtin_sbt (n_args, op, opint, op_const); + case SW_64_BUILTIN_CBT: + return sw_64_builtin_cbt (n_args, op, opint, op_const); + case SW_64_BUILTIN_AMASK: + case SW_64_BUILTIN_IMPLVER: + case SW_64_BUILTIN_RPCC: + /* None of these are foldable at compile-time. */ + default: + return NULL; + } +} + +bool +sw_64_gimple_fold_builtin (gimple_stmt_iterator *gsi) +{ + bool changed = false; + gimple *stmt = gsi_stmt (*gsi); + tree call = gimple_call_fn (stmt); + gimple *new_stmt = NULL; + + if (call) + { + tree fndecl = gimple_call_fndecl (stmt); + + if (fndecl) + { + tree arg0, arg1; + + switch (DECL_MD_FUNCTION_CODE (fndecl)) + { + case SW_64_BUILTIN_UMULH: + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + + new_stmt = gimple_build_assign (gimple_call_lhs (stmt), + MULT_HIGHPART_EXPR, arg0, arg1); + break; + default: + break; + } + } + } + + if (new_stmt) + { + gsi_replace (gsi, new_stmt, true); + changed = true; + } + + return changed; +} + +/* This page contains routines that are used to determine what the function + prologue and epilogue code will do and write them out. */ + +/* Compute the size of the save area in the stack. */ + +/* These variables are used for communication between the following functions. + They indicate various things about the current function being compiled + that are used to tell what kind of prologue, epilogue and procedure + descriptor to generate. 
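   sw_64_compute_frame_layout below records the save mask, the register save
   area size and the rounded total frame size in cfun->machine.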
*/ + +/* Nonzero if we need a stack procedure. */ +enum sw_64_procedure_types +{ + PT_NULL = 0, + PT_REGISTER = 1, + PT_STACK = 2 +}; +static enum sw_64_procedure_types sw_64_procedure_type; + +/* Compute register masks for saved registers, register save area size, + and total frame size. */ +static void +sw_64_compute_frame_layout (void) +{ + unsigned HOST_WIDE_INT sa_mask = 0; + HOST_WIDE_INT frame_size; + int sa_size; + + /* When outputting a thunk, we don't have valid register life info, + but assemble_start_function wants to output .frame and .mask + directives. */ + if (!cfun->is_thunk) + { + /* One for every register we have to save. */ + for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (!call_used_or_fixed_reg_p (i) && df_regs_ever_live_p (i) + && i != REG_RA) + sa_mask |= HOST_WIDE_INT_1U << i; + + /* We need to restore these for the handler. */ + if (crtl->calls_eh_return) + { + for (unsigned i = 0;; ++i) + { + unsigned regno = EH_RETURN_DATA_REGNO (i); + if (regno == INVALID_REGNUM) + break; + sa_mask |= HOST_WIDE_INT_1U << regno; + } + } + /* If any register spilled, then spill the return address also. */ + /* ??? This is required by the Digital stack unwind specification + and isn't needed if we're doing Dwarf2 unwinding. */ + if (sa_mask || sw_64_ra_ever_killed ()) + sa_mask |= HOST_WIDE_INT_1U << REG_RA; + } + sa_size = popcount_hwi (sa_mask); + frame_size = get_frame_size (); + + /* Our size must be even (multiple of 16 bytes). */ + if (sa_size & 1) + sa_size++; + sa_size *= 8; + + frame_size = (SW_64_ROUND (crtl->outgoing_args_size) + sa_size + + SW_64_ROUND (frame_size + crtl->args.pretend_args_size)); + + cfun->machine->sa_mask = sa_mask; + cfun->machine->sa_size = sa_size; + cfun->machine->frame_size = frame_size; +} + +#undef TARGET_COMPUTE_FRAME_LAYOUT +#define TARGET_COMPUTE_FRAME_LAYOUT sw_64_layout_frame + +/* Return 1 if this function can directly return via $26. */ + +bool +direct_return (void) +{ + return (reload_completed && cfun->machine->frame_size == 0); +} + +bool +sw_64_find_lo_sum_using_gp (rtx insn) +{ + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) + { + const_rtx x = *iter; + if (GET_CODE (x) == LO_SUM && XEXP (x, 0) == pic_offset_table_rtx) + return true; + } + return false; +} + +static int +sw_64_does_function_need_gp (void) +{ + rtx_insn *insn; + + /* We need the gp to load the address of __mcount. */ + if (TARGET_PROFILING_NEEDS_GP && crtl->profile) + return 1; + + /* The code emitted by sw_64_output_mi_thunk_sysv uses the gp. */ + if (cfun->is_thunk) + return 1; + + /* The nonlocal receiver pattern assumes that the gp is valid for + the nested function. Reasonable because it's almost always set + correctly already. For the cases where that's wrong, make sure + the nested function loads its gp on entry. */ + if (crtl->has_nonlocal_goto) + return 1; + + /* If we need a GP (we have a LDSYM insn or a CALL_INSN), load it first. + Even if we are a static function, we still need to do this in case + our address is taken and passed to something like qsort. */ + + push_topmost_sequence (); + insn = get_insns (); + pop_topmost_sequence (); + + for (; insn; insn = NEXT_INSN (insn)) + if (NONDEBUG_INSN_P (insn) && GET_CODE (PATTERN (insn)) != USE + && GET_CODE (PATTERN (insn)) != CLOBBER && get_attr_usegp (insn)) + return 1; + + return 0; +} + +/* Helper function to set RTX_FRAME_RELATED_P on instructions, including + sequences. 
*/ + +static rtx_insn * +set_frame_related_p (void) +{ + rtx_insn *seq = get_insns (); + rtx_insn *insn; + + end_sequence (); + + if (!seq) + return NULL; + + if (INSN_P (seq)) + { + insn = seq; + while (insn != NULL_RTX) + { + RTX_FRAME_RELATED_P (insn) = 1; + insn = NEXT_INSN (insn); + } + seq = emit_insn (seq); + } + else + { + seq = emit_insn (seq); + RTX_FRAME_RELATED_P (seq) = 1; + } + return seq; +} + +#define FRP(exp) (start_sequence (), exp, set_frame_related_p ()) + +/* Generates a store with the proper unwind info attached. VALUE is + stored at BASE_REG+BASE_OFS. If FRAME_BIAS is nonzero, then BASE_REG + contains SP+FRAME_BIAS, and that is the unwind info that should be + generated. If FRAME_REG != VALUE, then VALUE is being stored on + behalf of FRAME_REG, and FRAME_REG should be present in the unwind. */ + +static void +emit_frame_store_1 (rtx value, rtx base_reg, HOST_WIDE_INT frame_bias, + HOST_WIDE_INT base_ofs, rtx frame_reg) +{ + rtx addr, mem; + rtx_insn *insn; + + addr = plus_constant (Pmode, base_reg, base_ofs); + mem = gen_frame_mem (Pmode, addr); + + insn = emit_move_insn (mem, value); + RTX_FRAME_RELATED_P (insn) = 1; + + if (frame_bias || value != frame_reg) + { + if (frame_bias) + { + addr + = plus_constant (Pmode, stack_pointer_rtx, frame_bias + base_ofs); + mem = gen_rtx_MEM (Pmode, addr); + } + + add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_SET (mem, frame_reg)); + } +} + +static void +emit_frame_store (unsigned int regno, rtx base_reg, HOST_WIDE_INT frame_bias, + HOST_WIDE_INT base_ofs) +{ + rtx reg = gen_rtx_REG (DImode, regno); + emit_frame_store_1 (reg, base_reg, frame_bias, base_ofs, reg); +} + +static void +emit_frame_store_32 (unsigned int regno, rtx base_reg, HOST_WIDE_INT frame_bias, + HOST_WIDE_INT base_ofs) +{ + rtx reg = gen_rtx_REG (Pmode, regno); + emit_frame_store_1 (reg, base_reg, frame_bias, base_ofs, reg); +} + +/* Write function prologue. */ +static void +sw64_add_cfa_expression (rtx_insn *insn, unsigned int reg, rtx base, + poly_int64 offset) +{ + rtx mem = gen_frame_mem (DImode, plus_constant (Pmode, base, offset)); + add_reg_note (insn, REG_CFA_EXPRESSION, + gen_rtx_SET (mem, regno_reg_rtx[reg])); +} + +void +sw_64_expand_prologue (void) +{ + /* Registers to save. */ + unsigned HOST_WIDE_INT sa_mask = cfun->machine->frame.sa_mask; + /* Stack space needed for pushing registers clobbered by us. */ + HOST_WIDE_INT sa_size = cfun->machine->frame.saved_regs_size; + /* Complete stack size needed. */ + HOST_WIDE_INT frame_size = cfun->machine->frame.frame_size; + /* Probed stack size; it additionally includes the size of + the "reserve region" if any. */ + HOST_WIDE_INT probed_size, sa_bias; + /* Offset from base reg to register save area. */ + HOST_WIDE_INT reg_offset; + rtx sa_reg; + bool fp_flag = false; + + if (flag_stack_usage_info) + current_function_static_stack_size = frame_size; + +#ifdef SW_64_ENABLE_FULL_ASAN + reg_offset = aligned_upper_bound (crtl->outgoing_args_size, + STACK_BOUNDARY / BITS_PER_UNIT); +#else + reg_offset = SW_64_ROUND (crtl->outgoing_args_size); +#endif + + /* Emit an insn to reload GP, if needed. 
*/ + sw_64_function_needs_gp = sw_64_does_function_need_gp (); + if (sw_64_function_needs_gp) + { + if (TARGET_SW_M32) + emit_insn (gen_prologue_ldgp_32 ()); + else + emit_insn (gen_prologue_ldgp ()); + } + + if (strcmp ("main", lang_hooks.decl_printable_name (cfun->decl, 1)) == 0 + && (TARGET_SW_32ALIGN || TARGET_SW_SIMD)) + { + rtx const16 = gen_rtx_REG (DImode, 7); + sw_64_emit_set_const (const16, DImode, 16, 3, false); + emit_insn (gen_anddi3 (const16, const16, stack_pointer_rtx)); + emit_insn (gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, const16)); + + emit_insn ( + gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-32))); + rtx mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (0)); + rtx tmp7 = gen_rtx_MEM (Pmode, mem_address); + emit_move_insn (tmp7, gen_rtx_REG (DImode, 7)); + } + /* TARGET_PROFILING_NEEDS_GP actually implies that we need to insert + the call to mcount ourselves, rather than having the linker do it + magically in response to -pg. Since _mcount has special linkage, + don't represent the call as a call. */ + if (TARGET_PROFILING_NEEDS_GP && crtl->profile) + emit_insn (gen_prologue_mcount ()); + + if (strcmp ("main", lang_hooks.decl_printable_name (cfun->decl, 1)) == 0 + && flag_sw_hardware_prefetch) + { + emit_insn ( + gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-256))); + rtx mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (8)); + rtx tmp16 = gen_rtx_MEM (Pmode, mem_address); + mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (16)); + rtx tmp17 = gen_rtx_MEM (Pmode, mem_address); + mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (24)); + rtx tmp18 = gen_rtx_MEM (Pmode, mem_address); + mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (32)); + rtx tmp19 = gen_rtx_MEM (Pmode, mem_address); + mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (40)); + rtx tmp26 = gen_rtx_MEM (Pmode, mem_address); + + emit_move_insn (tmp16, gen_rtx_REG (DImode, 16)); + emit_move_insn (tmp17, gen_rtx_REG (DImode, 17)); + emit_move_insn (tmp18, gen_rtx_REG (DImode, 18)); + emit_move_insn (tmp19, gen_rtx_REG (DImode, 19)); + emit_move_insn (tmp26, gen_rtx_REG (DImode, 26)); + + rtx tmp_clt = gen_rtx_REG (DImode, 7); + rtx tmp_cnt = gen_rtx_REG (DImode, 8); + rtx op = gen_rtx_REG (DImode, 17); + + unsigned long clt1, clt2, clt3; + unsigned long cnt1, cnt2, cnt3; + clt1 = flag_hardware_prefetch_clt % 2; + clt2 = (flag_hardware_prefetch_clt >> 1) % 2; + clt3 = (flag_hardware_prefetch_clt >> 2) % 2; + cnt1 = flag_hardware_prefetch_cnt_l1; + cnt2 = flag_hardware_prefetch_cnt_l2; + cnt3 = flag_hardware_prefetch_cnt_l3; + sw_64_emit_set_const (op, DImode, 0x10, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, clt1, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + sw_64_emit_set_const (op, DImode, 0x11, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, clt2, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + sw_64_emit_set_const (op, DImode, 0x12, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, clt3, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + sw_64_emit_set_const (op, DImode, 0x1, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, cnt1, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + sw_64_emit_set_const (op, DImode, 0x4, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, cnt2, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + 
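/* Final pair in this presumed prefetch-configuration sequence: selector 0x8 is
   issued with cnt3 (apparently the L3 prefetch count, judging by
   flag_hardware_prefetch_cnt_l3 above), mirroring the 0x1/cnt1 and 0x4/cnt2
   calls already emitted.  */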
sw_64_emit_set_const (op, DImode, 0x8, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, cnt3, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + emit_move_insn (gen_rtx_REG (DImode, 16), tmp16); + emit_move_insn (gen_rtx_REG (DImode, 17), tmp17); + emit_move_insn (gen_rtx_REG (DImode, 18), tmp18); + emit_move_insn (gen_rtx_REG (DImode, 19), tmp19); + emit_move_insn (gen_rtx_REG (DImode, 26), tmp26); + emit_insn ( + gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (256))); + } + if (strcmp ("exit", lang_hooks.decl_printable_name (cfun->decl, 1)) == 0) + { + emit_insn ( + gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-256))); + rtx mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (8)); + rtx tmp16 = gen_rtx_MEM (Pmode, mem_address); + mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (16)); + rtx tmp17 = gen_rtx_MEM (Pmode, mem_address); + mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (24)); + rtx tmp18 = gen_rtx_MEM (Pmode, mem_address); + mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (32)); + rtx tmp19 = gen_rtx_MEM (Pmode, mem_address); + mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (40)); + rtx tmp26 = gen_rtx_MEM (Pmode, mem_address); + + emit_move_insn (tmp16, gen_rtx_REG (DImode, 16)); + emit_move_insn (tmp17, gen_rtx_REG (DImode, 17)); + emit_move_insn (tmp18, gen_rtx_REG (DImode, 18)); + emit_move_insn (tmp19, gen_rtx_REG (DImode, 19)); + emit_move_insn (tmp26, gen_rtx_REG (DImode, 26)); + + rtx tmp_clt = gen_rtx_REG (DImode, 7); + rtx tmp_cnt = gen_rtx_REG (DImode, 8); + rtx op = gen_rtx_REG (DImode, 17); + + unsigned long clt1, clt2, clt3; + unsigned long cnt1, cnt2, cnt3; + clt1 = 1; + clt2 = 0; + clt3 = 1; + cnt1 = 0; + cnt2 = 0; + cnt3 = 5; + sw_64_emit_set_const (op, DImode, 0x10, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, clt1, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + sw_64_emit_set_const (op, DImode, 0x11, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, clt2, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + sw_64_emit_set_const (op, DImode, 0x12, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, clt3, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + sw_64_emit_set_const (op, DImode, 0x1, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, cnt1, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + sw_64_emit_set_const (op, DImode, 0x4, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, cnt2, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + sw_64_emit_set_const (op, DImode, 0x8, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, cnt3, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + emit_move_insn (gen_rtx_REG (DImode, 16), tmp16); + emit_move_insn (gen_rtx_REG (DImode, 17), tmp17); + emit_move_insn (gen_rtx_REG (DImode, 18), tmp18); + emit_move_insn (gen_rtx_REG (DImode, 19), tmp19); + emit_move_insn (gen_rtx_REG (DImode, 26), tmp26); + emit_insn ( + gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (256))); + } + + /* Adjust the stack by the frame size. If the frame size is > 4096 + bytes, we need to be sure we probe somewhere in the first and last + 4096 bytes (we can probably get away without the latter test) and + every 8192 bytes in between. If the frame size is > 32768, we + do this in a loop. 
Otherwise, we generate the explicit probe + instructions. + + Note that we are only allowed to adjust sp once in the prologue. */ + + probed_size = frame_size; + if (flag_stack_check || flag_stack_clash_protection) + probed_size += get_stack_check_protect (); + + if (probed_size <= 32768) + { + if (probed_size > 4096) + { + int probed; + + for (probed = 4096; probed < probed_size; probed += 8192) + emit_insn (gen_stack_probe_internal (GEN_INT (-probed))); + + /* We only have to do this probe if we aren't saving registers or + if we are probing beyond the frame because of -fstack-check. */ + if ((sa_size == 0 && probed_size > probed - 4096) || flag_stack_check + || flag_stack_clash_protection) + emit_insn (gen_stack_probe_internal (GEN_INT (-probed_size))); + } + + if (frame_size != 0) + { + if (TARGET_SW_M32) + FRP (emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (-frame_size)))); + else + FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (-frame_size)))); + } + } + else + { + /* Here we generate code to set R22 to SP + 4096 and set R23 to the + number of 8192 byte blocks to probe. We then probe each block + in the loop and then set SP to the proper location. If the + amount remaining is > 4096, we have to do one more probe if we + are not saving any registers or if we are probing beyond the + frame because of -fstack-check. */ + + HOST_WIDE_INT blocks = (probed_size + 4096) / 8192; + HOST_WIDE_INT leftover = probed_size + 4096 - blocks * 8192; + rtx ptr = gen_rtx_REG (DImode, 22); + rtx count = gen_rtx_REG (DImode, 23); + rtx seq; + + emit_move_insn (count, GEN_INT (blocks)); + emit_insn (gen_adddi3 (ptr, stack_pointer_rtx, GEN_INT (4096))); + + /* Because of the difficulty in emitting a new basic block this + late in the compilation, generate the loop as a single insn. */ + emit_insn (gen_prologue_stack_probe_loop (count, ptr)); + + if ((leftover > 4096 && sa_size == 0) || flag_stack_check + || flag_stack_clash_protection) + { + rtx last = gen_rtx_MEM (Pmode, plus_constant (Pmode, ptr, -leftover)); + MEM_VOLATILE_P (last) = 1; + emit_move_insn (last, const0_rtx); + } + + if (flag_stack_check || flag_stack_clash_protection) + { + /* If -fstack-check is specified we have to load the entire + constant into a register and subtract from the sp in one go, + because the probed stack size is not equal to the frame size. */ + HOST_WIDE_INT lo, hi; + lo = ((frame_size & 0xffff) ^ 0x8000) - 0x8000; + hi = frame_size - lo; + + emit_move_insn (ptr, GEN_INT (hi)); + emit_insn (gen_adddi3 (ptr, ptr, GEN_INT (lo))); + seq = emit_insn ( + gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, ptr)); + } + else + { + seq = emit_insn ( + gen_adddi3 (stack_pointer_rtx, ptr, GEN_INT (-leftover))); + } + + /* This alternative is special, because the DWARF code cannot + possibly intuit through the loop above. So we invent this + note it looks at instead. */ + RTX_FRAME_RELATED_P (seq) = 1; + add_reg_note (seq, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -frame_size))); + } + + /* Cope with very large offsets to the register save area. 
*/ + sa_bias = 0; + sa_reg = stack_pointer_rtx; + if (reg_offset + sa_size > 0x8000) + { + int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000; + rtx sa_bias_rtx; + + if (low + sa_size <= 0x8000) + sa_bias = reg_offset - low, reg_offset = low; + else + sa_bias = reg_offset, reg_offset = 0; + + sa_reg = gen_rtx_REG (DImode, 24); + sa_bias_rtx = GEN_INT (sa_bias); + + if (add_operand (sa_bias_rtx, DImode)) + emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_bias_rtx)); + else + { + emit_move_insn (sa_reg, sa_bias_rtx); + emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_reg)); + } + } + + /* Save register RA next, followed by any other registers + that need to be saved. */ + for (unsigned i = REG_RA; sa_mask != 0; i = ctz_hwi (sa_mask)) + { + /* if we need a frame pointer, set it from the stack pointer. */ + if (frame_pointer_needed && i != REG_RA && fp_flag == false) + { + if (TARGET_SW_M32) + { + emit_frame_store_32 (HARD_FRAME_POINTER_REGNUM, sa_reg, sa_bias, + reg_offset); + } + else + { + emit_frame_store (HARD_FRAME_POINTER_REGNUM, sa_reg, sa_bias, + reg_offset); + sa_mask &= ~(HOST_WIDE_INT_1U << HARD_FRAME_POINTER_REGNUM); + reg_offset += 8; + fp_flag = true; + } + } + else + { + if (TARGET_SW_M32) + { + emit_frame_store_32 (i, sa_reg, sa_bias, reg_offset); + } + else + { + emit_frame_store (i, sa_reg, sa_bias, reg_offset); + reg_offset += 8; + sa_mask &= ~(HOST_WIDE_INT_1U << i); + } + } + } + + /* If we need a frame pointer, set it from the stack pointer. */ + if (frame_pointer_needed) + { + if (TARGET_CAN_FAULT_IN_PROLOGUE) + { + unsigned reg2 = 15; // FP + unsigned reg1 = 26; // R26 + long adj_size = SW_64_ROUND (crtl->outgoing_args_size); + if (adj_size > 0x8000) + { + int low = ((adj_size & 0xffff) ^ 0x8000) - 0x8000; + HOST_WIDE_INT bias; + + if (low <= 0x8000) + bias = adj_size - low, adj_size = low; + else + bias = adj_size, adj_size = 0; + + rtx fp_move; + rtx sa_reg_exp + = plus_constant (Pmode, stack_pointer_rtx, bias); + emit_move_insn (hard_frame_pointer_rtx, sa_reg_exp); + if (adj_size != 0) + fp_move + = gen_adddi3 (hard_frame_pointer_rtx, + hard_frame_pointer_rtx, GEN_INT (adj_size)); + + if ((void *) fp_move == NULL) + printf ("unable gen add3"); + emit_insn (fp_move); + } + else + { + rtx fp_move + = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx, + GEN_INT ( + SW_64_ROUND (crtl->outgoing_args_size))); + FRP (emit_insn (fp_move)); + } + rtx_insn *insn = get_last_insn (); + if (!find_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX)) + { + rtx src + = plus_constant (Pmode, stack_pointer_rtx, + SW_64_ROUND (crtl->outgoing_args_size)); + add_reg_note (insn, REG_CFA_ADJUST_CFA, + gen_rtx_SET (hard_frame_pointer_rtx, src)); + } + + emit_insn ( + gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); + } + else + /* This must always be the last instruction in the + prologue, thus we emit a special move + clobber. */ + FRP (emit_insn ( + gen_init_fp (hard_frame_pointer_rtx, stack_pointer_rtx, sa_reg))); + } + + /* The ABIs for VMS and OSF/1 say that while we can schedule insns into + the prologue, for exception handling reasons, we cannot do this for + any insn that might fault. We could prevent this for mems with a + (clobber:BLK (scratch)), but this doesn't work for fp insns. So we + have to prevent all such scheduling with a blockage. + + Linux, on the other hand, never bothered to implement OSF/1's + exception handling, and so doesn't care about such things. Anyone + planning to use dwarf2 frame-unwind info can also omit the blockage. 
*/ + + if (!TARGET_CAN_FAULT_IN_PROLOGUE) + emit_insn (gen_blockage ()); +} + +/* Count the number of .file directives, so that .loc is up to date. */ +int num_source_filenames = 0; + +/* Output the textual info surrounding the prologue. */ + +void +sw_64_start_function (FILE *file, const char *fnname, + tree decl ATTRIBUTE_UNUSED) +{ + unsigned long imask, fmask; + /* Complete stack size needed. */ + HOST_WIDE_INT frame_size = cfun->machine->frame.frame_size; + /* The maximum debuggable frame size. */ + const HOST_WIDE_INT max_frame_size = HOST_WIDE_INT_1 << 31; + /* Offset from base reg to register save area. */ + HOST_WIDE_INT reg_offset; + char *entry_label = (char *) alloca (strlen (fnname) + 6); + char *tramp_label = (char *) alloca (strlen (fnname) + 6); + int i; + + sw_64_fnname = fnname; + const char *main = "main"; + if (flag_fpcr_set == 4 && strcmp (fnname, main) == 0) + stfp3_flag = 1; + else + stfp3_flag = 0; + + reg_offset = SW_64_ROUND (crtl->outgoing_args_size); + + imask = cfun->machine->frame.sa_mask & 0xffffffffu; + fmask = cfun->machine->frame.sa_mask >> 32; + /* Issue function start and label. */ + if (!flag_inhibit_size_directive) + { + fputs ("\t.ent ", file); + assemble_name (file, fnname); + putc ('\n', file); + + /* If the function needs GP, we'll write the "..ng" label there. + Otherwise, do it here. */ + if (!sw_64_function_needs_gp && !cfun->is_thunk) + { + putc ('$', file); + assemble_name (file, fnname); + fputs ("..ng:\n", file); + } + } + /* Nested functions on VMS that are potentially called via trampoline + get a special transfer entry point that loads the called functions + procedure descriptor and static chain. */ + strcpy (entry_label, fnname); + + ASM_OUTPUT_LABEL (file, entry_label); + inside_function = TRUE; + + if (TARGET_IEEE_CONFORMANT && !flag_inhibit_size_directive) + { + /* Set flags in procedure descriptor to request IEEE-conformant + math-library routines. The value we set it to is PDSC_EXC_IEEE + (/usr/include/pdsc.h). */ + fputs ("\t.eflag 48\n", file); + } + + /* Set up offsets to sw_64 virtual arg/local debugging pointer. */ + sw_64_auto_offset = -frame_size + cfun->machine->frame.saved_varargs_size + + crtl->args.pretend_args_size; + sw_64_arg_offset = -frame_size + 48; + + /* Describe our frame. If the frame size is larger than an integer, + print it as zero to avoid an assembler error. We won't be + properly describing such a frame, but that's the best we can do. */ + if (!flag_inhibit_size_directive) + fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,%d\n", + (frame_pointer_needed ? HARD_FRAME_POINTER_REGNUM + : STACK_POINTER_REGNUM), + frame_size >= max_frame_size ? 0 : frame_size, + crtl->args.pretend_args_size); + + /* Describe which registers were spilled. */ + if (!flag_inhibit_size_directive) + { + if (imask) + { + fprintf (file, "\t.mask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", imask, + frame_size >= max_frame_size ? 0 : reg_offset - frame_size); + + for (i = 0; i < 32; ++i) + if (imask & (1UL << i)) + reg_offset += 8; + } + + if (fmask) + fprintf (file, "\t.fmask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", fmask, + frame_size >= max_frame_size ? 0 : reg_offset - frame_size); + } +} + +/* Emit the .prologue note at the scheduled end of the prologue. */ + +static void +sw_64_output_function_end_prologue (FILE *file) +{ + if (!flag_inhibit_size_directive) + fprintf (file, "\t.prologue %d\n", + sw_64_function_needs_gp || cfun->is_thunk); +} + +/* Write function epilogue. 
*/ + +void +sw_64_expand_epilogue (void) +{ + /* Registers to save. */ + unsigned HOST_WIDE_INT sa_mask = cfun->machine->frame.sa_mask; + /* Stack space needed for pushing registers clobbered by us. */ + HOST_WIDE_INT sa_size = cfun->machine->frame.saved_regs_size; + /* Complete stack size needed. */ + HOST_WIDE_INT frame_size = cfun->machine->frame.frame_size; + /* Offset from base reg to register save area. */ + HOST_WIDE_INT reg_offset; + int fp_is_frame_pointer, fp_offset; + rtx sa_reg, sa_reg_exp = NULL; + rtx sp_adj1, sp_adj2, mem, reg, insn; + rtx eh_ofs; + rtx cfa_restores = NULL_RTX; + bool fp_flag = false; + +#ifdef SW_64_ENABLE_FULL_ASAN + reg_offset = aligned_upper_bound (crtl->outgoing_args_size, + STACK_BOUNDARY / BITS_PER_UNIT); +#else + reg_offset = SW_64_ROUND (crtl->outgoing_args_size); +#endif + + if (strcmp ("main", lang_hooks.decl_printable_name (cfun->decl, 1)) == 0 + && flag_sw_hardware_prefetch) + { + emit_insn ( + gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-256))); + rtx mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (8)); + rtx tmp16 = gen_rtx_MEM (Pmode, mem_address); + mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (16)); + rtx tmp17 = gen_rtx_MEM (Pmode, mem_address); + mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (24)); + rtx tmp18 = gen_rtx_MEM (Pmode, mem_address); + mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (32)); + rtx tmp19 = gen_rtx_MEM (Pmode, mem_address); + mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (40)); + rtx tmp26 = gen_rtx_MEM (Pmode, mem_address); + + emit_move_insn (tmp16, gen_rtx_REG (DImode, 16)); + emit_move_insn (tmp17, gen_rtx_REG (DImode, 17)); + emit_move_insn (tmp18, gen_rtx_REG (DImode, 18)); + emit_move_insn (tmp19, gen_rtx_REG (DImode, 19)); + emit_move_insn (tmp26, gen_rtx_REG (DImode, 26)); + + rtx tmp_clt = gen_rtx_REG (DImode, 7); + rtx tmp_cnt = gen_rtx_REG (DImode, 8); + rtx op = gen_rtx_REG (DImode, 17); + + unsigned long clt1, clt2, clt3; + unsigned long cnt1, cnt2, cnt3; + clt1 = 1; + clt2 = 0; + clt3 = 1; + cnt1 = 0; + cnt2 = 0; + cnt3 = 5; + sw_64_emit_set_const (op, DImode, 0x10, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, clt1, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + sw_64_emit_set_const (op, DImode, 0x11, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, clt2, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + sw_64_emit_set_const (op, DImode, 0x12, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, clt3, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + sw_64_emit_set_const (op, DImode, 0x1, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, cnt1, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + sw_64_emit_set_const (op, DImode, 0x4, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, cnt2, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + sw_64_emit_set_const (op, DImode, 0x8, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, cnt3, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + emit_move_insn (gen_rtx_REG (DImode, 16), tmp16); + emit_move_insn (gen_rtx_REG (DImode, 17), tmp17); + emit_move_insn (gen_rtx_REG (DImode, 18), tmp18); + emit_move_insn (gen_rtx_REG (DImode, 19), tmp19); + emit_move_insn (gen_rtx_REG (DImode, 26), tmp26); + emit_insn ( + gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (256))); + } + + 
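/* A sketch of the restore path that follows, mirroring the prologue: when a
   frame pointer is in use, SP is first recovered from FP (biasing through a
   scratch register if the outgoing-args area exceeds the 16-bit displacement
   range); the save area is then walked to reload RA, FP and the remaining
   saved registers, attaching REG_CFA_RESTORE notes; finally the frame itself
   is popped, going through register $23 when the frame size does not fit an
   immediate operand.  */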
fp_is_frame_pointer = frame_pointer_needed; + fp_offset = 0; + sa_reg = stack_pointer_rtx; + + if (crtl->calls_eh_return) + eh_ofs = EH_RETURN_STACKADJ_RTX; + else + eh_ofs = NULL_RTX; + + if (sa_size) + { + /* If we have a frame pointer, restore SP from it. */ + if (frame_pointer_needed) + { + long adj_size = SW_64_ROUND (crtl->outgoing_args_size); + if (adj_size > 0x8000) + { + int low = ((adj_size & 0xffff) ^ 0x8000) - 0x8000; + HOST_WIDE_INT bias; + + if (low <= 0x8000) + bias = adj_size - low, adj_size = low; + else + bias = adj_size, adj_size = 0; + + rtx sa_reg = stack_pointer_rtx; + rtx sa_reg_exp + = plus_constant (Pmode, hard_frame_pointer_rtx, -bias); + emit_move_insn (sa_reg, sa_reg_exp); + if (adj_size != 0) + emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (-adj_size))); + } + else + { + emit_insn ( + gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); + rtx insn + = gen_add3_insn (stack_pointer_rtx, hard_frame_pointer_rtx, + GEN_INT ( + -SW_64_ROUND (crtl->outgoing_args_size))); + emit_insn (insn); + } + } + // emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx); + + /* Cope with very large offsets to the register save area. */ + if (reg_offset + sa_size > 0x8000) + { + int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000; + HOST_WIDE_INT bias; + + if (low + sa_size <= 0x8000) + bias = reg_offset - low, reg_offset = low; + else + bias = reg_offset, reg_offset = 0; + + sa_reg = gen_rtx_REG (Pmode, 22); + sa_reg_exp = plus_constant (Pmode, stack_pointer_rtx, bias); + + emit_move_insn (sa_reg, sa_reg_exp); + } + + /* Restore registers in order, excepting a true frame pointer. */ + for (unsigned i = REG_RA; sa_mask != 0; i = ctz_hwi (sa_mask)) + { + if (fp_is_frame_pointer && i != REG_RA && fp_flag == false) + { + emit_insn (gen_blockage ()); + mem = gen_frame_mem (DImode, + plus_constant (Pmode, sa_reg, reg_offset)); + emit_move_insn (hard_frame_pointer_rtx, mem); + cfa_restores + = alloc_reg_note (REG_CFA_RESTORE, hard_frame_pointer_rtx, + cfa_restores); + sa_mask &= ~(1UL << HARD_FRAME_POINTER_REGNUM); + reg_offset += 8; + fp_offset = reg_offset; + fp_flag = true; + } + else + { + mem = gen_frame_mem (Pmode, + plus_constant (Pmode, sa_reg, reg_offset)); + reg = gen_rtx_REG (Pmode, i); + emit_move_insn (reg, mem); + cfa_restores + = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); + reg_offset += 8; + sa_mask &= ~(HOST_WIDE_INT_1U << i); + } + } + } + + if (frame_size || eh_ofs) + { + sp_adj1 = stack_pointer_rtx; + + if (eh_ofs) + { + sp_adj1 = gen_rtx_REG (Pmode, 23); + emit_move_insn (sp_adj1, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, eh_ofs)); + } + + /* If the stack size is large, begin computation into a temporary + register so as not to interfere with a potential fp restore, + which must be consecutive with an SP restore. */ + if (frame_size < 32768 && !cfun->calls_alloca) + sp_adj2 = GEN_INT (frame_size); + else if (frame_size < 0x40007fffL) + { + int low = ((frame_size & 0xffff) ^ 0x8000) - 0x8000; + + sp_adj2 = plus_constant (Pmode, sp_adj1, frame_size - low); + if (sa_reg_exp && rtx_equal_p (sa_reg_exp, sp_adj2)) + sp_adj1 = sa_reg; + else + { + sp_adj1 = gen_rtx_REG (Pmode, 23); + emit_move_insn (sp_adj1, sp_adj2); + } + sp_adj2 = GEN_INT (low); + } + else + { + rtx tmp = gen_rtx_REG (Pmode, 23); + sp_adj2 = sw_64_emit_set_const (tmp, Pmode, frame_size, 3, false); + if (!sp_adj2) + { + /* We can't drop new things to memory this late, afaik, + so build it up by pieces. 
*/ + sp_adj2 = sw_64_emit_set_long_const (tmp, frame_size); + gcc_assert (sp_adj2); + } + } + + /* Restore the stack pointer. */ + emit_insn (gen_blockage ()); + if (sp_adj2 == const0_rtx) + insn = emit_move_insn (stack_pointer_rtx, sp_adj1); + else + insn = emit_move_insn (stack_pointer_rtx, + gen_rtx_PLUS (Pmode, sp_adj1, sp_adj2)); + REG_NOTES (insn) = cfa_restores; + add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + } + else + { + gcc_assert (cfa_restores == NULL); + } + if (strcmp ("main", lang_hooks.decl_printable_name (cfun->decl, 1)) == 0 + && (TARGET_SW_32ALIGN || TARGET_SW_SIMD)) + { + rtx mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (0)); + rtx tmp7 = gen_rtx_MEM (Pmode, mem_address); + emit_move_insn (gen_rtx_REG (DImode, 7), tmp7); + rtx const16 = gen_rtx_REG (DImode, 7); + emit_insn ( + gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (32))); + emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, const16)); + } +} + +/* Output the rest of the textual info surrounding the epilogue. */ + +void +sw_64_end_function (FILE *file, const char *fnname, tree decl ATTRIBUTE_UNUSED) +{ + rtx_insn *insn; + + /* We output a nop after noreturn calls at the very end of the function to + ensure that the return address always remains in the caller's code range, + as not doing so might confuse unwinding engines. */ + insn = get_last_insn (); + if (!INSN_P (insn)) + insn = prev_active_insn (insn); + if (insn && CALL_P (insn)) + output_asm_insn (get_insn_template (CODE_FOR_nop, NULL), NULL); + + /* End the function. */ + if (!flag_inhibit_size_directive) + { + fputs ("\t.end ", file); + assemble_name (file, fnname); + putc ('\n', file); + } + inside_function = FALSE; +} + +/* Emit a tail call to FUNCTION after adjusting THIS by DELTA. + + In order to avoid the hordes of differences between generated code + with and without TARGET_EXPLICIT_RELOCS, and to avoid duplicating + lots of code loading up large constants, generate rtl and emit it + instead of going straight to text. + + Not sure why this idea hasn't been explored before... */ + +static void +sw_64_output_mi_thunk_osf (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, + HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, + tree function) +{ + const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl)); + HOST_WIDE_INT hi, lo; + rtx this_rtx, funexp; + rtx_insn *insn; + + /* We always require a valid GP. */ + if (TARGET_SW_M32) + emit_insn (gen_prologue_ldgp_32 ()); + else + emit_insn (gen_prologue_ldgp ()); + emit_note (NOTE_INSN_PROLOGUE_END); + + /* Find the "this" pointer. If the function returns a structure, + the structure return pointer is in $16. */ + if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) + this_rtx = gen_rtx_REG (Pmode, 17); + else + this_rtx = gen_rtx_REG (Pmode, 16); + + /* Add DELTA. When possible we use ldih+ldi. Otherwise load the + entire constant for the add. */ + lo = ((delta & 0xffff) ^ 0x8000) - 0x8000; + hi = (((delta - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000; + if (hi + lo == delta) + { + if (hi) + emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (hi))); + if (lo) + emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (lo))); + } + else + { + rtx tmp = sw_64_emit_set_long_const (gen_rtx_REG (Pmode, 0), delta); + emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp)); + } + + /* Add a delta stored in the vtable at VCALL_OFFSET. 
*/ + if (vcall_offset) + { + rtx tmp, tmp2; + + tmp = gen_rtx_REG (Pmode, 0); + emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx)); + + lo = ((vcall_offset & 0xffff) ^ 0x8000) - 0x8000; + hi = (((vcall_offset - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000; + if (hi + lo == vcall_offset) + { + if (hi) + emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (hi))); + } + else + { + tmp2 + = sw_64_emit_set_long_const (gen_rtx_REG (Pmode, 1), vcall_offset); + emit_insn (gen_adddi3 (tmp, tmp, tmp2)); + lo = 0; + } + if (lo) + tmp2 = gen_rtx_PLUS (Pmode, tmp, GEN_INT (lo)); + else + tmp2 = tmp; + emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp2)); + + emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp)); + } + + /* Generate a tail call to the target function. */ + if (!TREE_USED (function)) + { + assemble_external (function); + TREE_USED (function) = 1; + } + funexp = XEXP (DECL_RTL (function), 0); + funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); + insn = emit_call_insn (gen_sibcall (funexp, const0_rtx)); + SIBLING_CALL_P (insn) = 1; + + /* Run just enough of rest_of_compilation to get the insns emitted. + There's not really enough bulk here to make other passes such as + instruction scheduling worth while. */ + insn = get_insns (); + shorten_branches (insn); + assemble_start_function (thunk_fndecl, fnname); + final_start_function (insn, file, 1); + final (insn, file, 1); + final_end_function (); + assemble_end_function (thunk_fndecl, fnname); +} + + +/* Debugging support. */ + +#include "gstab.h" + +/* Name of the file containing the current function. */ + +static const char *current_function_file = ""; + +/* Offsets to sw_64 virtual arg/local debugging pointers. */ + +long sw_64_arg_offset; +long sw_64_auto_offset; + +/* Emit a new filename to a stream. */ + +void +sw_64_output_filename (FILE *stream, const char *name) +{ + static int first_time = TRUE; + + if (first_time) + { + first_time = FALSE; + ++num_source_filenames; + current_function_file = name; + fprintf (stream, "\t.file\t "); + output_quoted_string (stream, name); + fprintf (stream, "\n"); + } + + else if (name != current_function_file + && strcmp (name, current_function_file) != 0) + { + ++num_source_filenames; + current_function_file = name; + fprintf (stream, "\t.file\t "); + + output_quoted_string (stream, name); + fprintf (stream, "\n"); + } +} + +/* Structure to show the current status of registers and memory. */ + +struct shadow_summary +{ + struct { + unsigned int i : 31; /* Mask of int regs. */ + unsigned int fp : 31; /* Mask of fp regs. */ + unsigned int mem : 1; /* mem == imem | fpmem. */ + } used, defd; +}; + +/* Summary the effects of expression X on the machine. Update SUM, a pointer + to the summary structure. SET is nonzero if the insn is setting the + object, otherwise zero. */ + +static void +summarize_insn (rtx x, struct shadow_summary *sum, int set) +{ + const char *format_ptr; + int i, j; + + if (x == 0) + return; + + switch (GET_CODE (x)) + { + /* ??? Note that this case would be incorrect if the Sw_64 had a + ZERO_EXTRACT in SET_DEST. 
*/ + case SET: + summarize_insn (SET_SRC (x), sum, 0); + summarize_insn (SET_DEST (x), sum, 1); + break; + + case CLOBBER: + summarize_insn (XEXP (x, 0), sum, 1); + break; + + case USE: + summarize_insn (XEXP (x, 0), sum, 0); + break; + + case ASM_OPERANDS: + for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--) + summarize_insn (ASM_OPERANDS_INPUT (x, i), sum, 0); + break; + + case PARALLEL: + for (i = XVECLEN (x, 0) - 1; i >= 0; i--) + summarize_insn (XVECEXP (x, 0, i), sum, 0); + break; + + case SUBREG: + summarize_insn (SUBREG_REG (x), sum, 0); + break; + + case REG: + { + int regno = REGNO (x); + unsigned long mask = ((unsigned long) 1) << (regno % 32); + + if (regno == 31 || regno == 63) + break; + + if (set) + { + if (regno < 32) + sum->defd.i |= mask; + else + sum->defd.fp |= mask; + } + else + { + if (regno < 32) + sum->used.i |= mask; + else + sum->used.fp |= mask; + } + } + break; + + case MEM: + if (set) + sum->defd.mem = 1; + else + sum->used.mem = 1; + + /* Find the regs used in memory address computation: */ + summarize_insn (XEXP (x, 0), sum, 0); + break; + + case CONST_INT: + case CONST_WIDE_INT: + case CONST_DOUBLE: + case SYMBOL_REF: + case LABEL_REF: + case CONST: + case SCRATCH: + case ASM_INPUT: + break; + + /* Handle common unary and binary ops for efficiency. */ + case COMPARE: + case PLUS: + case MINUS: + case MULT: + case DIV: + case MOD: + case UDIV: + case UMOD: + case AND: + case IOR: + case XOR: + case ASHIFT: + case ROTATE: + case ASHIFTRT: + case LSHIFTRT: + case ROTATERT: + case SMIN: + case SMAX: + case UMIN: + case UMAX: + case NE: + case EQ: + case GE: + case GT: + case LE: + case LT: + case GEU: + case GTU: + case LEU: + case LTU: + summarize_insn (XEXP (x, 0), sum, 0); + summarize_insn (XEXP (x, 1), sum, 0); + break; + + case NEG: + case NOT: + case SIGN_EXTEND: + case ZERO_EXTEND: + case TRUNCATE: + case FLOAT_EXTEND: + case FLOAT_TRUNCATE: + case FLOAT: + case FIX: + case UNSIGNED_FLOAT: + case UNSIGNED_FIX: + case ABS: + case SQRT: + case FFS: + summarize_insn (XEXP (x, 0), sum, 0); + break; + + default: + format_ptr = GET_RTX_FORMAT (GET_CODE (x)); + for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) + switch (format_ptr[i]) + { + case 'e': + summarize_insn (XEXP (x, i), sum, 0); + break; + + case 'E': + for (j = XVECLEN (x, i) - 1; j >= 0; j--) + summarize_insn (XVECEXP (x, i, j), sum, 0); + break; + + case 'i': + break; + + default: + gcc_unreachable (); + } + } +} + +/* Ensure a sufficient number of `memb' insns are in the code when + the user requests code with a trap precision of functions or + instructions. + + In naive mode, when the user requests a trap-precision of + "instruction", a memb is needed after every instruction that may + generate a trap. This ensures that the code is resumption safe but + it is also slow. + + When optimizations are turned on, we delay issuing a memb as long + as possible. In this context, a trap shadow is the sequence of + instructions that starts with a (potentially) trap generating + instruction and extends to the next memb. We can delay (and + therefore sometimes omit) a memb subject to the following + conditions: + + (a) On entry to the trap shadow, if any Sw_64 register or memory + location contains a value that is used as an operand value by some + instruction in the trap shadow (live on entry), then no instruction + in the trap shadow may modify the register or memory location. 
+ + (b) Within the trap shadow, the computation of the base register + for a memory load or store instruction may not involve using the + result of an instruction that might generate an UNPREDICTABLE + result. + + (c) Within the trap shadow, no register may be used more than once + as a destination register. (This is to make life easier for the + trap-handler.) + + (d) The trap shadow may not include any branch instructions. */ + +static void +sw_64_handle_trap_shadows (void) +{ + struct shadow_summary shadow; + int trap_pending, exception_nesting; + rtx_insn *i, *n; + + trap_pending = 0; + exception_nesting = 0; + shadow.used.i = 0; + shadow.used.fp = 0; + shadow.used.mem = 0; + shadow.defd = shadow.used; + + for (i = get_insns (); i; i = NEXT_INSN (i)) + { + if (NOTE_P (i)) + { + switch (NOTE_KIND (i)) + { + case NOTE_INSN_EH_REGION_BEG: + exception_nesting++; + if (trap_pending) + goto close_shadow; + break; + + case NOTE_INSN_EH_REGION_END: + exception_nesting--; + if (trap_pending) + goto close_shadow; + break; + + case NOTE_INSN_EPILOGUE_BEG: + if (trap_pending && sw_64_tp >= SW_64_TP_FUNC) + goto close_shadow; + break; + } + } + else if (trap_pending) + { + if (sw_64_tp == SW_64_TP_FUNC) + { + if (JUMP_P (i) && GET_CODE (PATTERN (i)) == RETURN) + goto close_shadow; + } + else if (sw_64_tp == SW_64_TP_INSN) + { + if (optimize > 0) + { + struct shadow_summary sum; + + sum.used.i = 0; + sum.used.fp = 0; + sum.used.mem = 0; + sum.defd = sum.used; + + switch (GET_CODE (i)) + { + case INSN: + /* Annoyingly, get_attr_trap will die on these. */ + if (GET_CODE (PATTERN (i)) == USE + || GET_CODE (PATTERN (i)) == CLOBBER) + break; + + summarize_insn (PATTERN (i), &sum, 0); + + if ((sum.defd.i & shadow.defd.i) + || (sum.defd.fp & shadow.defd.fp)) + { + /* (c) would be violated. */ + goto close_shadow; + } + + /* Combine shadow with summary of current insn: */ + shadow.used.i |= sum.used.i; + shadow.used.fp |= sum.used.fp; + shadow.used.mem |= sum.used.mem; + shadow.defd.i |= sum.defd.i; + shadow.defd.fp |= sum.defd.fp; + shadow.defd.mem |= sum.defd.mem; + + if ((sum.defd.i & shadow.used.i) + || (sum.defd.fp & shadow.used.fp) + || (sum.defd.mem & shadow.used.mem)) + { + /* (a) would be violated (also takes care of (b)) */ + gcc_assert (get_attr_trap (i) != TRAP_YES + || (!(sum.defd.i & sum.used.i) + && !(sum.defd.fp & sum.used.fp))); + + goto close_shadow; + } + break; + + case BARRIER: + /* __builtin_unreachable can expand to no code at all, + leaving (barrier) RTXes in the instruction stream. */ + goto close_shadow_notrapb; + + case JUMP_INSN: + case CALL_INSN: + case CODE_LABEL: + goto close_shadow; + + case DEBUG_INSN: + break; + + default: + gcc_unreachable (); + } + } + else + { + close_shadow: + n = emit_insn_before (gen_trapb (), i); + PUT_MODE (n, TImode); + PUT_MODE (i, TImode); + close_shadow_notrapb: + trap_pending = 0; + shadow.used.i = 0; + shadow.used.fp = 0; + shadow.used.mem = 0; + shadow.defd = shadow.used; + } + } + } + + if ((exception_nesting > 0 || sw_64_tp >= SW_64_TP_FUNC) + && NONJUMP_INSN_P (i) && GET_CODE (PATTERN (i)) != USE + && GET_CODE (PATTERN (i)) != CLOBBER && get_attr_trap (i) == TRAP_YES) + { + if (optimize && !trap_pending) + summarize_insn (PATTERN (i), &shadow, 0); + trap_pending = 1; + } + } +} + +/* Sw_64 can only issue instruction groups simultaneously if they are + suitably aligned. This is very processor-specific. */ + +/* The instruction group alignment main loop. 
*/ + +static void +sw_64_align_insns_1 (unsigned int max_align, + rtx_insn *(*next_group) (rtx_insn *, int *, int *), + rtx (*next_nop) (int *)) +{ + /* ALIGN is the known alignment for the insn group. */ + unsigned int align; + /* OFS is the offset of the current insn in the insn group. */ + int ofs; + int prev_in_use, in_use, len, ldgp; + rtx_insn *i, *next; + + /* Let shorten branches care for assigning alignments to code labels. */ + shorten_branches (get_insns ()); + + unsigned int option_alignment = align_functions.levels[0].get_value (); + if (option_alignment < 4) + align = 4; + else if ((unsigned int) option_alignment < max_align) + align = option_alignment; + else + align = max_align; + + ofs = prev_in_use = 0; + i = get_insns (); + if (NOTE_P (i)) + i = next_nonnote_insn (i); + + ldgp = sw_64_function_needs_gp ? 8 : 0; + + while (i) + { + next = (*next_group) (i, &in_use, &len); + + /* When we see a label, resync alignment etc. */ + if (LABEL_P (i)) + { + unsigned int new_align + = label_to_alignment (i).levels[0].get_value (); + if (new_align >= align) + { + align = new_align < max_align ? new_align : max_align; + ofs = 0; + } + + else if (ofs & (new_align - 1)) + ofs = (ofs | (new_align - 1)) + 1; + gcc_assert (!len); + } + + /* Handle complex instructions special. */ + else if (in_use == 0) + { + /* Asms will have length < 0. This is a signal that we have + lost alignment knowledge. Assume, however, that the asm + will not mis-align instructions. */ + if (len < 0) + { + ofs = 0; + align = 4; + len = 0; + } + } + + /* If the known alignment is smaller than the recognized insn group, + realign the output. */ + else if ((int) align < len) + { + unsigned int new_log_align = len > 8 ? 4 : 3; + rtx_insn *prev, *where; + + where = prev = prev_nonnote_insn (i); + if (!where || !LABEL_P (where)) + where = i; + + /* Can't realign between a call and its gp reload. */ + if (!(TARGET_EXPLICIT_RELOCS && prev && CALL_P (prev))) + { + emit_insn_before (gen_realign (GEN_INT (new_log_align)), where); + align = 1 << new_log_align; + ofs = 0; + } + } + + /* We may not insert padding inside the initial ldgp sequence. */ + else if (ldgp > 0) + ldgp -= len; + + /* If the group won't fit in the same INT16 as the previous, + we need to add padding to keep the group together. Rather + than simply leaving the insn filling to the assembler, we + can make use of the knowledge of what sorts of instructions + were issued in the previous group to make sure that all of + the added nops are really free. */ + else if (ofs + len > (int) align) + { + int nop_count = (align - ofs) / 4; + rtx_insn *where; + + /* Insert nops before labels, branches, and calls to truly merge + the execution of the nops with the previous instruction group. */ + where = prev_nonnote_insn (i); + if (where) + { + if (LABEL_P (where)) + { + rtx_insn *where2 = prev_nonnote_insn (where); + if (where2 && JUMP_P (where2)) + where = where2; + } + else if (NONJUMP_INSN_P (where)) + where = i; + } + else + where = i; + + do + emit_insn_before ((*next_nop) (&prev_in_use), where); + while (--nop_count); + ofs = 0; + } + + ofs = (ofs + len) & (align - 1); + prev_in_use = in_use; + i = next; + } +} + +static void +sw_64_align_insns (void) +{ + gcc_unreachable (); +} + +/* Insert an unop between sibcall or noreturn function call and GP load. 
*/ + +static void +sw_64_pad_function_end (void) +{ + rtx_insn *insn, *next; + + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if (!CALL_P (insn) + || !(SIBLING_CALL_P (insn) + || find_reg_note (insn, REG_NORETURN, NULL_RTX))) + continue; + + next = next_active_insn (insn); + if (next) + { + rtx pat = PATTERN (next); + + if (GET_CODE (pat) == SET + && GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE + && XINT (SET_SRC (pat), 1) == UNSPECV_LDGP1) + emit_insn_after (gen_unop (), insn); + } + } +} + +/* Machine dependent reorg pass. */ + +static void +sw_64_reorg (void) +{ + /* Workaround for a linker error that triggers when an exception + handler immediatelly follows a sibcall or a noreturn function. + +In the sibcall case: + + The instruction stream from an object file: + + 1d8: 00 00 fb 6b jmp (t12) + 1dc: 00 00 ba 27 ldih gp,0(ra) + 1e0: 00 00 bd 23 ldi gp,0(gp) + 1e4: 00 00 7d a7 ldl t12,0(gp) + 1e8: 00 40 5b 6b call ra,(t12),1ec <__funcZ+0x1ec> + + was converted in the final link pass to: + + 12003aa88: 67 fa ff c3 br 120039428 <...> + 12003aa8c: 00 00 fe 2f unop + 12003aa90: 00 00 fe 2f unop + 12003aa94: 48 83 7d a7 ldl t12,-31928(gp) + 12003aa98: 00 40 5b 6b call ra,(t12),12003aa9c <__func+0x1ec> + +And in the noreturn case: + + The instruction stream from an object file: + + 54: 00 40 5b 6b call ra,(t12),58 <__func+0x58> + 58: 00 00 ba 27 ldih gp,0(ra) + 5c: 00 00 bd 23 ldi gp,0(gp) + 60: 00 00 7d a7 ldl t12,0(gp) + 64: 00 40 5b 6b call ra,(t12),68 <__func+0x68> + + was converted in the final link pass to: + + fdb24: a0 03 40 d3 bsr ra,fe9a8 <_called_func+0x8> + fdb28: 00 00 fe 2f unop + fdb2c: 00 00 fe 2f unop + fdb30: 30 82 7d a7 ldl t12,-32208(gp) + fdb34: 00 40 5b 6b call ra,(t12),fdb38 <__func+0x68> + + GP load instructions were wrongly cleared by the linker relaxation + pass. This workaround prevents removal of GP loads by inserting + an unop instruction between a sibcall or noreturn function call and + exception handler prologue. */ + + if (current_function_has_exception_handlers ()) + sw_64_pad_function_end (); +} + +static void +sw_64_file_start (void) +{ + default_file_start (); + + fputs ("\t.set noreorder\n", asm_out_file); + fputs ("\t.set volatile\n", asm_out_file); + fputs ("\t.set noat\n", asm_out_file); + if (TARGET_EXPLICIT_RELOCS) + fputs ("\t.set nomacro\n", asm_out_file); + if (TARGET_SUPPORT_ARCH | TARGET_BWX | TARGET_MAX | TARGET_FIX | TARGET_CIX + | TARGET_SW6A | TARGET_SW6B | TARGET_SW8A) + { + const char *arch; + + if (sw_64_cpu == PROCESSOR_SW6 || PROCESSOR_SW8 || TARGET_FIX + || TARGET_CIX) + { + if (TARGET_SW6A) + arch = "sw6a"; + else if (TARGET_SW6B) + arch = "sw6b"; + else if (TARGET_SW8A) + arch = "sw8a"; + else + arch = "sw6b"; + } + else + arch = "sw6b"; + + fprintf (asm_out_file, "\t.arch %s\n", arch); + } +} + +/* Since we don't have a .dynbss section, we should not allow global + relocations in the .rodata section. */ + +static int +sw_64_elf_reloc_rw_mask (void) +{ + return flag_pic ? 3 : 2; +} + +/* Return a section for X. The only special thing we do here is to + honor small data. */ + +static section * +sw_64_elf_select_rtx_section (machine_mode mode, rtx x, + unsigned HOST_WIDE_INT align) +{ + if (TARGET_SMALL_DATA && GET_MODE_SIZE (mode) <= g_switch_value) + /* ??? Consider using mergeable sdata sections. 
*/ + return sdata_section; + else + return default_elf_select_rtx_section (mode, x, align); +} + +static unsigned int +sw_64_elf_section_type_flags (tree decl, const char *name, int reloc) +{ + unsigned int flags = 0; + + if (strcmp (name, ".sdata") == 0 || strncmp (name, ".sdata.", 7) == 0 + || strncmp (name, ".gnu.linkonce.s.", 16) == 0 + || strcmp (name, ".sbss") == 0 || strncmp (name, ".sbss.", 6) == 0 + || strncmp (name, ".gnu.linkonce.sb.", 17) == 0) + flags = SECTION_SMALL; + + flags |= default_section_type_flags (decl, name, reloc); + return flags; +} + +/* Structure to collect function names for final output in link section. */ +/* Note that items marked with GTY can't be ifdef'ed out. */ + +enum reloc_kind +{ + KIND_LINKAGE, + KIND_CODEADDR +}; + +struct GTY (()) sw_64_links +{ + rtx func; + rtx linkage; + enum reloc_kind rkind; +}; + +rtx +sw_64_use_linkage (rtx func ATTRIBUTE_UNUSED, bool lflag ATTRIBUTE_UNUSED, + bool rflag ATTRIBUTE_UNUSED) +{ + return NULL_RTX; +} + +static void +sw_64_init_libfuncs (void) +{ +#ifdef MEM_LIBFUNCS_INIT + MEM_LIBFUNCS_INIT; +#endif +} + +/* On the Sw_64, we use this to disable the floating-point registers + when they don't exist. */ + +static void +sw_64_conditional_register_usage (void) +{ + int i; + if (!TARGET_FPREGS) + for (i = 32; i < 63; i++) + fixed_regs[i] = call_used_regs[i] = 1; +} + +/* Canonicalize a comparison from one we don't have to one we do have. */ + +static void +sw_64_canonicalize_comparison (int *code, rtx *op0, rtx *op1, + bool op0_preserve_value) +{ + if (!op0_preserve_value + && (*code == GE || *code == GT || *code == GEU || *code == GTU) + && (REG_P (*op1) || *op1 == const0_rtx)) + { + std::swap (*op0, *op1); + *code = (int) swap_condition ((enum rtx_code) * code); + } + + if ((*code == LT || *code == LTU) && CONST_INT_P (*op1) + && INTVAL (*op1) == 256) + { + *code = *code == LT ? LE : LEU; + *op1 = GEN_INT (255); + } +} + +/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. 
*/ + +static void +sw_64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) +{ + const unsigned HOST_WIDE_INT SWCR_STATUS_MASK = (0x3fUL << 17); + + tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv; + tree new_fenv_var, reload_fenv, restore_fnenv; + tree update_call, atomic_feraiseexcept, hold_fnclex; + + /* Generate the equivalent of : + unsigned long fenv_var; + fenv_var = __ieee_get_fp_control (); + + unsigned long masked_fenv; + masked_fenv = fenv_var & mask; + + __ieee_set_fp_control (masked_fenv); */ + + fenv_var = create_tmp_var_raw (long_unsigned_type_node); + get_fpscr + = build_fn_decl ("__ieee_get_fp_control", + build_function_type_list (long_unsigned_type_node, NULL)); + set_fpscr = build_fn_decl ("__ieee_set_fp_control", + build_function_type_list (void_type_node, NULL)); + mask = build_int_cst (long_unsigned_type_node, ~SWCR_STATUS_MASK); + ld_fenv = build4 (TARGET_EXPR, long_unsigned_type_node, fenv_var, + build_call_expr (get_fpscr, 0), NULL_TREE, NULL_TREE); + masked_fenv = build2 (BIT_AND_EXPR, long_unsigned_type_node, fenv_var, mask); + hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv); + *hold = build2 (COMPOUND_EXPR, void_type_node, + build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv), + hold_fnclex); + + /* Store the value of masked_fenv to clear the exceptions: + __ieee_set_fp_control (masked_fenv); */ + + *clear = build_call_expr (set_fpscr, 1, masked_fenv); + + /* Generate the equivalent of : + unsigned long new_fenv_var; + new_fenv_var = __ieee_get_fp_control (); + + __ieee_set_fp_control (fenv_var); + + __atomic_feraiseexcept (new_fenv_var); */ + + new_fenv_var = create_tmp_var_raw (long_unsigned_type_node); + reload_fenv = build4 (TARGET_EXPR, long_unsigned_type_node, new_fenv_var, + build_call_expr (get_fpscr, 0), NULL_TREE, NULL_TREE); + restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var); + atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT); + update_call + = build_call_expr (atomic_feraiseexcept, 1, + fold_convert (integer_type_node, new_fenv_var)); + *update = build2 (COMPOUND_EXPR, void_type_node, + build2 (COMPOUND_EXPR, void_type_node, reload_fenv, + restore_fnenv), + update_call); +} + +/* Implement TARGET_HARD_REGNO_MODE_OK. On Sw_64, the integer registers + can hold any mode. The floating-point registers can hold 64-bit + integers as well, but not smaller values. */ + +static bool +sw_64_hard_regno_mode_ok (unsigned int regno, machine_mode mode) +{ + if (IN_RANGE (regno, 32, 62)) + return (mode == SFmode || mode == DFmode || mode == DImode || mode == SCmode + || mode == DCmode); + return true; +} + +/* Implement TARGET_MODES_TIEABLE_P. This asymmetric test is true when + MODE1 could be put in an FP register but MODE2 could not. */ + +static bool +sw_64_modes_tieable_p (machine_mode mode1, machine_mode mode2) +{ + return (sw_64_hard_regno_mode_ok (32, mode1) + ? sw_64_hard_regno_mode_ok (32, mode2) + : true); +} + +/* Implement TARGET_CAN_CHANGE_MODE_CLASS. */ + +/************************************************* + * + * float fast_inverse_sqrt (float x) + * { + * float xhalf = 0.5f * x; + * int i = *(int *)&x ; + * i = 0x5f3759df - (i >> 1); + * x = *(float *)&i; + * x = x *(1.5f - xhalf * x *x); + * x = x *(1.5f - xhalf * x *x); // SPEC2006 435 need this + * return x; + * } + * + ***************************************************/ + +/* Load up a constant. all of the vector elements. 
*/ +static rtx +sw_64_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst) +{ + rtx reg; + + if (mode == SFmode || mode == DFmode) + { + rtx d = const_double_from_real_value (dconst, mode); + reg = force_reg (mode, d); + } + else + gcc_unreachable (); + + return reg; +} + +void +sw_64_emit_rsqrt (rtx dst, rtx x, bool note_p) +{ + machine_mode mode = GET_MODE (dst); + rtx one, xhalf, mhalf, i, magical, x0, x1, x2; + + enum insn_code code = optab_handler (smul_optab, mode); + insn_gen_fn gen_mul = GEN_FCN (code); + gcc_assert (code != CODE_FOR_nothing); + + enum insn_code code1 = optab_handler (sub_optab, SImode); + insn_gen_fn gen_sub = GEN_FCN (code1); + gcc_assert (code1 != CODE_FOR_nothing); + + enum insn_code code2 = optab_handler (fnma_optab, mode); + insn_gen_fn gen_fnma = GEN_FCN (code2); + gcc_assert (code2 != CODE_FOR_nothing); + + enum insn_code code3 = optab_handler (add_optab, mode); + insn_gen_fn gen_add = GEN_FCN (code3); + gcc_assert (code3 != CODE_FOR_nothing); + + one = sw_64_load_constant_and_splat (mode, dconst1); + mhalf = sw_64_load_constant_and_splat (mode, dconsthalf); + + /* xhalf = 0.5f * x. */ + xhalf = gen_reg_rtx (mode); + emit_insn (gen_mul (xhalf, mhalf, x)); + + if (x == CONST0_RTX (mode)) + gcc_unreachable (); + + /* int i = *(int *)&x. */ + rtx vreg = gen_rtx_REG (SFmode, 28); + + emit_insn ( + gen_rtx_SET (vreg, gen_rtx_UNSPEC (mode, gen_rtvec (1, x), UNSPEC_FIMOVS))); + + /* i = i >> 1. */ + i = gen_reg_rtx (DImode); + rtx subreg = gen_rtx_SUBREG (SImode, vreg, 0); + emit_insn (gen_extendsidi2 (i, subreg)); + emit_insn (gen_ashrdi3 (i, i, const1_rtx)); + + /* magical number: 0x5f3759df. */ + magical = gen_reg_rtx (SImode); + emit_insn (gen_rtx_SET (magical, GEN_INT (0x5f370000))); + emit_insn ( + gen_rtx_SET (magical, gen_rtx_PLUS (SImode, magical, GEN_INT (0x59df)))); + + /* x0 = 0x5f3759df - i. */ + subreg = gen_rtx_SUBREG (SImode, i, 0); + x0 = gen_reg_rtx (SImode); + emit_insn (gen_sub (x0, magical, subreg)); + + /* x = *(float *)&x0. */ + x = gen_rtx_REG (mode, 60); + x0 = gen_rtx_SUBREG (SFmode, x0, 0); + emit_insn (gen_rtx_SET (x, x0)); + + /* x= x *(1.5f - xhalf * x *x) */ + rtx number = gen_reg_rtx (mode); + emit_insn (gen_add (number, one, mhalf)); + + x1 = gen_reg_rtx (mode); + emit_insn (gen_mul (x1, x, x)); + emit_insn (gen_fnma (x1, x1, xhalf, number)); + emit_insn (gen_mul (x1, x1, x)); + + /* second iteration, SPEC2006 435 need this. */ + x2 = gen_reg_rtx (mode); + emit_insn (gen_mul (x2, x1, x1)); + emit_insn (gen_fnma (x2, x2, xhalf, number)); + emit_insn (gen_mul (dst, x2, x1)); +} + +rtx +gen_move_reg (rtx x) +{ + rtx temp = gen_reg_rtx (GET_MODE (x)); + emit_move_insn (temp, x); + return temp; +} + +/* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P, + * * add a reg_note saying that this was a division. Support both scalar + * and + * * vector divide. Assumes no trapping math and finite arguments. */ +void +sw_64_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p) +{ + machine_mode mode = GET_MODE (dst); + rtx one, x0, e0, x1, x2, xprev, eprev, xnext, enext, u, v; + int i; + + int passes = flag_sw_recip_precision ? 
2 : 1; + if (mode == DFmode) + passes += 2; + + enum insn_code code = optab_handler (smul_optab, mode); + insn_gen_fn gen_mul = GEN_FCN (code); + gcc_assert (code != CODE_FOR_nothing); + + enum insn_code code1 = optab_handler (fma_optab, mode); + insn_gen_fn gen_fma = GEN_FCN (code1); + gcc_assert (code1 != CODE_FOR_nothing); + + enum insn_code code2 = optab_handler (fnma_optab, mode); + insn_gen_fn gen_fnma = GEN_FCN (code2); + gcc_assert (code2 != CODE_FOR_nothing); + + one = sw_64_load_constant_and_splat (mode, dconst1); + + /* x0 = 1./d estimate */ + + x0 = gen_reg_rtx (mode); + emit_insn ( + gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d), UNSPEC_FRECX))); + + /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */ + if (passes > 1) + { + /* e0 = 1. - d * x0 */ + e0 = gen_reg_rtx (mode); + emit_insn (gen_fnma (e0, d, x0, one)); + + /* x1 = x0 + e0 * x0 */ + x1 = gen_reg_rtx (mode); + emit_insn (gen_fma (x1, x0, e0, x0)); + + for (i = 0, xprev = x1, eprev = e0; i < passes - 2; + ++i, xprev = xnext, eprev = enext) + { + /* enext = eprev * eprev */ + enext = gen_reg_rtx (mode); + emit_insn (gen_mul (enext, eprev, eprev)); + + /* xnext = xprev + enext * xprev */ + xnext = gen_reg_rtx (mode); + emit_insn (gen_fma (xnext, xprev, enext, xprev)); + } + } + else + xprev = x0; + + /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */ + /* u = n * xprev */ + u = gen_reg_rtx (mode); + emit_insn (gen_mul (u, n, xprev)); + + /* v = n - (d * u) */ + v = gen_reg_rtx (mode); + emit_insn (gen_fnma (v, d, u, n)); + + /* dst = (v * xprev) + u */ + emit_insn (gen_fma (dst, v, xprev, u)); +} + +int +enable_asan_check_stack () +{ + return asan_sanitize_stack_p (); +} + +static bool +sw_64_can_change_mode_class (machine_mode from, machine_mode to, + reg_class_t rclass) +{ + return (GET_MODE_SIZE (from) == GET_MODE_SIZE (to) + || !reg_classes_intersect_p (FLOAT_REGS, rclass)); +} +bool +sw_64_slow_unaligned_access (machine_mode mode, unsigned int align) +{ + return (flag_sw_unalign_byte != 1 || TARGET_SW8A == 0); +} + +static bool +sw_64_macro_fusion_p () +{ + return (flag_sw_branch_fusion == 1); +} + +static bool +sw_64_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp) +{ + rtx src, dest; + enum rtx_code ccode; + rtx compare_set = NULL_RTX, test_if, cond; + rtx alu_set = NULL_RTX, addr = NULL_RTX; + if (get_attr_type (condjmp) != TYPE_IBR) + return false; + if (get_attr_type (condgen) != TYPE_ICMP) + return false; + compare_set = single_set (condgen); + if (compare_set == NULL_RTX) + { + int i; + rtx pat = PATTERN (condgen); + for (i = 0; i < XVECLEN (pat, 0); i++) + if (GET_CODE (XVECEXP (pat, 0, i)) == SET) + { + rtx set_src = SET_SRC (XVECEXP (pat, 0, i)); + alu_set = XVECEXP (pat, 0, i); + } + } + if (compare_set == NULL_RTX) + return false; + src = SET_SRC (compare_set); + if (GET_CODE (src) == UNSPEC) + return false; + test_if = SET_SRC (pc_set (condjmp)); + cond = XEXP (test_if, 0); + ccode = GET_CODE (cond); + return true; +} + +/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */ +static unsigned HOST_WIDE_INT +sw_64_asan_shadow_offset (void) +{ + return (HOST_WIDE_INT_1 << 49); +} + +static void +sw_64_sa_mask (unsigned long *imaskP, unsigned long *fmaskP) +{ + unsigned long imask = 0; + unsigned long fmask = 0; + unsigned int i; + + /* When outputting a thunk, we don't have valid register life info, + but assemble_start_function wants to output .frame and .mask + directives. 
*/ + if (cfun->is_thunk) + { + *imaskP = 0; + *fmaskP = 0; + return; + } + +#ifdef SW_64_ENABLE_FULL_ASAN + if (frame_pointer_needed) + imask |= (1UL << HARD_FRAME_POINTER_REGNUM); +#endif + + /* One for every register we have to save. */ + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (!fixed_regs[i] && !call_used_regs[i] && df_regs_ever_live_p (i) + && i != REG_RA) + { + if (i < 32) + imask |= (1UL << i); + else + fmask |= (1UL << (i - 32)); + } + + /* We need to restore these for the handler. */ + if (crtl->calls_eh_return) + { + for (i = 0;; ++i) + { + unsigned regno = EH_RETURN_DATA_REGNO (i); + if (regno == INVALID_REGNUM) + break; + imask |= 1UL << regno; + } + } + + /* If any register spilled, then spill the return address also. */ + /* ??? This is required by the Digital stack unwind specification + and isn't needed if we're doing Dwarf2 unwinding. */ + if (imask || fmask || sw_64_ra_ever_killed ()) + imask |= (1UL << REG_RA); + + *imaskP = imask; + *fmaskP = fmask; +} + +int +sw_64_sa_size (void) +{ + unsigned long mask[2]; + int sa_size = 0; + int i, j; + + sw_64_sa_mask (&mask[0], &mask[1]); + + for (j = 0; j < 2; ++j) + for (i = 0; i < 32; ++i) + if ((mask[j] >> i) & 1) + sa_size++; + + /* Our size must be even (multiple of 16 bytes). */ + if (sa_size & 1) + sa_size++; + return sa_size * 8; +} + +#if 1 +/* Sw64 stack frames generated by this compiler look like: + + +-------------------------------+ + | | + | incoming stack arguments | + | | + +-------------------------------+ + | | <-- incoming stack pointer (aligned) + | callee-allocated save area | + | for register varargs | + | | + +-------------------------------+ + | local variables | <-- frame_pointer_rtx + | | + +-------------------------------+ + | padding | + +-------------------------------+ + | callee-saved registers | frame.saved_regs_size + +-------------------------------+ + | FP' | + +-------------------------------+ + | RA' | + +-------------------------------+ <- hard_frame_pointer_rtx (aligned) + | padding | + +-------------------------------+ + | outgoing stack arguments | <-- arg_pointer + | | + +-------------------------------+ + | | <-- stack_pointer_rtx (aligned) + + The following registers are reserved during frame layout and should not be + used for any other purpose: + + TODO: add other register purpose + - r26(RA), r15(FP): Used by standard frame layout. + + These registers must be avoided in frame layout related code unless the + explicit intention is to interact with one of the features listed above. */ + +static void +sw_64_layout_frame (void) +{ + poly_int64 offset = 0; + + cfun->machine->frame.emit_frame_pointer + = frame_pointer_needed || crtl->calls_eh_return; + + unsigned HOST_WIDE_INT sa_mask = 0; + int sa_size; + + /* When outputting a thunk, we don't have valid register life info, + but assemble_start_function wants to output .frame and .mask + directives. */ + if (!cfun->is_thunk) + { + /* One for every register we have to save. */ + for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (!call_used_or_fixed_reg_p (i) && df_regs_ever_live_p (i) + && i != REG_RA) + sa_mask |= HOST_WIDE_INT_1U << i; + + /* We need to restore these for the handler. */ + if (crtl->calls_eh_return) + { + for (unsigned i = 0;; ++i) + { + unsigned regno = EH_RETURN_DATA_REGNO (i); + if (regno == INVALID_REGNUM) + break; + sa_mask |= HOST_WIDE_INT_1U << regno; + } + } + /* If any register spilled, then spill the return address also. */ + /* ??? 
This is required by the Digital stack unwind specification + and isn't needed if we're doing Dwarf2 unwinding. */ + if (sa_mask || sw_64_ra_ever_killed ()) + sa_mask |= HOST_WIDE_INT_1U << REG_RA; + } + sa_size = popcount_hwi (sa_mask); + poly_int64 frame_size = get_frame_size (); + + /* Our size must be even (multiple of 16 bytes). */ + if (sa_size & 1) + sa_size++; + sa_size *= 8; + + poly_int64 varargs_and_saved_regs_size + = sa_size + cfun->machine->frame.saved_varargs_size + + crtl->args.pretend_args_size; + + poly_int64 varargs_size + = cfun->machine->frame.saved_varargs_size + crtl->args.pretend_args_size; + + HOST_WIDE_INT extra_alignment + = SW_64_ROUND (frame_size + cfun->machine->frame.saved_varargs_size) + - cfun->machine->frame.saved_varargs_size; + + poly_int64 outgoing_args = SW_64_ROUND (crtl->outgoing_args_size); + + cfun->machine->frame.local_offset + = cfun->machine->frame.saved_varargs_size + crtl->args.pretend_args_size; + + poly_int64 total_size + = aligned_upper_bound (varargs_and_saved_regs_size + frame_size, + STACK_BOUNDARY / BITS_PER_UNIT) + + outgoing_args; + + cfun->machine->frame.hard_frame_pointer_offset + = aligned_upper_bound (varargs_and_saved_regs_size + frame_size, + STACK_BOUNDARY / BITS_PER_UNIT); + + // TODO: does sw64 need this feild? + cfun->machine->frame.callee_offset + = cfun->machine->frame.hard_frame_pointer_offset; + + cfun->machine->frame.arg_pointer_offset = total_size - varargs_size; + + cfun->machine->frame.sa_mask = sa_mask; + cfun->machine->frame.saved_regs_size = sa_size; + cfun->machine->frame.frame_size = total_size; +} +#endif + +/* Define the offset between two registers, one to be eliminated, + and the other its replacement, at the start of a routine. */ + +HOST_WIDE_INT +sw_64_initial_elimination_offset (unsigned int from, + unsigned int to ATTRIBUTE_UNUSED) +{ + HOST_WIDE_INT ret; +#ifdef SW_64_ENABLE_FULL_ASAN + if (to == HARD_FRAME_POINTER_REGNUM) + { + if (from == ARG_POINTER_REGNUM) + { + // TODO: in sw64 variable arguments processing, all regs + // and pretending arguments offset a passive, so we have + // to minus varargs size. May be fix it is a better way? + return cfun->machine->frame.hard_frame_pointer_offset + - cfun->machine->frame.local_offset; + } + + if (from == FRAME_POINTER_REGNUM) + { + return cfun->machine->frame.hard_frame_pointer_offset + - cfun->machine->frame.local_offset; + } + } + + if (to == STACK_POINTER_REGNUM) + { + if (from == ARG_POINTER_REGNUM) + { + // TODO: same as HARD_FRAME_POINTER_REGNUM; + return cfun->machine->frame.arg_pointer_offset; + } + if (from == FRAME_POINTER_REGNUM) + { + return cfun->machine->frame.arg_pointer_offset; + } + } + + return cfun->machine->frame.frame_size; +#else + ret = sw_64_sa_size (); + if (!frame_pointer_needed) + ret += SW_64_ROUND (crtl->outgoing_args_size); + + switch (from) + { + case FRAME_POINTER_REGNUM: + break; + + case ARG_POINTER_REGNUM: + ret += (SW_64_ROUND (get_frame_size () + crtl->args.pretend_args_size) + - crtl->args.pretend_args_size); + break; + + default: + gcc_unreachable (); + } + + return ret; +#endif +} + +/* Compute the frame size. SIZE is the size of the "naked" frame + and SA_SIZE is the size of the register save area. 
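+
+ For instance, in the non-ASAN case with the default 16-byte SW_64_ROUND,
+ no pretend arguments, 40 bytes of locals, two saved registers
+ (SA_SIZE = 16) and 24 bytes of outgoing arguments, the result is
+ SW_64_ROUND (24) + 16 + SW_64_ROUND (40) = 32 + 16 + 48 = 96 bytes.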
*/ + +static HOST_WIDE_INT +compute_frame_size (HOST_WIDE_INT size, HOST_WIDE_INT sa_size) +{ +#ifdef SW_64_ENABLE_FULL_ASAN + // sw_64_layout_frame (); + return cfun->machine->frame.frame_size; +#else + return SW_64_ROUND (crtl->outgoing_args_size) + sa_size + + SW_64_ROUND (size + crtl->args.pretend_args_size); +#endif +} + +/* Initialize the GCC target structure. */ +#undef TARGET_IN_SMALL_DATA_P +#define TARGET_IN_SMALL_DATA_P sw_64_in_small_data_p + +#undef TARGET_ASM_ALIGNED_HI_OP +#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t" +#undef TARGET_ASM_ALIGNED_DI_OP +#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t" + +/* Default unaligned ops are provided for ELF systems. To get unaligned + data for non-ELF systems, we have to turn off auto alignment. */ +#undef TARGET_ASM_RELOC_RW_MASK +#define TARGET_ASM_RELOC_RW_MASK sw_64_elf_reloc_rw_mask +#undef TARGET_ASM_SELECT_RTX_SECTION +#define TARGET_ASM_SELECT_RTX_SECTION sw_64_elf_select_rtx_section +#undef TARGET_SECTION_TYPE_FLAGS +#define TARGET_SECTION_TYPE_FLAGS sw_64_elf_section_type_flags + +#undef TARGET_ASM_FUNCTION_END_PROLOGUE +#define TARGET_ASM_FUNCTION_END_PROLOGUE sw_64_output_function_end_prologue + +#undef TARGET_INIT_LIBFUNCS +#define TARGET_INIT_LIBFUNCS sw_64_init_libfuncs + +#undef TARGET_LEGITIMIZE_ADDRESS +#define TARGET_LEGITIMIZE_ADDRESS sw_64_legitimize_address +#undef TARGET_MODE_DEPENDENT_ADDRESS_P +#define TARGET_MODE_DEPENDENT_ADDRESS_P sw_64_mode_dependent_address_p + +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START sw_64_file_start + +#undef TARGET_SCHED_ADJUST_COST +#define TARGET_SCHED_ADJUST_COST sw_64_adjust_cost +#undef TARGET_SCHED_ISSUE_RATE +#define TARGET_SCHED_ISSUE_RATE sw_64_issue_rate +#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD +#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ + sw_64_multipass_dfa_lookahead + +#undef TARGET_HAVE_TLS +#define TARGET_HAVE_TLS HAVE_AS_TLS + +#undef TARGET_BUILTIN_DECL +#define TARGET_BUILTIN_DECL sw_64_builtin_decl +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS sw_64_init_builtins +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN sw_64_expand_builtin +#undef TARGET_FOLD_BUILTIN +#define TARGET_FOLD_BUILTIN sw_64_fold_builtin +#undef TARGET_GIMPLE_FOLD_BUILTIN +#define TARGET_GIMPLE_FOLD_BUILTIN sw_64_gimple_fold_builtin + +#undef TARGET_FUNCTION_OK_FOR_SIBCALL +#define TARGET_FUNCTION_OK_FOR_SIBCALL sw_64_function_ok_for_sibcall +#undef TARGET_CANNOT_COPY_INSN_P +#define TARGET_CANNOT_COPY_INSN_P sw_64_cannot_copy_insn_p +#undef TARGET_LEGITIMATE_CONSTANT_P +#define TARGET_LEGITIMATE_CONSTANT_P sw_64_legitimate_constant_p +#undef TARGET_CANNOT_FORCE_CONST_MEM +#define TARGET_CANNOT_FORCE_CONST_MEM sw_64_cannot_force_const_mem + +#undef TARGET_ASM_OUTPUT_MI_THUNK +#define TARGET_ASM_OUTPUT_MI_THUNK sw_64_output_mi_thunk_osf +#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \ + hook_bool_const_tree_hwi_hwi_const_tree_true +#undef TARGET_STDARG_OPTIMIZE_HOOK +#define TARGET_STDARG_OPTIMIZE_HOOK sw_64_stdarg_optimize_hook + +#undef TARGET_PRINT_OPERAND +#define TARGET_PRINT_OPERAND sw_64_print_operand +#undef TARGET_PRINT_OPERAND_ADDRESS +#define TARGET_PRINT_OPERAND_ADDRESS sw_64_print_operand_address +#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P +#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sw_64_print_operand_punct_valid_p + +/* Use 16-bits anchor. 
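+ Objects within [anchor - 0x8000, anchor + 0x7fff] can be addressed
+ relative to a section anchor, which keeps the offset within the signed
+ 16-bit displacement used for memory references on this target;
+ CONSTANT_ADDRESS_P below accepts the same range.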
*/ +#undef TARGET_MIN_ANCHOR_OFFSET +#define TARGET_MIN_ANCHOR_OFFSET -0x7fff - 1 +#undef TARGET_MAX_ANCHOR_OFFSET +#define TARGET_MAX_ANCHOR_OFFSET 0x7fff +#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P +#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true + +#undef TARGET_REGISTER_MOVE_COST +#define TARGET_REGISTER_MOVE_COST sw_64_register_move_cost +#undef TARGET_MEMORY_MOVE_COST +#define TARGET_MEMORY_MOVE_COST sw_64_memory_move_cost +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS sw_64_rtx_costs +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0 + +#undef TARGET_MACHINE_DEPENDENT_REORG +#define TARGET_MACHINE_DEPENDENT_REORG sw_64_reorg + +#undef TARGET_PROMOTE_FUNCTION_MODE +#define TARGET_PROMOTE_FUNCTION_MODE \ + default_promote_function_mode_always_promote +#undef TARGET_PROMOTE_PROTOTYPES +#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_false + +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE sw_64_function_value +#undef TARGET_LIBCALL_VALUE +#define TARGET_LIBCALL_VALUE sw_64_libcall_value +#undef TARGET_FUNCTION_VALUE_REGNO_P +#define TARGET_FUNCTION_VALUE_REGNO_P sw_64_function_value_regno_p +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY sw_64_return_in_memory +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE sw_64_pass_by_reference +#undef TARGET_SETUP_INCOMING_VARARGS +#define TARGET_SETUP_INCOMING_VARARGS sw_64_setup_incoming_varargs +#undef TARGET_STRICT_ARGUMENT_NAMING +#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true +#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED +#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true +#undef TARGET_SPLIT_COMPLEX_ARG +#define TARGET_SPLIT_COMPLEX_ARG sw_64_split_complex_arg +#undef TARGET_GIMPLIFY_VA_ARG_EXPR +#define TARGET_GIMPLIFY_VA_ARG_EXPR sw_64_gimplify_va_arg +#undef TARGET_ARG_PARTIAL_BYTES +#define TARGET_ARG_PARTIAL_BYTES sw_64_arg_partial_bytes +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG sw_64_function_arg +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE sw_64_function_arg_advance +#undef TARGET_TRAMPOLINE_INIT +#define TARGET_TRAMPOLINE_INIT sw_64_trampoline_init + +#undef TARGET_INSTANTIATE_DECLS +#define TARGET_INSTANTIATE_DECLS sw_64_instantiate_decls + +#undef TARGET_SECONDARY_RELOAD +#define TARGET_SECONDARY_RELOAD sw_64_secondary_reload +#undef TARGET_SECONDARY_MEMORY_NEEDED +#define TARGET_SECONDARY_MEMORY_NEEDED sw_64_secondary_memory_needed +#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE +#define TARGET_SECONDARY_MEMORY_NEEDED_MODE sw_64_secondary_memory_needed_mode + +#undef TARGET_SCALAR_MODE_SUPPORTED_P +#define TARGET_SCALAR_MODE_SUPPORTED_P sw_64_scalar_mode_supported_p +#undef TARGET_VECTOR_MODE_SUPPORTED_P +#define TARGET_VECTOR_MODE_SUPPORTED_P sw_64_vector_mode_supported_p + +#undef TARGET_BUILD_BUILTIN_VA_LIST +#define TARGET_BUILD_BUILTIN_VA_LIST sw_64_build_builtin_va_list + +#undef TARGET_EXPAND_BUILTIN_VA_START +#define TARGET_EXPAND_BUILTIN_VA_START sw_64_va_start + +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE sw_64_option_override + +#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE +#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE sw_64_override_options_after_change + +#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING +#undef TARGET_MANGLE_TYPE +#define TARGET_MANGLE_TYPE sw_64_mangle_type +#endif + +#undef TARGET_LRA_P +#define TARGET_LRA_P hook_bool_void_false + +#undef TARGET_LEGITIMATE_ADDRESS_P +#define 
TARGET_LEGITIMATE_ADDRESS_P sw_64_legitimate_address_p + +#undef TARGET_CONDITIONAL_REGISTER_USAGE +#define TARGET_CONDITIONAL_REGISTER_USAGE sw_64_conditional_register_usage + +#undef TARGET_CANONICALIZE_COMPARISON +#define TARGET_CANONICALIZE_COMPARISON sw_64_canonicalize_comparison + +#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV +#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sw_64_atomic_assign_expand_fenv + +#undef TARGET_HARD_REGNO_MODE_OK +#define TARGET_HARD_REGNO_MODE_OK sw_64_hard_regno_mode_ok +#undef TARGET_SLOW_UNALIGNED_ACCESS +#define TARGET_SLOW_UNALIGNED_ACCESS sw_64_slow_unaligned_access +#undef TARGET_MODES_TIEABLE_P +#define TARGET_MODES_TIEABLE_P sw_64_modes_tieable_p + +#undef TARGET_CAN_CHANGE_MODE_CLASS +#define TARGET_CAN_CHANGE_MODE_CLASS sw_64_can_change_mode_class + +#undef TARGET_SCHED_MACRO_FUSION_P +#define TARGET_SCHED_MACRO_FUSION_P sw_64_macro_fusion_p + +#undef TARGET_SCHED_MACRO_FUSION_PAIR_P +#define TARGET_SCHED_MACRO_FUSION_PAIR_P sw_64_macro_fusion_pair_p +#undef TARGET_ASAN_SHADOW_OFFSET +#define TARGET_ASAN_SHADOW_OFFSET sw_64_asan_shadow_offset + +struct gcc_target targetm = TARGET_INITIALIZER; + +#include "gt-sw-64.h" diff --git a/gcc/config/sw_64/sw_64.h b/gcc/config/sw_64/sw_64.h new file mode 100644 index 0000000000000000000000000000000000000000..8e3bb0241dae498109f15ca007a9e0eeb4240f06 --- /dev/null +++ b/gcc/config/sw_64/sw_64.h @@ -0,0 +1,999 @@ +/* Definitions of target machine for GNU compiler, for Sw_64. + Copyright (C) 1992-2020 Free Software Foundation, Inc. + Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Target CPU builtins. */ +#define TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__sw_64"); \ + builtin_define ("__sw_64__"); \ + builtin_assert ("cpu=sw_64"); \ + builtin_assert ("machine=sw_64"); \ + if (TARGET_CIX) \ + { \ + builtin_define ("__sw_64_cix__"); \ + builtin_assert ("cpu=cix"); \ + } \ + if (TARGET_FIX) \ + { \ + builtin_define ("__sw_64_fix__"); \ + builtin_assert ("cpu=fix"); \ + } \ + if (TARGET_BWX) \ + { \ + builtin_define ("__sw_64_bwx__"); \ + builtin_assert ("cpu=bwx"); \ + } \ + if (TARGET_MAX) \ + { \ + builtin_define ("__sw_64_max__"); \ + builtin_assert ("cpu=max"); \ + } \ + if (sw_64_cpu_string) \ + { \ + if (strcmp (sw_64_cpu_string, "sw6a") == 0) \ + { \ + builtin_define ("__sw_64_sw6a__"); \ + builtin_assert ("cpu=sw6a"); \ + } \ + else if (strcmp (sw_64_cpu_string, "sw6b") == 0) \ + { \ + builtin_define ("__sw_64_sw6b__"); \ + builtin_assert ("cpu=sw6b"); \ + } \ + else if (strcmp (sw_64_cpu_string, "sw8a") == 0) \ + { \ + builtin_define ("__sw_64_sw8a__"); \ + builtin_assert ("cpu=sw8a"); \ + } \ + } \ + else /* Presumably sw6b. 
*/ \ + { \ + builtin_define ("__sw_64_sw6b__"); \ + builtin_assert ("cpu=sw6b"); \ + } \ + if (TARGET_IEEE || TARGET_IEEE_WITH_INEXACT) \ + builtin_define ("_IEEE_FP"); \ + if (TARGET_IEEE_WITH_INEXACT) \ + builtin_define ("_IEEE_FP_INEXACT"); \ + if (TARGET_LONG_DOUBLE_128) \ + builtin_define ("__LONG_DOUBLE_128__"); \ + \ + /* Macros dependent on the C dialect. */ \ + SUBTARGET_LANGUAGE_CPP_BUILTINS (); \ + } \ + while (0) + +#ifndef SUBTARGET_LANGUAGE_CPP_BUILTINS +#define SUBTARGET_LANGUAGE_CPP_BUILTINS() \ + do \ + { \ + if (preprocessing_asm_p ()) \ + builtin_define_std ("LANGUAGE_ASSEMBLY"); \ + else if (c_dialect_cxx ()) \ + { \ + builtin_define ("__LANGUAGE_C_PLUS_PLUS"); \ + builtin_define ("__LANGUAGE_C_PLUS_PLUS__"); \ + } \ + else \ + builtin_define_std ("LANGUAGE_C"); \ + if (c_dialect_objc ()) \ + { \ + builtin_define ("__LANGUAGE_OBJECTIVE_C"); \ + builtin_define ("__LANGUAGE_OBJECTIVE_C__"); \ + } \ + } \ + while (0) +#endif + +/* Run-time compilation parameters selecting different hardware subsets. */ + +/* Which processor to schedule for. The cpu attribute defines a list that + mirrors this list, so changes to sw_64.md must be made at the same time. */ + +enum processor_type +{ + PROCESSOR_SW6, /* SW6 */ + PROCESSOR_SW8, /* SW8 */ + PROCESSOR_MAX +}; + +extern enum processor_type sw_64_cpu; +extern enum processor_type sw_64_tune; + +enum sw_64_trap_precision +{ + SW_64_TP_PROG, /* No precision (default). */ + SW_64_TP_FUNC, /* Trap contained within originating function. */ + SW_64_TP_INSN /* Instruction accuracy and code is resumption safe. */ +}; + +enum sw_64_fp_rounding_mode +{ + SW_64_FPRM_NORM, /* Normal rounding mode. */ + SW_64_FPRM_MINF, /* Round towards minus-infinity. */ + SW_64_FPRM_CHOP, /* Chopped rounding mode (towards 0). */ + SW_64_FPRM_DYN /* Dynamic rounding mode. */ +}; + +enum sw_64_fp_trap_mode +{ + SW_64_FPTM_N, /* Normal trap mode. */ + SW_64_FPTM_U, /* Underflow traps enabled. */ + SW_64_FPTM_SU, /* Software completion, w/underflow traps. */ + SW_64_FPTM_SUI /* Software completion, w/underflow & inexact traps. */ +}; + +extern enum sw_64_trap_precision sw_64_tp; +extern enum sw_64_fp_rounding_mode sw_64_fprm; +extern enum sw_64_fp_trap_mode sw_64_fptm; + +/* Invert the easy way to make options work. */ +#define TARGET_FP (!TARGET_SOFT_FP) + +/* Macros to silence warnings about numbers being signed in traditional + * C and unsigned in ISO C when compiled on 32-bit hosts. */ + +#define BITMASK_HIGH (((unsigned long) 1) << 31) /* 0x80000000. */ + +/* These are for target os support and cannot be changed at runtime. */ +#define TARGET_ABI_OPEN_VMS 0 +#define TARGET_ABI_OSF 1 + +#ifndef TARGET_CAN_FAULT_IN_PROLOGUE +#define TARGET_CAN_FAULT_IN_PROLOGUE 0 +#endif +#ifndef TARGET_HAS_XFLOATING_LIBS +#define TARGET_HAS_XFLOATING_LIBS TARGET_LONG_DOUBLE_128 +#endif +#ifndef TARGET_PROFILING_NEEDS_GP +#define TARGET_PROFILING_NEEDS_GP 0 +#endif +#ifndef HAVE_AS_TLS +#define HAVE_AS_TLS 0 +#endif + +#define TARGET_DEFAULT MASK_FPREGS + +#ifndef TARGET_CPU_DEFAULT +#define TARGET_CPU_DEFAULT 0 +#endif + +#ifndef TARGET_DEFAULT_EXPLICIT_RELOCS +#ifdef HAVE_AS_EXPLICIT_RELOCS +#define TARGET_DEFAULT_EXPLICIT_RELOCS MASK_EXPLICIT_RELOCS +#define TARGET_SUPPORT_ARCH 1 +#else +#define TARGET_DEFAULT_EXPLICIT_RELOCS 0 +#endif +#endif + +#ifndef TARGET_SUPPORT_ARCH +#define TARGET_SUPPORT_ARCH 0 +#endif + +/* Support for a compile-time default CPU, et cetera. The rules are: + --with-cpu is ignored if -mcpu is specified. 
+ --with-tune is ignored if -mtune is specified. */ +#define OPTION_DEFAULT_SPECS \ + {"cpu", "%{!mcpu=*:-mcpu=%(VALUE)}"}, \ + { \ + "tune", "%{!mtune=*:-mtune=%(VALUE)}" \ + } + +/* target machine storage layout */ + + +/* Define the size of `int'. The default is the same as the word size. */ +#define INT_TYPE_SIZE 32 + +#define LONG_TYPE_SIZE (TARGET_SW_M32 ? 32 : 64) + +/* Define the size of `long long'. The default is the twice the word size. */ +#define LONG_LONG_TYPE_SIZE 64 + +/* The two floating-point formats we support are S-floating, which is + 4 bytes, and T-floating, which is 8 bytes. `float' is S and `double' + and `long double' are T. */ + +#define FLOAT_TYPE_SIZE 32 +#define DOUBLE_TYPE_SIZE 64 +#define LONG_DOUBLE_TYPE_SIZE (TARGET_LONG_DOUBLE_128 ? 128 : 64) + +/* Work around target_flags dependency in ada/targtyps.c. */ +#define WIDEST_HARDWARE_FP_SIZE 64 + +#define WCHAR_TYPE "unsigned int" +#define WCHAR_TYPE_SIZE 32 + +/* Define this macro if it is advisable to hold scalars in registers + in a wider mode than that declared by the program. In such cases, + the value is constrained to be within the bounds of the declared + type, but kept valid in the wider mode. The signedness of the + extension may differ from that of the type. + + For Sw_64, we always store objects in a full register. 32-bit integers + are always sign-extended, but smaller objects retain their signedness. + + Note that small vector types can get mapped onto integer modes at the + whim of not appearing in sw_64-modes.def. We never promoted these + values before; don't do so now that we've trimmed the set of modes to + those actually implemented in the backend. */ + +#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \ + if (!TARGET_SW_M32 \ + && (GET_MODE_CLASS (MODE) == MODE_INT \ + && (TYPE == NULL || TREE_CODE (TYPE) != VECTOR_TYPE) \ + && GET_MODE_SIZE (MODE) < UNITS_PER_WORD)) \ + { \ + if ((MODE) == SImode) \ + (UNSIGNEDP) = 0; \ + (MODE) = DImode; \ + } + +/* Define this if most significant bit is lowest numbered + in instructions that operate on numbered bit-fields. + + There are no such instructions on the Sw_64, but the documentation + is little endian. */ +#define BITS_BIG_ENDIAN 0 + +/* Define this if most significant byte of a word is the lowest numbered. + This is false on the Sw_64. */ +#define BYTES_BIG_ENDIAN 0 + +/* Define this if most significant word of a multiword number is lowest + numbered. + + For Sw_64 we can decide arbitrarily since there are no machine instructions + for them. Might as well be consistent with bytes. */ +#define WORDS_BIG_ENDIAN 0 + +/* Width of a word, in units (bytes). */ +#define UNITS_PER_WORD 8 + +/* Width in bits of a pointer. + See also the macro `Pmode' defined below. */ +#define POINTER_SIZE (TARGET_SW_M32 ? 32 : 64) + +/* Allocation boundary (in *bits*) for storing arguments in argument list. */ +#define PARM_BOUNDARY 64 + +/* Boundary (in *bits*) on which stack pointer should be aligned. */ +#define STACK_BOUNDARY ((TARGET_SW_SIMD || TARGET_SW_32ALIGN) ? 256 : 128) + +/* Allocation boundary (in *bits*) for the code of a function. */ +#define FUNCTION_BOUNDARY 32 + +/* Alignment of field after `int : 0' in a structure. */ +#define EMPTY_FIELD_BOUNDARY 64 + +/* Every structure's size must be a multiple of this. */ +#define STRUCTURE_SIZE_BOUNDARY 8 + +/* A bit-field declared as `int' forces `int' alignment for the struct. */ +#undef PCC_BITFILED_TYPE_MATTERS +#define PCC_BITFIELD_TYPE_MATTERS 1 + +/* No data type wants to be aligned rounder than this. 
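+ (256 bits when TARGET_SW_SIMD or TARGET_SW_32ALIGN is in effect,
+ otherwise 128 bits; the same condition governs STACK_BOUNDARY above and
+ the 32-byte SW_64_ROUND used for frame layout.)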
*/ +#define BIGGEST_ALIGNMENT ((TARGET_SW_SIMD || TARGET_SW_32ALIGN) ? 256 : 128) +/* For atomic access to objects, must have at least 32-bit alignment + unless the machine has byte operations. */ +#define MINIMUM_ATOMIC_ALIGNMENT ((unsigned int) (TARGET_BWX ? 8 : 32)) + +/* Align all constants and variables to at least a word boundary so + we can pick up pieces of them faster. */ +/* ??? Only if block-move stuff knows about different source/destination + alignment. */ +#if 0 +#define DATA_ALIGNMENT(EXP, ALIGN) MAX ((ALIGN), BITS_PER_WORD) +#endif + +/* Set this nonzero if move instructions will actually fail to work + when given unaligned data. + + Since we get an error message when we do one, call them invalid. */ + +#define STRICT_ALIGNMENT 1 + +#define SW64_EXPAND_ALIGNMENT(COND, EXP, ALIGN) \ + (((COND) && ((ALIGN) < BITS_PER_WORD) \ + && (TREE_CODE (EXP) == ARRAY_TYPE || TREE_CODE (EXP) == UNION_TYPE \ + || TREE_CODE (EXP) == RECORD_TYPE)) \ + ? BITS_PER_WORD \ + : (ALIGN)) + +/* Similarly, make sure that objects on the stack are sensibly aligned. */ +#define LOCAL_ALIGNMENT(EXP, ALIGN) \ + SW64_EXPAND_ALIGNMENT (!flag_conserve_stack, EXP, ALIGN) + +/* Standard register usage. */ + +/* Number of actual hardware registers. + The hardware registers are assigned numbers for the compiler + from 0 to just below FIRST_PSEUDO_REGISTER. + All registers that the compiler knows about must be given numbers, + even those that are not normally considered general registers. + + We define all 32 integer registers, even though $31 is always zero, + and all 32 floating-point registers, even though $f31 is also + always zero. We do not bother defining the FP status register and + there are no other registers. + + Since $31 is always zero, we will use register number 31 as the + argument pointer. It will never appear in the generated code + because we will always be eliminating it in favor of the stack + pointer or hardware frame pointer. + + Likewise, we use $f31 for the frame pointer, which will always + be eliminated in favor of the hardware frame pointer or the + stack pointer. */ + +#define FIRST_PSEUDO_REGISTER 64 + +/* 1 for registers that have pervasive standard uses + and are not available for the register allocator. */ + +#define FIXED_REGISTERS \ + { \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 \ + } + +/* 1 for registers not available across function calls. + These must include the FIXED_REGISTERS and also any + registers that can be used without being saved. + The latter must include the registers where values are returned + and the register where structure-value addresses are passed. + Aside from that, you can include as many other registers as you like. */ +#define CALL_USED_REGISTERS \ + { \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 \ + } + +/* List the order in which to allocate registers. Each register must be + listed once, even those in FIXED_REGISTERS. 
*/ + +#define REG_ALLOC_ORDER \ + { \ + 1, 2, 3, 4, 5, 6, 7, 8, /* nonsaved integer registers */ \ + 22, 23, 24, 25, 28, /* likewise */ \ + 0, /* likewise, but return value */ \ + 21, 20, 19, 18, 17, 16, /* likewise, but input args */ \ + 27, /* likewise, but SYSV procedure value */ \ + \ + 42, 43, 44, 45, 46, 47, /* nonsaved floating-point registers */ \ + 54, 55, 56, 57, 58, 59, /* likewise */ \ + 60, 61, 62, /* likewise */ \ + 32, 33, /* likewise, but return values */ \ + 53, 52, 51, 50, 49, 48, /* likewise, but input args */ \ + \ + 9, 10, 11, 12, 13, 14, /* saved integer registers */ \ + 26, /* return address */ \ + 15, /* hard frame pointer */ \ + \ + 34, 35, 36, 37, 38, 39, /* saved floating-point registers */ \ + 40, 41, /* likewise */ \ + \ + 29, 30, 31, 63 /* gp, sp, ap, sfp */ \ + } + +/* Specify the registers used for certain standard purposes. + The values of these macros are register numbers. */ + +/* Sw_64 pc isn't overloaded on a register that the compiler knows about. */ +/* #define PC_REGNUM */ + +/* Register to use for pushing function arguments. */ +#define STACK_POINTER_REGNUM 30 + +/* Base register for access to local variables of the function. */ +#define HARD_FRAME_POINTER_REGNUM 15 + +/* Base register for access to arguments of the function. */ +#define ARG_POINTER_REGNUM 31 + +/* Base register for access to local variables of function. */ +#define FRAME_POINTER_REGNUM 63 + +/* Register in which static-chain is passed to a function. + + For the Sw_64, this is based on an example; the calling sequence + doesn't seem to specify this. */ +#define STATIC_CHAIN_REGNUM 1 + +/* The register number of the register used to address a table of + static data addresses in memory. */ +#define PIC_OFFSET_TABLE_REGNUM 29 + +/* Define this macro if the register defined by `PIC_OFFSET_TABLE_REGNUM' + is clobbered by calls. */ +/* ??? It is and it isn't. It's required to be valid for a given + function when the function returns. It isn't clobbered by + current_file functions. Moreover, we do not expose the ldgp + until after reload, so we're probably safe. */ +/* #define PIC_OFFSET_TABLE_REG_CALL_CLOBBERED */ + +/* Define the classes of registers for register constraints in the + machine description. Also define ranges of constants. + + One of the classes must always be named ALL_REGS and include all hard regs. + If there is more than one class, another class must be named NO_REGS + and contain no registers. + + The name GENERAL_REGS must be the name of a class (or an alias for + another name such as ALL_REGS). This is the class of registers + that is allowed by "g" or "r" in a register constraint. + Also, registers outside this class are allocated only when + instructions express preferences for them. + + The classes must be numbered in nondecreasing order; that is, + a larger-numbered class must never be contained completely + in a smaller-numbered class. + + For any two classes, it is very desirable that there be another + class that represents their union. */ + +enum reg_class +{ + NO_REGS, + R0_REG, + R24_REG, + R25_REG, + R27_REG, + GENERAL_REGS, + FLOAT_REGS, + ALL_REGS, + LIM_REG_CLASSES +}; + +#define N_REG_CLASSES (int) LIM_REG_CLASSES + +/* Give names of register classes as strings for dump file. */ + +#define REG_CLASS_NAMES \ + { \ + "NO_REGS", "R0_REG", "R24_REG", "R25_REG", "R27_REG", "GENERAL_REGS", \ + "FLOAT_REGS", "ALL_REGS" \ + } + +/* Define which registers fit in which classes. + This is an initializer for a vector of HARD_REG_SET + of length N_REG_CLASSES. 
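+
+ Each initializer is two 32-bit words: bit N of the first word is hard
+ register N (the integer registers), bit N of the second word is hard
+ register 32 + N (the floating registers and the soft frame pointer).
+ For example, R27_REG is {0x08000000, 0x00000000} because only bit 27 is
+ set; FLOAT_REGS is {0x00000000, 0x7fffffff}, i.e. registers 32-62
+ ($f0-$f30); GENERAL_REGS additionally sets bit 31 of the second word so
+ that register 63 (FRAME_POINTER_REGNUM) is treated as a general register.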
*/ + +#define REG_CLASS_CONTENTS \ + { \ + {0x00000000, 0x00000000}, /* NO_REGS */ \ + {0x00000001, 0x00000000}, /* R0_REG */ \ + {0x01000000, 0x00000000}, /* R24_REG */ \ + {0x02000000, 0x00000000}, /* R25_REG */ \ + {0x08000000, 0x00000000}, /* R27_REG */ \ + {0xffffffff, 0x80000000}, /* GENERAL_REGS */ \ + {0x00000000, 0x7fffffff}, /* FLOAT_REGS */ \ + { \ + 0xffffffff, 0xffffffff \ + } \ + } + +/* The same information, inverted: + Return the class number of the smallest class containing + reg number REGNO. This could be a conditional expression + or could index an array. */ + +#define REGNO_REG_CLASS(REGNO) \ + ((REGNO) == 0 \ + ? R0_REG \ + : (REGNO) == 24 \ + ? R24_REG \ + : (REGNO) == 25 \ + ? R25_REG \ + : (REGNO) == 27 \ + ? R27_REG \ + : IN_RANGE ((REGNO), 32, 62) ? FLOAT_REGS : GENERAL_REGS) + +/* The class value for index registers, and the one for base regs. */ +#define INDEX_REG_CLASS NO_REGS +#define BASE_REG_CLASS GENERAL_REGS + +/* Given an rtx X being reloaded into a reg required to be + in class CLASS, return the class of reg to actually use. + In general this is just CLASS; but on some machines + in some cases it is preferable to use a more restrictive class. */ + +#define PREFERRED_RELOAD_CLASS sw_64_preferred_reload_class + +/* Provide the cost of a branch. Exact meaning under development. */ +#define BRANCH_COST(speed_p, predictable_p) 5 + +/* Stack layout; function entry, exit and calling. */ + +/* Define this if pushing a word on the stack + makes the stack pointer a smaller address. */ +#define STACK_GROWS_DOWNWARD 1 + +/* Define this to nonzero if the nominal address of the stack frame + is at the high-address end of the local variables; + that is, each additional local variable allocated + goes at a more negative offset in the frame. */ +//#define FRAME_GROWS_DOWNWARD SW_64_ENABLE_ASAN +#define FRAME_GROWS_DOWNWARD 1 + +/* If we generate an insn to push BYTES bytes, + this says how many the stack pointer really advances by. + On Sw_64, don't define this because there are no push insns. */ +/* #define PUSH_ROUNDING(BYTES) */ + +/* Define this to be nonzero if stack checking is built into the ABI. */ +#define STACK_CHECK_BUILTIN 1 + +/* Define this if the maximum size of all the outgoing args is to be + accumulated and pushed during the prologue. The amount can be + found in the variable crtl->outgoing_args_size. */ +#define ACCUMULATE_OUTGOING_ARGS 1 + +/* Offset of first parameter from the argument pointer register value. */ + +#define FIRST_PARM_OFFSET(FNDECL) 0 + +/* Definitions for register eliminations. + + We have two registers that can be eliminated on the Sw_64. First, the + frame pointer register can often be eliminated in favor of the stack + pointer register. Secondly, the argument pointer register can always be + eliminated; it is replaced with either the stack or frame pointer. */ + +/* This is an array of structures. Each structure initializes one pair + of eliminable registers. The "from" register number is given first, + followed by "to". Eliminations of the same "from" register are listed + in order of preference. */ + +#define ELIMINABLE_REGS \ + { \ + {ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + {ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \ + {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { \ + FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM \ + } \ + } + +/* Round up to a multiple of 16 bytes. */ +#define SW_64_ROUND(X) \ + ((TARGET_SW_32ALIGN || TARGET_SW_SIMD) ? 
ROUND_UP ((X), 32) \ + : ROUND_UP ((X), 16)) + +/* Define the offset between two registers, one to be eliminated, and the other + its replacement, at the start of a routine. */ +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + ((OFFSET) = sw_64_initial_elimination_offset (FROM, TO)) + +/* Define this if stack space is still allocated for a parameter passed + in a register. */ +/* #define REG_PARM_STACK_SPACE */ + +/* 1 if N is a possible register number for function argument passing. + On Sw_64, these are $16-$21 and $f16-$f21. */ + +#define FUNCTION_ARG_REGNO_P(N) \ + (IN_RANGE ((N), 16, 21) || ((N) >= 16 + 32 && (N) <= 21 + 32)) + +/* Define a data type for recording info about an argument list + during the scan of that argument list. This data type should + hold all necessary information about the function itself + and about the args processed so far, enough to enable macros + such as FUNCTION_ARG to determine where the next arg should go. + + On Sw_64, this is a single integer, which is a number of words + of arguments scanned so far. + Thus 6 or more means all following args should go on the stack. */ + +#define CUMULATIVE_ARGS int + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is 0. */ + +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \ + (CUM) = 0 + +/* Define intermediate macro to compute + the size (in registers) of an argument. */ + +#define SW_64_ARG_SIZE(MODE, TYPE) \ + ((MODE) == TFmode || (MODE) == TCmode \ + ? 1 \ + : CEIL (((MODE) == BLKmode ? int_size_in_bytes (TYPE) \ + : GET_MODE_SIZE (MODE)), \ + UNITS_PER_WORD)) + +/* Make (or fake) .linkage entry for function call. + IS_LOCAL is 0 if name is used in call, 1 if name is used in definition. */ + +/* This macro defines the start of an assembly comment. */ + +#define ASM_COMMENT_START " #" + +/* This macro produces the initial definition of a function. */ + +#undef ASM_DECLARE_FUNCTION_NAME +#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ + sw_64_start_function (FILE, NAME, DECL); + +/* This macro closes up a function definition for the assembler. */ + +#undef ASM_DECLARE_FUNCTION_SIZE +#define ASM_DECLARE_FUNCTION_SIZE(FILE, NAME, DECL) \ + sw_64_end_function (FILE, NAME, DECL) + +/* Output any profiling code before the prologue. */ + +#define PROFILE_BEFORE_PROLOGUE 1 + +/* Never use profile counters. */ + +#define NO_PROFILE_COUNTERS 1 + +/* Output assembler code to FILE to increment profiler label # LABELNO + for profiling a function entry. Under SYSV, profiling is enabled + by simply passing -pg to the assembler and linker. */ + +#define FUNCTION_PROFILER(FILE, LABELNO) + +/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function, + the stack pointer does not matter. The value is tested only in + functions that have frame pointers. + No definition is equivalent to always zero. */ + +#define EXIT_IGNORE_STACK 1 + +/* Define registers used by the epilogue and return instruction. */ + +#define EPILOGUE_USES(REGNO) ((REGNO) == 26) + +/* Length in units of the trampoline for entering a nested function. */ + +#define TRAMPOLINE_SIZE 32 + +/* The alignment of a trampoline, in bits. */ + +#define TRAMPOLINE_ALIGNMENT 64 + +/* A C expression whose value is RTL representing the value of the return + address for the frame COUNT steps up from the current frame. 
+ FRAMEADDR is the frame pointer of the COUNT frame, or the frame pointer of + the COUNT-1 frame if RETURN_ADDR_IN_PREVIOUS_FRAME is defined. */ + +#define RETURN_ADDR_RTX sw_64_return_addr + +/* Provide a definition of DWARF_FRAME_REGNUM here so that fallback unwinders + can use DWARF_ALT_FRAME_RETURN_COLUMN defined below. This is just the same + as the default definition in dwarf2out.c. */ +#undef DWARF_FRAME_REGNUM +#define DWARF_FRAME_REGNUM(REG) DBX_REGISTER_NUMBER (REG) + +/* Before the prologue, RA lives in $26. */ +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, 26) +#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (26) +#define DWARF_ALT_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (64) +#define DWARF_ZERO_REG 31 + +/* Describe how we implement __builtin_eh_return. */ +#define EH_RETURN_DATA_REGNO(N) ((N) < 4 ? (N) + 16 : INVALID_REGNUM) +#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, 28) +#define EH_RETURN_HANDLER_RTX \ + gen_rtx_MEM (Pmode, plus_constant (Pmode, stack_pointer_rtx, \ + crtl->outgoing_args_size)) + +/* Addressing modes, and classification of registers for them. */ + +/* Macros to check register numbers against specific register classes. */ + +/* These assume that REGNO is a hard or pseudo reg number. + They give nonzero only if REGNO is a hard reg of the suitable class + or a pseudo reg currently allocated to a suitable hard reg. + Since they use reg_renumber, they are safe only once reg_renumber + has been allocated, which happens in reginfo.c during register + allocation. */ + +#define REGNO_OK_FOR_INDEX_P(REGNO) 0 +#define REGNO_OK_FOR_BASE_P(REGNO) \ + ((REGNO) < 32 || (unsigned) reg_renumber[REGNO] < 32 || (REGNO) == 63 \ + || reg_renumber[REGNO] == 63) + +/* Maximum number of registers that can appear in a valid memory address. */ +#define MAX_REGS_PER_ADDRESS 1 + +/* Recognize any constant value that is a valid address. For the Sw_64, + there are only constants none since we want to use LDI to load any + symbolic addresses into registers. */ + +#define CONSTANT_ADDRESS_P(X) \ + (CONST_INT_P (X) && ((UINTVAL (X) + 0x8000) < 0x10000)) + +/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx + and check its validity for a certain class. + We have two alternate definitions for each of them. + The usual definition accepts all pseudo regs; the other rejects + them unless they have been allocated suitable hard regs. + The symbol REG_OK_STRICT causes the latter definition to be used. + + Most source files want to accept pseudo regs in the hope that + they will get allocated to the class that the insn wants them to be in. + Source files for reload pass need to be strict. + After reload, it makes no difference, since pseudo regs have + been eliminated by then. */ + +/* Nonzero if X is a hard reg that can be used as an index + or if it is a pseudo reg. */ +#define REG_OK_FOR_INDEX_P(X) 0 + +/* Nonzero if X is a hard reg that can be used as a base reg + or if it is a pseudo reg. */ +#define NONSTRICT_REG_OK_FOR_BASE_P(X) \ + (REGNO (X) < 32 || REGNO (X) == 63 || REGNO (X) >= FIRST_PSEUDO_REGISTER) + +/* ??? Nonzero if X is the frame pointer, or some virtual register + that may eliminate to the frame pointer. These will be allowed to + have offsets greater than 32K. This is done because register + elimination offsets will change the hi/lo split, and if we split + before reload, we will require additional instructions. 
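+ For example, a slot at offset 40000 from one of these registers no longer
+ fits in a signed 16-bit displacement once the real elimination offset is
+ added in, so the address must be rebuilt from a high part and a low part;
+ splitting before reload would commit to the wrong split and cost extra
+ instructions.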
*/ +#define NONSTRICT_REG_OK_FP_BASE_P(X) \ + (REGNO (X) == 31 || REGNO (X) == 63 \ + || (REGNO (X) >= FIRST_PSEUDO_REGISTER \ + && REGNO (X) < LAST_VIRTUAL_POINTER_REGISTER)) + +/* Nonzero if X is a hard reg that can be used as a base reg. */ +#define STRICT_REG_OK_FOR_BASE_P(X) REGNO_OK_FOR_BASE_P (REGNO (X)) + +#ifdef REG_OK_STRICT +#define REG_OK_FOR_BASE_P(X) STRICT_REG_OK_FOR_BASE_P (X) +#else +#define REG_OK_FOR_BASE_P(X) NONSTRICT_REG_OK_FOR_BASE_P (X) +#endif + +/* Try a machine-dependent way of reloading an illegitimate address + operand. If we find one, push the reload and jump to WIN. This + macro is used in only one place: `find_reloads_address' in reload.c. */ + +#define LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND_L, WIN) \ + do \ + { \ + rtx new_x \ + = sw_64_legitimize_reload_address (X, MODE, OPNUM, TYPE, IND_L); \ + if (new_x) \ + { \ + X = new_x; \ + goto WIN; \ + } \ + } \ + while (0) + +/* Specify the machine mode that this machine uses + for the index in the tablejump instruction. */ +#define CASE_VECTOR_MODE SImode + +/* Define as C expression which evaluates to nonzero if the tablejump + instruction expects the table to contain offsets from the address of the + table. + + Do not define this if the table should contain absolute addresses. + On the Sw_64, the table is really GP-relative, not relative to the PC + of the table, but we pretend that it is PC-relative; this should be OK, + but we should try to find some better way sometime. */ +#define CASE_VECTOR_PC_RELATIVE 1 + +/* Define this as 1 if `char' should by default be signed; else as 0. */ +#define DEFAULT_SIGNED_CHAR 1 + +/* Max number of bytes we can move to or from memory + in one reasonably fast instruction. */ + +#define MOVE_MAX 8 + +/* If a memory-to-memory move would take MOVE_RATIO or more simple + move-instruction pairs, we will do a movmem or libcall instead. + + Without byte/word accesses, we want no more than four instructions; + with, several single byte accesses are better. */ + +#define MOVE_RATIO(speed) (TARGET_BWX ? 7 : 2) + +/* Largest number of bytes of an object that can be placed in a register. + On the Sw_64 we have plenty of registers, so use TImode. */ +#define MAX_FIXED_MODE_SIZE GET_MODE_BITSIZE (TImode) + +/* Nonzero if access to memory by bytes is no faster than for words. + Also nonzero if doing byte operations (specifically shifts) in registers + is undesirable. + + On the Sw_64, we want to not use the byte operation and instead use + masking operations to access fields; these will save instructions. */ + +#define SLOW_BYTE_ACCESS 1 + +/* Define if operations between registers always perform the operation + on the full register even if a narrower mode is specified. */ +#define WORD_REGISTER_OPERATIONS 1 + +/* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD + will either zero-extend or sign-extend. The value of this macro should + be the code that says which one of the two operations is implicitly + done, UNKNOWN if none. */ +#define LOAD_EXTEND_OP(MODE) ((MODE) == SImode ? SIGN_EXTEND : ZERO_EXTEND) + +/* Define if loading short immediate values into registers sign extends. */ +#define SHORT_IMMEDIATES_SIGN_EXTEND 1 + +/* The CIX ctlz and cttz instructions return 64 for zero. */ +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ + ((VALUE) = 64, TARGET_CIX ? 1 : 0) +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ + ((VALUE) = 64, TARGET_CIX ? 1 : 0) + +/* Define the value returned by a floating-point comparison instruction. 
*/ + +#define FLOAT_STORE_FLAG_VALUE(MODE) \ + REAL_VALUE_ATOF ((TARGET_FLOAT_VAX ? "0.5" : "2.0"), (MODE)) + +/* Specify the machine mode that pointers have. + After generation of rtl, the compiler makes no further distinction + between pointers and any other objects of this machine mode. */ +#define Pmode (TARGET_SW_M32 ? SImode : DImode) + +/* Mode of a function address in a call instruction (for indexing purposes). */ + +#define FUNCTION_MODE Pmode + +/* Define this if addresses of constant functions + shouldn't be put through pseudo regs where they can be cse'd. + Desirable on machines where ordinary constants are expensive + but a CALL with constant address is cheap. + + We define this on the Sw_64 so that gen_call and gen_call_value + get to see the SYMBOL_REF (for the hint field of the jsr). It will + then copy it into a register, thus actually letting the address be + cse'ed. */ + +#define NO_FUNCTION_CSE 1 + +/* Define this to be nonzero if shift instructions ignore all but the low-order + few bits. */ +#define SHIFT_COUNT_TRUNCATED 1 + +/* Control the assembler format that we output. */ + +/* Output to assembler file text saying following lines + may contain character constants, extra white space, comments, etc. */ +#define ASM_APP_ON (TARGET_EXPLICIT_RELOCS ? "\t.set\tmacro\n" : "") + +/* Output to assembler file text saying following lines + no longer contain unusual constructs. */ +#define ASM_APP_OFF (TARGET_EXPLICIT_RELOCS ? "\t.set\tnomacro\n" : "") + +#define TEXT_SECTION_ASM_OP "\t.text" + +/* Output before writable data. */ + +#define DATA_SECTION_ASM_OP "\t.data" + +/* How to refer to registers in assembler output. + This sequence is indexed by compiler's hard-register-number (see above). */ + +#define REGISTER_NAMES \ + { \ + "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$9", "$10", "$11", \ + "$12", "$13", "$14", "$15", "$16", "$17", "$18", "$19", "$20", "$21", \ + "$22", "$23", "$24", "$25", "$26", "$27", "$28", "$29", "$30", "AP", \ + "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8", "$f9", \ + "$f10", "$f11", "$f12", "$f13", "$f14", "$f15", "$f16", "$f17", "$f18", \ + "$f19", "$f20", "$f21", "$f22", "$f23", "$f24", "$f25", "$f26", "$f27", \ + "$f28", "$f29", "$f30", "FP" \ + } + +/* Strip name encoding when emitting labels. */ + +#define ASM_OUTPUT_LABELREF(STREAM, NAME) \ + do \ + { \ + const char *name_ = NAME; \ + if (*name_ == '@' || *name_ == '%') \ + name_ += 2; \ + if (*name_ == '*') \ + name_++; \ + else \ + fputs (user_label_prefix, STREAM); \ + fputs (name_, STREAM); \ + } \ + while (0) + +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP "\t.globl " + +/* Use dollar signs rather than periods in special g++ assembler names. */ + +#undef NO_DOLLAR_IN_LABEL + +/* This is how to store into the string LABEL + the symbol_ref name of an internal numbered label where + PREFIX is the class of label and NUM is the number within the class. + This is suitable for output with `assemble_name'. */ + +#undef ASM_GENERATE_INTERNAL_LABEL +#define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM) \ + sprintf ((LABEL), "*$%s%ld", (PREFIX), (long) (NUM)) + +/* This is how to output an element of a case-vector that is relative. */ + +#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \ + fprintf (FILE, "\t.gprel32 $L%d\n", (VALUE)) + +/* If we use NM, pass -g to it so it only lists globals. */ +#define NM_FLAGS "-pg" + +/* Definitions for debugging. */ + +/* Correct the offset of automatic variables and arguments. 
Note that + the Sw_64 debug format wants all automatic variables and arguments + to be in terms of two different offsets from the virtual frame pointer, + which is the stack pointer before any adjustment in the function. + The offset for the argument pointer is fixed for the native compiler, + it is either zero (for the no arguments case) or large enough to hold + all argument registers. + The offset for the auto pointer is the fourth argument to the .frame + directive (local_offset). + To stay compatible with the native tools we use the same offsets + from the virtual frame pointer and adjust the debugger arg/auto offsets + accordingly. These debugger offsets are set up in output_prolog. */ + +extern long sw_64_arg_offset; +extern long sw_64_auto_offset; +#define DEBUGGER_AUTO_OFFSET(X) \ + ((GET_CODE (X) == PLUS ? INTVAL (XEXP (X, 1)) : 0) + sw_64_auto_offset) +#define DEBUGGER_ARG_OFFSET(OFFSET, X) (OFFSET + sw_64_arg_offset) + +#define ASM_OUTPUT_SOURCE_FILENAME(STREAM, NAME) \ + sw_64_output_filename (STREAM, NAME) + +/* By default, turn on GDB extensions. */ +#define DEFAULT_GDB_EXTENSIONS 1 + +/* This version don't define SYSTEM_IMPLICIT_EXTERN_C Replace + * NO_IMPLICIT_EXTERN_C with SYSTEM_IMPLICIT_EXTERN_C. */ +/* The system headers under Sw_64 systems are generally C++-aware. */ +/*#define NO_IMPLICIT_EXTERN_C*/ + +#define TARGET_SUPPORTS_WIDE_INT 1 +#define SW64_TARGET_SUPPORT_FPCR 1 + +#define HAVE_POST_INCREMENT (TARGET_SW8A ? 1 : 0) +#define HAVE_POST_DECREMENT (TARGET_SW8A ? 1 : 0) +#define HAVE_POST_MODIFY_DISP (TARGET_SW8A ? 1 : 0) +int +enable_asan_check_stack (); +#ifndef SW_64_ENABLE_ASAN +#define SW_64_ENABLE_FULL_ASAN 1 +#else +#undef SW_64_ENABLE_FULL_ASAN +#define SW_64_ENABLE_ASAN 0 +#endif +#define TARGET_CRC32 0 diff --git a/gcc/config/sw_64/sw_64.md b/gcc/config/sw_64/sw_64.md new file mode 100644 index 0000000000000000000000000000000000000000..8cf036b1007a428ca68411e7124bd7f3ab9eae91 --- /dev/null +++ b/gcc/config/sw_64/sw_64.md @@ -0,0 +1,7865 @@ +;; Machine description for Sw_64 for GNU C compiler +;; Copyright (C) 1992-2020 Free Software Foundation, Inc. +;; Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu) +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;;- See file "rtl.def" for documentation on define_insn, match_*, et. al. 
+ +;; Uses of UNSPEC in this file: + +(define_c_enum "unspec" [ + UNSPEC_XFLT_COMPARE + UNSPEC_ARG_HOME + UNSPEC_LDGP1 + UNSPEC_INSXH + UNSPEC_MSKXH + UNSPEC_CVTQL + UNSPEC_CVTLQ + UNSPEC_LDGP2 + UNSPEC_LITERAL + UNSPEC_LITUSE + UNSPEC_SIBCALL + UNSPEC_SYMBOL + UNSPEC_FRINTZ + UNSPEC_FRINTP + UNSPEC_FRINTG + UNSPEC_FRINTN + UNSPEC_FRINTI + UNSPEC_FRECX + + + ;; TLS Support + UNSPEC_TLSGD_CALL + UNSPEC_TLSLDM_CALL + UNSPEC_TLSGD + UNSPEC_TLSLDM + UNSPEC_DTPREL + UNSPEC_TPREL + UNSPEC_TP + UNSPEC_TLSRELGOT + UNSPEC_GOTDTPREL + + ;; Builtins + UNSPEC_CMPBGE + UNSPEC_ZAP + UNSPEC_AMASK + UNSPEC_IMPLVER + UNSPEC_PERR + UNSPEC_COPYSIGN + UNSPEC_PFSC + UNSPEC_PFTC + UNSPEC_SBT + UNSPEC_CBT + UNSPEC_FIMOVS ; SHENJQ20230404_RSQRT + + ;; Atomic operations + UNSPEC_MB + UNSPEC_ATOMIC + UNSPEC_CMPXCHG + UNSPEC_XCHG + UNSPECV_LDGP2 + UNSPECV_HARDWARE_PREFETCH_CNT + +]) + +;; UNSPEC_VOLATILE: + +(define_c_enum "unspecv" [ + UNSPECV_IMB + UNSPECV_BLOCKAGE + UNSPECV_SPECULATION_BARRIER + UNSPECV_SETJMPR ; builtin_setjmp_receiver + UNSPECV_LONGJMP ; builtin_longjmp + UNSPECV_TRAPB + UNSPECV_PSPL ; prologue_stack_probe_loop + UNSPECV_REALIGN + UNSPECV_EHR ; exception_receiver + UNSPECV_MCOUNT + UNSPECV_FORCE_MOV + UNSPECV_LDGP1 + UNSPECV_PLDGP2 ; prologue ldgp + UNSPECV_SET_TP + UNSPECV_RPCC + UNSPECV_SETJMPR_ER ; builtin_setjmp_receiver fragment + UNSPECV_LL ; load-locked + UNSPECV_SC ; store-conditional + UNSPECV_CMPXCHG + + UNSPEC_TIE ;; TIE +]) + +;; CQImode must be handled the similarly to HImode +;; when generating reloads. +(define_mode_iterator RELOAD12 [QI HI CQI]) +(define_mode_attr reloadmode [(QI "qi") (HI "hi") (CQI "hi")]) + +;; Other mode iterators +(define_mode_iterator IMODE [QI HI SI DI]) +(define_mode_iterator I12MODE [QI HI]) +(define_mode_iterator I124MODE [QI HI SI]) +(define_mode_iterator I24MODE [HI SI]) +(define_mode_iterator I248MODE [HI SI DI]) +(define_mode_iterator I48MODE [SI DI]) + +(define_mode_attr DWI [(SI "DI") (DI "TI")]) +(define_mode_attr modesuffix [(QI "b") (HI "h") (SI "w") (DI "l") + (V8QI "b8") (V4HI "w4") + (SF "%,") (DF "%-")]) +(define_mode_attr vecmodesuffix [(QI "b8") (HI "w4")]) + +(define_code_iterator any_maxmin [smax smin umax umin]) + +(define_code_attr maxmin [(smax "maxs") (smin "mins") + (umax "maxu") (umin "minu")]) + +(define_mode_iterator SFDF [SF DF]) +(define_mode_attr SD [(SF "s") (DF "d")]) +(define_int_iterator FRINT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTN + UNSPEC_FRINTG UNSPEC_FRINTI]) + +;; Standard pattern names for floating-point rounding instructions. +(define_int_attr frint_pattern [(UNSPEC_FRINTZ "btrunc") + (UNSPEC_FRINTP "ceil") + (UNSPEC_FRINTN "floor") + (UNSPEC_FRINTI "nearbyint") + (UNSPEC_FRINTG "round")]) + +;; frint suffix for floating-point rounding instructions. +(define_int_attr frint_suffix [(UNSPEC_FRINTZ "_z") + (UNSPEC_FRINTP "_p") + (UNSPEC_FRINTN "_n") + (UNSPEC_FRINTG "_g") + (UNSPEC_FRINTI "")]) +;; endif + +;; Where necessary, the suffixes _le and _be are used to distinguish between +;; little-endian and big-endian patterns. +;; +;; Note that the Unicos/Mk assembler does not support the following +;; opcodes: mov, fmov, nop, fnop, unop. + +;; Processor type -- this attribute must exactly match the processor_type +;; enumeration in sw_64.h. + +(define_attr "tune" "sw6,sw8" + (const (symbol_ref "((enum attr_tune) sw_64_tune)"))) + +;; Define an insn type attribute. This is used in function unit delay +;; computations, among other purposes. 
For the most part, we use the names +;; defined in the documentation, but add a few that we have to know about +;; separately. + +(define_attr "type" + "ild,fld,ldsym,ist,fst,ibr,callpal,fbr,call,iadd,ilog,shift,icmov,fcmov, + icmp,imul,fadd,fmul,fmadd,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,mb,ld_l,st_c, + multi,vld,vst,ctpop,none,jsr,vcmp,frint,fp,fminmax,vsum,vinv,vsel,crc32,crc32c" + (const_string "iadd")) + +;; Describe a user's asm statement. +(define_asm_attributes + [(set_attr "type" "multi")]) + +;; Define the operand size an insn operates on. Used primarily by mul +;; and div operations that have size dependent timings. + +(define_attr "opsize" "si,di,udi" + (const_string "di")) + +;; The TRAP attribute marks instructions that may generate traps +;; (which are imprecise and may need a trapb if software completion +;; is desired). + +(define_attr "trap" "no,yes" + (const_string "no")) + +;; The ROUND_SUFFIX attribute marks which instructions require a +;; rounding-mode suffix. The value NONE indicates no suffix, +;; the value NORMAL indicates a suffix controlled by sw_64_fprm. + +(define_attr "round_suffix" "none,normal,c" + (const_string "none")) + +;; The TRAP_SUFFIX attribute marks instructions requiring a trap-mode suffix: +;; NONE no suffix +;; SU accepts only /su (cmpt et al) +;; SUI accepts only /sui (cvtqt and cvtqs) +;; V_SV accepts /v and /sv (cvtql only) +;; V_SV_SVI accepts /v, /sv and /svi (cvttq only) +;; U_SU_SUI accepts /u, /su and /sui (most fp instructions) +;; +;; The actual suffix emitted is controlled by sw_64_fptm. + +(define_attr "trap_suffix" "none,su,sui,v_sv,v_sv_svi,u_su_sui" + (const_string "none")) + +;; The length of an instruction sequence in bytes. + +(define_attr "length" "" + (const_int 4)) + +;; The USEGP attribute marks instructions that have relocations that use +;; the GP. + +(define_attr "usegp" "no,yes" + (cond [(eq_attr "type" "ldsym,call") + (const_string "yes") + (eq_attr "type" "ild,fld,ist,fst") + (symbol_ref "((enum attr_usegp) sw_64_find_lo_sum_using_gp (insn))") + ] + (const_string "no"))) + +;; The CANNOT_COPY attribute marks instructions with relocations that +;; cannot easily be duplicated. This includes insns with gpdisp relocs +;; since they have to stay in 1-1 correspondence with one another. This +;; also includes call insns, since they must stay in correspondence with +;; the immediately following gpdisp instructions. + +(define_attr "cannot_copy" "false,true" + (const_string "false")) + +;; Used to control the "enabled" attribute on a per-instruction basis. +;; For convenience, conflate ABI issues re loading of addresses with +;; an "isa". +(define_attr "isa" "base,bwx,max,fix,cix,vms,ner,er,sw6a,sw6b,sw8a" + (const_string "base")) + +(define_attr "enabled" "" + (cond [(eq_attr "isa" "bwx") (symbol_ref "TARGET_BWX") + (eq_attr "isa" "max") (symbol_ref "TARGET_MAX") + (eq_attr "isa" "fix") (symbol_ref "TARGET_FIX") + (eq_attr "isa" "cix") (symbol_ref "TARGET_CIX") + (eq_attr "isa" "vms") (symbol_ref "!TARGET_ABI_OSF") + (eq_attr "isa" "ner") (symbol_ref "!TARGET_EXPLICIT_RELOCS") + (eq_attr "isa" "er") (symbol_ref "TARGET_EXPLICIT_RELOCS") + (eq_attr "isa" "sw6a") (symbol_ref "TARGET_SW6A") + (eq_attr "isa" "sw6b") (symbol_ref "TARGET_SW6B") + (eq_attr "isa" "sw8a") (symbol_ref "TARGET_SW8A") + ] + (const_int 1))) + +;; Include scheduling descriptions. 
+ +(include "sw6.md") +(include "sw8.md") + + +;; Operand and operator predicates and constraints + +(include "predicates.md") +(include "constraints.md") + + +;; First define the arithmetic insns. Note that the 32-bit forms also +;; sign-extend. + +;; Handle 32-64 bit extension from memory to a floating point register +;; specially, since this occurs frequently in int->double conversions. +;; +;; Note that while we must retain the =f case in the insn for reload's +;; benefit, it should be eliminated after reload, so we should never emit +;; code for that case. But we don't reject the possibility. + +(define_expand "extendsidi2" + [(set (match_operand:DI 0 "register_operand") + (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand")))]) + +(define_insn "*cvtlq" + [(set (match_operand:DI 0 "register_operand" "=f") + (unspec:DI [(match_operand:SF 1 "reg_or_0_operand" "fG")] + UNSPEC_CVTLQ))] + "" + "fcvtwl %1,%0" + [(set_attr "type" "fadd")]) + +(define_insn "*extendsidi2_1" + [(set (match_operand:DI 0 "register_operand" "=r,r,!*f") + (sign_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "r,m,m")))] + "" + "@ + addw $31,%1,%0 + ldw%U1 %0,%1 + flds %0,%1\;fcvtwl %0,%0" + [(set_attr "type" "iadd,ild,fld") + (set_attr "length" "*,*,8")]) + +(define_split + [(set (match_operand:DI 0 "hard_fp_register_operand") + (sign_extend:DI (match_operand:SI 1 "memory_operand")))] + "reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (unspec:DI [(match_dup 2)] UNSPEC_CVTLQ))] +{ + operands[1] = adjust_address (operands[1], SFmode, 0); + operands[2] = gen_rtx_REG (SFmode, REGNO (operands[0])); +}) + +;; Optimize sign-extension of SImode loads. This shows up in the wake of +;; reload when converting fp->int. + +(define_peephole2 + [(set (match_operand:SI 0 "hard_int_register_operand") + (match_operand:SI 1 "memory_operand")) + (set (match_operand:DI 2 "hard_int_register_operand") + (sign_extend:DI (match_dup 0)))] + "true_regnum (operands[0]) == true_regnum (operands[2]) + || peep2_reg_dead_p (2, operands[0])" + [(set (match_dup 2) + (sign_extend:DI (match_dup 1)))]) + +(define_peephole2 +[ +(set (match_operand:DF 0 "register_operand") + (match_operator:DF 1 "sw_64_fp_comparison_operator" + [(match_operand:DF 2 "register_operand") + (match_operand:DF 3 "const0_operand")])) +(set (match_operand:DF 4 "register_operand") + (match_operator:DF 5 "sw_64_fp_comparison_operator" + [(match_operand:DF 6 "reg_or_0_operand") + (match_operand:DF 7 "reg_or_0_operand")])) +(set (match_operand:SFDF 8 "register_operand") + (if_then_else:SFDF + (match_operand 9 "comparison_operator") + (match_operand:SFDF 10 "reg_or_8bit_operand") + (match_operand:SFDF 11 "reg_or_8bit_operand"))) +] +"(GET_CODE (operands[1])==LE || GET_CODE (operands[1])==LT) + && GET_CODE (operands[5])==EQ && GET_CODE (operands[9])==NE && flag_sw_fselect +" + +[ +(set (match_operand:SFDF 8 "reg_or_0_operand") + (if_then_else:SFDF + (match_operator 1 "sw_64_fp_comparison_operator" + [(match_operand:SFDF 2 "reg_or_0_operand") + (match_operand:SFDF 3 "const0_operand")]) + (match_operand:SFDF 11 "reg_or_0_operand") + (match_operand:SFDF 10 "reg_or_0_operand"))) +] +) +(define_peephole2 +[ +(set (match_operand:DF 0 "register_operand") + (match_operator:DF 1 "sw_64_fp_comparison_operator" + [(match_operand:DF 2 "const0_operand") + (match_operand:DF 3 "reg_or_0_operand")])) +(set (match_operand:DF 4 "register_operand") + (match_operator:DF 5 "sw_64_fp_comparison_operator" + [(match_operand:DF 6 "reg_or_0_operand") + 
(match_operand:DF 7 "reg_or_0_operand")])) +(set (match_operand:SFDF 8 "register_operand") + (if_then_else:SFDF + (match_operand 9 "comparison_operator") + (match_operand:SFDF 10 "reg_or_8bit_operand") + (match_operand:SFDF 11 "reg_or_8bit_operand"))) +] +"(GET_CODE (operands[1])==LE || GET_CODE (operands[1])==LT) + && GET_CODE (operands[5])==EQ && GET_CODE (operands[9])==NE && flag_sw_fselect +" + +[ +(set (match_operand:SFDF 8 "reg_or_0_operand") + (if_then_else:SFDF + (match_operator 1 "sw_64_fp_comparison_operator" + [(match_operand:SFDF 3 "reg_or_0_operand") + (match_operand:SFDF 2 "const0_operand")]) + (match_operand:SFDF 10 "reg_or_0_operand") + (match_operand:SFDF 11 "reg_or_0_operand"))) +] +) + +(define_peephole2 +[ +(set (match_operand:DF 0 "register_operand") + (match_operator:DF 1 "sw_64_fp_comparison_operator" + [(match_operand:DF 2 "register_operand") + (match_operand:DF 3 "const0_operand")])) +(set (match_operand:DF 4 "register_operand") + (match_operator:DF 5 "sw_64_fp_comparison_operator" + [(match_operand:DF 6 "register_operand") + (match_operand:DF 7 "const0_operand")])) +(set (match_operand:SFDF 8 "register_operand") + (if_then_else:SFDF + (match_operand 9 "comparison_operator") + (match_operand:SFDF 10 "reg_or_8bit_operand") + (match_operand:SFDF 11 "reg_or_8bit_operand"))) +] +"GET_CODE (operands[1])==EQ && GET_CODE (operands[5])==EQ && + (GET_CODE (operands[9])==NE || GET_CODE (operands[9])==EQ)&& + (operands[0] == operands[6]) && flag_sw_fselect" +[ +(set (match_operand:SFDF 8 "reg_or_0_operand") + (if_then_else:SFDF + (match_operator 9 "sw_64_fp_comparison_operator" + [(match_operand:SFDF 2 "reg_or_0_operand") + (match_operand:SFDF 3 "const0_operand")]) + (match_operand:SFDF 10 "reg_or_0_operand") + (match_operand:SFDF 11 "reg_or_0_operand"))) +] +) + + + +(define_insn "addsi3" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") + (plus:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ,rJ,rJ") + (match_operand:SI 2 "add_operand" "rI,O,K,L")))] + "" + "@ + addw %r1,%2,%0 + subw %r1,%n2,%0 + ldi %0,%2(%r1) + ldih %0,%h2(%r1)") + +(define_split + [(set (match_operand:SI 0 "register_operand") + (plus:SI (match_operand:SI 1 "register_operand") + (match_operand:SI 2 "const_int_operand")))] + "! add_operand (operands[2], SImode)" + [(set (match_dup 0) (plus:SI (match_dup 1) (match_dup 3))) + (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 4)))] +{ + HOST_WIDE_INT val = INTVAL (operands[2]); + HOST_WIDE_INT low = (val & 0xffff) - 2 * (val & 0x8000); + HOST_WIDE_INT rest = val - low; + + operands[3] = GEN_INT (rest); + operands[4] = GEN_INT (low); +}) + +(define_insn "*addsi_se" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (sign_extend:DI + (plus:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:SI 2 "sext_add_operand" "rI,O"))))] + "" + "@ + addw %r1,%2,%0 + subw %r1,%n2,%0") + +(define_insn "*addsi_se2" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (sign_extend:DI + (subreg:SI (plus:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:DI 2 "sext_add_operand" "rI,O")) + 0)))] + "" + "@ + addw %r1,%2,%0 + subw %r1,%n2,%0") + +;; (plus:SI (ashift:SI (match_dup 3)-> (plus:SI (mult:SI (match_dup 3) +(define_split + [(set (match_operand:DI 0 "register_operand") + (sign_extend:DI + (plus:SI (match_operand:SI 1 "reg_not_elim_operand") + (match_operand:SI 2 "const_int_operand")))) + (clobber (match_operand:SI 3 "reg_not_elim_operand"))] + "! 
sext_add_operand (operands[2], SImode) && INTVAL (operands[2]) > 0 + && INTVAL (operands[2]) % 4 == 0" + [(set (match_dup 3) (match_dup 4)) + (set (match_dup 0) (sign_extend:DI (plus:SI (mult:SI (match_dup 3) + (match_dup 5)) + (match_dup 1))))] +{ + HOST_WIDE_INT val = INTVAL (operands[2]) / 4; + int mult = 4; + + if (val % 2 == 0) + val /= 2, mult = 8; + + operands[4] = GEN_INT (val); + operands[5] = GEN_INT (mult); +}) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (sign_extend:DI + (plus:SI (match_operator:SI 1 "comparison_operator" + [(match_operand 2) + (match_operand 3)]) + (match_operand:SI 4 "add_operand")))) + (clobber (match_operand:DI 5 "register_operand"))] + "" + [(set (match_dup 5) (match_dup 6)) + (set (match_dup 0) (sign_extend:DI (plus:SI (match_dup 7) (match_dup 4))))] +{ + operands[6] = gen_rtx_fmt_ee (GET_CODE (operands[1]), DImode, + operands[2], operands[3]); + operands[7] = gen_lowpart (SImode, operands[5]); +}) + +(define_expand "adddi3" + [(set (match_operand:DI 0 "register_operand") + (plus:DI (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "add_operand")))]) + +(define_insn "*adddi_er_lo16_dtp" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "dtp16_symbolic_operand")))] + "HAVE_AS_TLS" + "ldi %0,%2(%1)\t\t!dtprel") + +(define_insn "*adddi_er_hi32_dtp" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "register_operand" "r") + (high:DI (match_operand:DI 2 "dtp32_symbolic_operand"))))] + "HAVE_AS_TLS" + "ldih %0,%2(%1)\t\t!dtprelhi") + +(define_insn "*adddi_er_lo32_dtp" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "dtp32_symbolic_operand")))] + "HAVE_AS_TLS" + "ldi %0,%2(%1)\t\t!dtprello") + +(define_insn "*adddi_er_lo16_tp" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "tp16_symbolic_operand")))] + "HAVE_AS_TLS" + "ldi %0,%2(%1)\t\t!tprel") + +(define_insn "*adddi_er_hi32_tp" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "register_operand" "r") + (high:DI (match_operand:DI 2 "tp32_symbolic_operand"))))] + "HAVE_AS_TLS" + "ldih %0,%2(%1)\t\t!tprelhi") + +(define_insn "*adddi_er_lo32_tp" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "tp32_symbolic_operand")))] + "HAVE_AS_TLS" + "ldi %0,%2(%1)\t\t!tprello") + +(define_insn "*adddi_er_high_l" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "register_operand" "r") + (high:DI (match_operand:DI 2 "local_symbolic_operand"))))] + "TARGET_EXPLICIT_RELOCS && reload_completed" + "ldih %0,%2(%1)\t\t!gprelhigh" + [(set_attr "usegp" "yes")]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (high:DI (match_operand:DI 1 "local_symbolic_operand")))] + "TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 0) + (plus:DI (match_dup 2) (high:DI (match_dup 1))))] + "operands[2] = pic_offset_table_rtx;") + +;; We used to expend quite a lot of effort choosing addl/subl/ldi. +;; With complications like +;; +;; The NT stack unwind code can't handle a subl to adjust the stack +;; (that's a bug, but not one we can do anything about). As of NT4.0 SP3, +;; the exception handling code will loop if a subl is used and an +;; exception occurs. 
+;; +;; The 19980616 change to emit prologues as RTL also confused some +;; versions of GDB, which also interprets prologues. This has been +;; fixed as of GDB 4.18, but it does not harm to unconditionally +;; use ldi here. +;; +;; and the fact that the three insns schedule exactly the same, it's +;; just not worth the effort. + +(define_insn "*adddi_internal" + [(set (match_operand:DI 0 "register_operand" "=r,r,r") + (plus:DI (match_operand:DI 1 "register_operand" "%r,r,r") + (match_operand:DI 2 "add_operand" "r,K,L")))] + "" + "@ + addl %1,%2,%0 + ldi %0,%2(%1) + ldih %0,%h2(%1)") + +;; ??? Allow large constants when basing off the frame pointer or some +;; virtual register that may eliminate to the frame pointer. This is +;; done because register elimination offsets will change the hi/lo split, +;; and if we split before reload, we will require additional instructions. + +(define_insn "*adddi_fp_hack" + [(set (match_operand:DI 0 "register_operand" "=r,r,r") + (plus:DI (match_operand:DI 1 "reg_no_subreg_operand" "r,r,r") + (match_operand:DI 2 "const_int_operand" "K,L,n")))] + "NONSTRICT_REG_OK_FP_BASE_P (operands[1]) + && INTVAL (operands[2]) >= 0 + /* This is the largest constant an ldi+ldih pair can add, minus + an upper bound on the displacement between SP and AP during + register elimination. See INITIAL_ELIMINATION_OFFSET. */ + && INTVAL (operands[2]) + < (0x7fff8000 + - FIRST_PSEUDO_REGISTER * UNITS_PER_WORD + - SW_64_ROUND (crtl->outgoing_args_size) + - (SW_64_ROUND (get_frame_size () + + max_reg_num () * UNITS_PER_WORD + + crtl->args.pretend_args_size) + - crtl->args.pretend_args_size))" + "@ + ldi %0,%2(%1) + ldih %0,%h2(%1) + #") + +;; Don't do this if we are adjusting SP since we don't want to do it +;; in two steps. Don't split FP sources for the reason listed above. +(define_split + [(set (match_operand:DI 0 "register_operand") + (plus:DI (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "const_int_operand")))] + "! 
add_operand (operands[2], DImode) + && operands[0] != stack_pointer_rtx + && operands[1] != frame_pointer_rtx + && operands[1] != arg_pointer_rtx" + [(set (match_dup 0) (plus:DI (match_dup 1) (match_dup 3))) + (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 4)))] +{ + HOST_WIDE_INT val = INTVAL (operands[2]); + HOST_WIDE_INT low = (val & 0xffff) - 2 * (val & 0x8000); + HOST_WIDE_INT rest = val - low; + rtx rest_rtx = GEN_INT (rest); + + operands[4] = GEN_INT (low); + if (satisfies_constraint_L (rest_rtx)) + operands[3] = rest_rtx; + else if (can_create_pseudo_p ()) + { + operands[3] = gen_reg_rtx (DImode); + emit_move_insn (operands[3], operands[2]); + emit_insn (gen_adddi3 (operands[0], operands[1], operands[3])); + DONE; + } + else + FAIL; +}) + +; *sadd->*saddl/*saddq +(define_insn "*saddl" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (plus:SI + (mult:SI (match_operand:SI 1 "reg_not_elim_operand" "r,r") + (match_operand:SI 2 "const48_operand" "I,I")) + (match_operand:SI 3 "sext_add_operand" "rI,O")))] + "" + "@ + s%2addw %1,%3,%0 + s%2subw %1,%n3,%0") + +(define_insn "*saddq" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (plus:DI + (mult:DI (match_operand:DI 1 "reg_not_elim_operand" "r,r") + (match_operand:DI 2 "const48_operand" "I,I")) + (match_operand:DI 3 "sext_add_operand" "rI,O")))] + "" + "@ + s%2addl %1,%3,%0 + s%2subl %1,%n3,%0") + +(define_insn "*saddl_se" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (sign_extend:DI + (plus:SI + (mult:SI (match_operand:SI 1 "reg_not_elim_operand" "r,r") + (match_operand:SI 2 "const48_operand" "I,I")) + (match_operand:SI 3 "sext_add_operand" "rI,O"))))] + "" + "@ + s%2addw %1,%3,%0 + s%2subw %1,%n3,%0") + +(define_insn "*sxaddw" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (plus:SI + (subreg:SI + (ashift:DI + (subreg:DI (match_operand:SI 1 "reg_not_elim_operand" "r,r") 0) + (match_operand:DI 2 "const_int_operand" "I,I")) + 0) + (match_operand:SI 3 "sext_add_operand" "rI,O")))] + "flag_sw_sxaddl==1 && (INTVAL (operands[2])==3 || INTVAL (operands[2])==2)" + { + switch (which_alternative) + { + case 0: + if (INTVAL (operands[2]) == 3) + return "s8addw %1,%3,%0"; + if (INTVAL (operands[2]) == 2) + return "s4addw %1,%3,%0"; + case 1: + if (INTVAL (operands[2]) == 3) + return "s8subw %1,%n3,%0"; + if (INTVAL (operands[2]) == 2) + return "s4subw %1,%n3,%0"; + default: + gcc_unreachable (); + } + }) + +(define_insn "*sxsubw" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (minus:SI + (subreg:SI + (ashift:DI + (subreg:DI (match_operand:SI 1 "reg_not_elim_operand" "r,r") 0) + (match_operand:DI 2 "const_int_operand" "I,I")) + 0) + (match_operand:SI 3 "sext_add_operand" "rI,O")))] + "flag_sw_sxaddl==1 && (INTVAL (operands[2])==3 || INTVAL (operands[2])==2)" + { + switch (which_alternative) + { + case 0: + if (INTVAL (operands[2]) == 3) + return "s8subw %1,%3,%0"; + if (INTVAL (operands[2]) == 2) + return "s4subw %1,%3,%0"; + case 1: + if (INTVAL (operands[2]) == 3) + return "s8addw %1,%n3,%0"; + if (INTVAL (operands[2]) == 2) + return "s4addw %1,%n3,%0"; + default: + gcc_unreachable (); + } + }) + +(define_insn "*sxaddl" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (plus:DI + (ashift:DI (match_operand:DI 1 "reg_not_elim_operand" "r,r") + (match_operand:DI 2 "const_int_operand" "I,I")) + (match_operand:DI 3 "sext_add_operand" "rI,O")))] + "flag_sw_sxaddl==1 && (INTVAL (operands[2])==3 || INTVAL (operands[2])==2)" + { + switch (which_alternative) + { + case 0: + if (INTVAL (operands[2]) 
== 3) + return "s8addl %1,%3,%0"; + if (INTVAL (operands[2]) == 2) + return "s4addl %1,%3,%0"; + case 1: + if (INTVAL (operands[2]) == 3) + return "s8subl %1,%n3,%0"; + if (INTVAL (operands[2]) == 2) + return "s4subl %1,%n3,%0"; + default: + gcc_unreachable (); + } + }) + +(define_insn "*sxsubl" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (minus:DI + (ashift:DI (match_operand:DI 1 "reg_not_elim_operand" "r,r") + (match_operand:DI 2 "const_int_operand" "I,I")) + (match_operand:DI 3 "sext_add_operand" "rI,O")))] + "flag_sw_sxaddl==1 && (INTVAL (operands[2])==3 || INTVAL (operands[2])==2)" + { + switch (which_alternative) + { + case 0: + if (INTVAL (operands[2]) == 3) + return "s8subl %1,%3,%0"; + if (INTVAL (operands[2]) == 2) + return "s4subl %1,%3,%0"; + case 1: + if (INTVAL (operands[2]) == 3) + return "s8addl %1,%n3,%0"; + if (INTVAL (operands[2]) == 2) + return "s4addl %1,%n3,%0"; + default: + gcc_unreachable (); + } + }) + + +;; plus:SI (ashift:SI -> plus:SI (mult:SI +(define_split + [(set (match_operand:DI 0 "register_operand") + (sign_extend:DI + (plus:SI (mult:SI (match_operator:SI 1 "comparison_operator" + [(match_operand 2) + (match_operand 3)]) + (match_operand:SI 4 "const48_operand")) + (match_operand:SI 5 "sext_add_operand")))) + (clobber (match_operand:DI 6 "reg_not_elim_operand"))] + "" + [(set (match_dup 6) (match_dup 7)) + (set (match_dup 0) + (sign_extend:DI (plus:SI (mult:SI (match_dup 8) (match_dup 4)) + (match_dup 5))))] +{ + operands[7] = gen_rtx_fmt_ee (GET_CODE (operands[1]), DImode, + operands[2], operands[3]); + operands[8] = gen_lowpart (SImode, operands[6]); +}) + + +(define_insn "neg2" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (neg:I48MODE (match_operand:I48MODE 1 "reg_or_8bit_operand" "rI")))] + "" + "sub $31,%1,%0") + +(define_insn "*negsi_se" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (neg:SI + (match_operand:SI 1 "reg_or_8bit_operand" "rI"))))] + "" + "subw $31,%1,%0") + +(define_insn "sub3" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (minus:I48MODE (match_operand:I48MODE 1 "reg_or_0_operand" "rJ") + (match_operand:I48MODE 2 "reg_or_8bit_operand" "rI")))] + "" + "sub %r1,%2,%0") + +(define_insn "*subsi_se" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (minus:SI (match_operand:SI 1 "reg_or_0_operand" "rJ") + (match_operand:SI 2 "reg_or_8bit_operand" "rI"))))] + "" + "subw %r1,%2,%0") + +(define_insn "*subsi_se2" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (subreg:SI (minus:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "reg_or_8bit_operand" "rI")) + 0)))] + "" + "subw %r1,%2,%0") + +(define_insn "*ssubl" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI + (mult:SI (match_operand:SI 1 "reg_not_elim_operand" "r") + (match_operand:SI 2 "const48_operand" "I")) + (match_operand:SI 3 "reg_or_8bit_operand" "rI")))] + "" + "s%2subw %1,%3,%0") + +(define_insn "*ssubq" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI + (mult:DI (match_operand:DI 1 "reg_not_elim_operand" "r") + (match_operand:DI 2 "const48_operand" "I")) + (match_operand:DI 3 "reg_or_8bit_operand" "rI")))] + "" + "s%2subl %1,%3,%0") + +;;"s%P2subw %1,%3,%0" +(define_insn "*ssubl_se" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (minus:SI + (mult:SI (match_operand:SI 1 "reg_not_elim_operand" "r") + (match_operand:SI 2 "const48_operand" "I")) + (match_operand:SI 3 "reg_or_8bit_operand" "rI"))))] + 
"" + "s%2subw %1,%3,%0") + + +(define_insn "mul3" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (mult:I48MODE (match_operand:I48MODE 1 "reg_or_0_operand" "%rJ") + (match_operand:I48MODE 2 "reg_or_8bit_operand" "rI")))] + "" + "mul %r1,%2,%0" + [(set_attr "type" "imul") + (set_attr "opsize" "")]) + +(define_insn "*mulsi_se" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (mult:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ") + (match_operand:SI 2 "reg_or_8bit_operand" "rI"))))] + "" + "mulw %r1,%2,%0" + [(set_attr "type" "imul") + (set_attr "opsize" "si")]) + +(define_expand "umuldi3_highpart" + [(set (match_operand:DI 0 "register_operand") + (truncate:DI + (lshiftrt:TI + (mult:TI (zero_extend:TI + (match_operand:DI 1 "register_operand")) + (match_operand:DI 2 "reg_or_8bit_operand")) + (const_int 64))))] + "" +{ + if (REG_P (operands[2])) + operands[2] = gen_rtx_ZERO_EXTEND (TImode, operands[2]); +}) + +(define_insn "*umuldi3_highpart_reg" + [(set (match_operand:DI 0 "register_operand" "=r") + (truncate:DI + (lshiftrt:TI + (mult:TI (zero_extend:TI + (match_operand:DI 1 "register_operand" "r")) + (zero_extend:TI + (match_operand:DI 2 "register_operand" "r"))) + (const_int 64))))] + "" + "umulh %1,%2,%0" + [(set_attr "type" "imul") + (set_attr "opsize" "udi")]) + +(define_insn "*umuldi3_highpart_const" + [(set (match_operand:DI 0 "register_operand" "=r") + (truncate:DI + (lshiftrt:TI + (mult:TI (zero_extend:TI (match_operand:DI 1 "register_operand" "r")) + (match_operand:TI 2 "cint8_operand" "I")) + (const_int 64))))] + "" + "umulh %1,%2,%0" + [(set_attr "type" "imul") + (set_attr "opsize" "udi")]) + +(define_expand "umulditi3" + [(set (match_operand:TI 0 "register_operand") + (mult:TI + (zero_extend:TI (match_operand:DI 1 "reg_no_subreg_operand")) + (zero_extend:TI (match_operand:DI 2 "reg_no_subreg_operand"))))] + "" +{ + rtx l = gen_reg_rtx (DImode), h = gen_reg_rtx (DImode); + emit_insn (gen_muldi3 (l, operands[1], operands[2])); + emit_insn (gen_umuldi3_highpart (h, operands[1], operands[2])); + emit_move_insn (gen_lowpart (DImode, operands[0]), l); + emit_move_insn (gen_highpart (DImode, operands[0]), h); + DONE; +}) + +;; The divide and remainder operations take their inputs from r24 and +;; r25, put their output in r27, and clobber r23 and r28 on all systems. +;; +;; ??? Force sign-extension here because some versions of SYSV and +;; Interix/NT don't do the right thing if the inputs are not properly +;; sign-extended. But Linux, for instance, does not have this +;; problem. Is it worth the complication here to eliminate the sign +;; extension? 
+ +(define_code_iterator any_divmod [div mod udiv umod]) + +(define_expand "si3" + [(set (match_dup 3) + (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand"))) + (set (match_dup 4) + (sign_extend:DI (match_operand:SI 2 "nonimmediate_operand"))) + (parallel [(set (match_dup 5) + (sign_extend:DI + (any_divmod:SI (match_dup 3) (match_dup 4)))) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))]) + (set (match_operand:SI 0 "nonimmediate_operand") + (subreg:SI (match_dup 5) 0))] + "" +{ + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (DImode); + operands[5] = gen_reg_rtx (DImode); +}) + +(define_expand "di3" + [(parallel [(set (match_operand:DI 0 "register_operand") + (any_divmod:DI + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "register_operand"))) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))])] + "") + +(define_insn "int_div_use_float_si" + [(set (match_operand:DI 0 "register_operand" "=c") + (sign_extend:DI (match_operator:SI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")]))) + (clobber (reg:DF 55)) + (clobber (reg:DF 56)) + (clobber (reg:DF 60))] + "flag_sw_int_div_opt == 1 + &&(GET_CODE (operands[3])==DIV)" + "ifmovd %1,$f23 + fcvtld $f23,$f28 + fcpys $f28,$f28,$f23 + ifmovd %2,$f24 + fcvtld $f24,$f28 + fdivd $f23,$f28,$f24 + fcvtdl_z $f24,$f23 + fimovd $f23,%0" + [(set_attr "type" "fdiv")]) + +(define_insn "int_divu_use_float_si" + [(set (match_operand:DI 0 "register_operand" "=c") + (sign_extend:DI (match_operator:SI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")]))) + (clobber (reg:DF 55)) + (clobber (reg:DF 56)) + (clobber (reg:DF 60))] + "flag_sw_int_div_opt == 1 + &&(GET_CODE (operands[3])==UDIV)" + "zap %1,240,%1 + zap %2,240,%2 + ifmovd %1,$f23 + fcvtld $f23,$f28 + fcpys $f28,$f28,$f23 + ifmovd %2,$f24 + fcvtld $f24,$f28 + fdivd $f23,$f28,$f24 + fcvtdl_z $f24,$f23 + fimovd $f23,%0" + [(set_attr "type" "fdiv")]) + +(define_insn "int_rem_use_float_si" + [(set (match_operand:DI 0 "register_operand" "=c") + (sign_extend:DI (match_operator:SI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")]))) + (clobber (reg:DF 54)) + (clobber (reg:DF 55)) + (clobber (reg:DF 56)) + (clobber (reg:DF 60))] + "flag_sw_int_div_opt == 1 + &&(GET_CODE (operands[3])==MOD)" + "ifmovd %1,$f24 + fcvtld $f24,$f28 + fcpys $f28,$f28,$f24 + ifmovd %2,$f23 + fcvtld $f23,$f28 + fdivd $f24,$f28,$f22 + fcvtdl_z $f22,$f23 + fcvtld $f23,$f22 + fnmad $f22,$f28,$f24,$f23 + fcvtdl_z $f23,$f22 + fimovd $f22,%0" + [(set_attr "type" "fdiv")]) + +(define_insn "int_remu_use_float_si" + [(set (match_operand:DI 0 "register_operand" "=c") + (sign_extend:DI (match_operator:SI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")]))) + (clobber (reg:DF 54)) + (clobber (reg:DF 55)) + (clobber (reg:DF 56)) + (clobber (reg:DF 60))] + "flag_sw_int_div_opt == 1 + &&(GET_CODE (operands[3])==UMOD)" + "zap %1,240,%1 + zap %2,240,%2 + ifmovd %1,$f22 + fcvtld $f22,$f24 + ifmovd %2,$f22 + fcvtld $f22,$f28 + fdivd $f24,$f28,$f23 + fcvtdl_z $f23,$f22 + fcvtld $f22,$f23 + fnmad $f23,$f28,$f24,$f22 + fcvtdl_z $f22,$f23 + fimovd $f23,%0" + [(set_attr "type" "fdiv")]) + + +(define_insn_and_split "*divmodsi_internal_er" + [(set (match_operand:DI 0 "register_operand" "=c") + (sign_extend:DI (match_operator:SI 3 "divmod_operator" + [(match_operand:DI 1 
"register_operand" "a") + (match_operand:DI 2 "register_operand" "b")]))) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))] + "TARGET_EXPLICIT_RELOCS && !(TARGET_SW8A && flag_sw_int_divmod)" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 0) + (sign_extend:DI (match_dup 3))) + (use (match_dup 0)) + (use (match_dup 4)) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))])] +{ + if (flag_sw_int_div_opt) + { + const char *str; + operands[4] = GEN_INT (sw_64_next_sequence_number++); + switch (GET_CODE (operands[3])) + { + case DIV: + emit_insn (gen_int_div_use_float_si (operands[0], operands[1], operands[2], operands[3])); + break; + case UDIV: + emit_insn (gen_int_divu_use_float_si (operands[0], operands[1], operands[2], operands[3])); + break; + case MOD: + emit_insn (gen_int_rem_use_float_si (operands[0], operands[1], operands[2], operands[3])); + break; + case UMOD: + emit_insn (gen_int_remu_use_float_si (operands[0], operands[1], operands[2], operands[3])); + break; + default: + gcc_unreachable (); + } + } + else + { + const char *str; + switch (GET_CODE (operands[3])) + { + case DIV: + str = "__divw"; + break; + case UDIV: + str = "__divwu"; + break; + case MOD: + str = "__remw"; + break; + case UMOD: + str = "__remwu"; + break; + default: + gcc_unreachable (); + } + operands[4] = GEN_INT (sw_64_next_sequence_number++); + emit_insn (gen_movdi_er_high_g (operands[0], pic_offset_table_rtx, + gen_rtx_SYMBOL_REF (DImode, str), + operands[4])); + } +} + [(set_attr "type" "call") + (set_attr "length" "8")]) + +(define_insn "*divmodsi_internal_er_1" + [(set (match_operand:DI 0 "register_operand" "=c") + (sign_extend:DI (match_operator:SI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")]))) + (use (match_operand:DI 4 "register_operand" "c")) + (use (match_operand 5 "const_int_operand")) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))] + "TARGET_EXPLICIT_RELOCS && !(TARGET_SW8A && flag_sw_int_divmod)" + { + if (flag_sw_int_div_opt) + { + switch (GET_CODE (operands[3])) + { + case DIV: + case UDIV: + case MOD: + case UMOD: + return ""; + } + } + else + { + return "call $23,($27),__%E3%j5"; + } + } + [(set_attr "type" "call") + (set_attr "length" "4")]) + +(define_insn "*divmodsi_internal" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (match_operator:SI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "register_operand" "r")]))) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))] + "TARGET_SW8A && flag_sw_int_divmod" + { + switch (GET_CODE (operands[3])) + { + case DIV: return "divw %1,%2,%0"; + case UDIV: return "udivw %1,%2,%0"; + case MOD: return "remw %1,%2,%0"; + case UMOD: return "uremw %1,%2,%0"; + } + } + [(set_attr "length" "4")]) + +(define_insn "int_div_use_float_di" +[(set (match_operand:DI 0 "register_operand" "=c") + (match_operator:DI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")])) + (use (match_operand:DI 4 "register_operand" "r")) + (use (match_operand:DI 5 "symbolic_operand")) + (use (match_operand 6 "const_int_operand")) + (use (label_ref:DI (match_operand 7))) + (use (label_ref:DI (match_operand 8))) + (clobber (reg:DF 55)) + (clobber (reg:DI 27)) + (clobber (reg:DI 28)) + (clobber (reg:DF 59)) + (clobber (reg:DF 60))] + "TARGET_EXPLICIT_RELOCS && flag_sw_int_div_opt == 1 &&(GET_CODE (operands[3])==DIV)" + "srl %1,52,$28 + srl %2,52,$27 + bis $28,$27,$28 + bne $28,%l7 + ifmovd 
%1,$f23 + fcvtld $f23,$f27 + ifmovd %2,$f28 + fcvtld $f28,$f23 + fdivd $f27,$f23,$f28 + fcvtdl_z $f28,$f23 + fimovd $f23,%0 + br %l8 +%l7: + ldl %0,%5(%4)\t\t!literal!%6 + call $23,($27),__%E3%j6 +%l8:" + [(set_attr "cannot_copy" "true") + (set_attr "type" "fdiv")]) + +(define_insn "int_divu_use_float_di" +[(set (match_operand:DI 0 "register_operand" "=c") + (match_operator:DI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")])) + (use (match_operand:DI 4 "register_operand" "r")) + (use (match_operand:DI 5 "symbolic_operand")) + (use (match_operand 6 "const_int_operand")) + (use (label_ref:DI (match_operand 7))) + (use (label_ref:DI (match_operand 8))) + (clobber (reg:DF 55)) + (clobber (reg:DI 27)) + (clobber (reg:DI 28)) + (clobber (reg:DF 59)) + (clobber (reg:DF 60))] + "TARGET_EXPLICIT_RELOCS && flag_sw_int_div_opt == 1 + &&(GET_CODE (operands[3])==UDIV)" + "srl %1,52,$28 + srl %2,52,$27 + bis $28,$27,$28 + bne $28,%l7 + ifmovd %1,$f23 + fcvtld $f23,$f27 + ifmovd %2,$f28 + fcvtld $f28,$f23 + fdivd $f27,$f23,$f28 + fcvtdl_z $f28,$f23 + fimovd $f23,%0 + br %l8 +%l7: + ldl %0,%5(%4)\t\t!literal!%6 + call $23,($27),__%E3%j6 +%l8:" + [(set_attr "cannot_copy" "true") + (set_attr "type" "fdiv")]) + +(define_insn "int_rem_use_float_di" +[(set (match_operand:DI 0 "register_operand" "=c") + (match_operator:DI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")])) + (use (match_operand:DI 4 "register_operand" "r")) + (use (match_operand:DI 5 "symbolic_operand")) + (use (match_operand 6 "const_int_operand")) + (use (label_ref:DI (match_operand 7))) + (use (label_ref:DI (match_operand 8))) + (clobber (reg:DF 54)) + (clobber (reg:DF 55)) + (clobber (reg:DI 27)) + (clobber (reg:DI 28)) + (clobber (reg:DF 56)) + (clobber (reg:DF 60))] + "TARGET_EXPLICIT_RELOCS && flag_sw_int_div_opt == 1 + &&(GET_CODE (operands[3])==MOD)" + "srl %1,52,$28 + srl %2,52,$27 + bis $28,$27,$28 + bne $28,%l7 + ifmovd %1,$f22 + fcvtld $f22,$f24 + ifmovd %2,$f22 + fcvtld $f22,$f28 + fdivd $f24,$f28,$f22 + fcvtdl_z $f22,$f23 + fcvtld $f23,$f22 + fnmad $f22,$f28,$f24,$f23 + fcvtdl_z $f23,$f22 + fimovd $f22,%0 + br %l8 +%l7: + ldl %0,%5(%4)\t\t!literal!%6 + call $23,($27),__%E3%j6 +%l8:" + [(set_attr "cannot_copy" "true") + (set_attr "type" "fdiv")]) + +(define_insn "int_remu_use_float_di" +[(set (match_operand:DI 0 "register_operand" "=c") + (match_operator:DI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")])) + (use (match_operand:DI 4 "register_operand" "r")) + (use (match_operand:DI 5 "symbolic_operand")) + (use (match_operand 6 "const_int_operand")) + (use (label_ref:DI (match_operand 7))) + (use (label_ref:DI (match_operand 8))) + (clobber (reg:DF 54)) + (clobber (reg:DF 55)) + (clobber (reg:DI 27)) + (clobber (reg:DI 28)) + (clobber (reg:DF 56)) + (clobber (reg:DF 60))] + "TARGET_EXPLICIT_RELOCS && flag_sw_int_div_opt == 1 + &&(GET_CODE (operands[3])==UMOD)" + " srl %1,52,$28 + srl %2,52,$27 + bis $28,$27,$28 + bne $28,%l7 + ifmovd %1,$f22 + fcvtld $f22,$f24 + ifmovd %2,$f22 + fcvtld $f22,$f28 + fdivd $f24,$f28,$f23 + fcvtdl_z $f23,$f22 + fcvtld $f22,$f23 + fnmad $f23,$f28,$f24,$f22 + fcvtdl_z $f22,$f23 + fimovd $f23,%0 + br %l8 +%l7: + ldl %0,%5(%4)\t\t!literal!%6 + call $23,($27),__%E3%j6 +%l8:" + [(set_attr "cannot_copy" "true") + (set_attr "type" "fdiv")]) + +(define_insn_and_split "*divmoddi_internal_er" + [(set (match_operand:DI 0 
"register_operand" "=c") + (match_operator:DI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")])) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))] + "TARGET_EXPLICIT_RELOCS && !(TARGET_SW8A && flag_sw_int_divmod)" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 0) (match_dup 3)) + (use (match_dup 0)) + (use (match_dup 4)) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))])] +{ + if (flag_sw_int_div_opt) + { + const char *str; + operands[4] = GEN_INT (sw_64_next_sequence_number++); + operands[7] = gen_label_rtx (); + operands[8] = gen_label_rtx (); + switch (GET_CODE (operands[3])) + { + case DIV: + str = "__divl"; + emit_insn (gen_int_div_use_float_di (operands[0],operands[1],operands[2],operands[3],pic_offset_table_rtx ,gen_rtx_SYMBOL_REF (DImode, str),operands[4],operands[7],operands[8])); + break; + case UDIV: + str = "__divlu"; + emit_insn (gen_int_divu_use_float_di (operands[0],operands[1],operands[2],operands[3],pic_offset_table_rtx ,gen_rtx_SYMBOL_REF (DImode, str),operands[4],operands[7],operands[8])); + break; + case MOD: + str = "__reml"; + emit_insn (gen_int_rem_use_float_di (operands[0],operands[1],operands[2],operands[3],pic_offset_table_rtx ,gen_rtx_SYMBOL_REF (DImode, str),operands[4],operands[7],operands[8])); + break; + case UMOD: + str = "__remlu"; + emit_insn (gen_int_remu_use_float_di (operands[0],operands[1],operands[2],operands[3],pic_offset_table_rtx ,gen_rtx_SYMBOL_REF (DImode, str),operands[4],operands[7],operands[8])); + break; + default: + gcc_unreachable (); + } + } + else + { + const char *str; + switch (GET_CODE (operands[3])) + { + case DIV: + str = "__divl"; + break; + case UDIV: + str = "__divlu"; + break; + case MOD: + str = "__reml"; + break; + case UMOD: + str = "__remlu"; + break; + default: + gcc_unreachable (); + } + operands[4] = GEN_INT (sw_64_next_sequence_number++); + emit_insn (gen_movdi_er_high_g (operands[0], pic_offset_table_rtx, + gen_rtx_SYMBOL_REF (DImode, str), + operands[4])); + } +} + [(set_attr "type" "call") + (set_attr "length" "8")]) + +(define_insn "*divmoddi_internal_er_1" + [(set (match_operand:DI 0 "register_operand" "=c") + (match_operator:DI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")])) + (use (match_operand:DI 4 "register_operand" "c")) + (use (match_operand 5 "const_int_operand")) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))] + "TARGET_EXPLICIT_RELOCS && !(TARGET_SW8A && flag_sw_int_divmod)" + { + if (flag_sw_int_div_opt) + { + switch (GET_CODE (operands[3])) + { + case DIV: + case UDIV: + case MOD: + case UMOD: + return ""; + } + } + else + { + return "call $23,($27),__%E3%j5"; + } + } + [(set_attr "type" "call") + (set_attr "length" "4")]) + +(define_insn "*divmoddi_internal" + [(set (match_operand:DI 0 "register_operand" "=r") + (match_operator:DI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "register_operand" "r")])) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))] + "TARGET_SW8A && flag_sw_int_divmod" + { + switch (GET_CODE (operands[3])) + { + case DIV: return "divl %1,%2,%0"; + case UDIV: return "udivl %1,%2,%0"; + case MOD: return "reml %1,%2,%0"; + case UMOD: return "ureml %1,%2,%0"; + } + } + [(set_attr "length" "4")]) + +;; Next are the basic logical operations. We only expose the DImode operations +;; to the rtl expanders, but SImode versions exist for combine as well as for +;; the atomic operation splitters. 
+ +(define_insn "*andsi_internal" + [(set (match_operand:SI 0 "register_operand" "=r,r,r") + (and:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ,rJ") + (match_operand:SI 2 "and_operand" "rI,N,M")))] + "" + "@ + and %r1,%2,%0 + bic %r1,%N2,%0 + zapnot %r1,%m2,%0" + [(set_attr "type" "ilog,ilog,shift")]) + +(define_insn "anddi3" + [(set (match_operand:DI 0 "register_operand" "=r,r,r") + (and:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ,rJ,rJ") + (match_operand:DI 2 "and_operand" "rI,N,M")))] + "" + "@ + and %r1,%2,%0 + bic %r1,%N2,%0 + zapnot %r1,%m2,%0" + [(set_attr "type" "ilog,ilog,shift")]) + +;; There are times when we can split an AND into two AND insns. This occurs +;; when we can first clear any bytes and then clear anything else. For +;; example "I & 0xffff07" is "(I & 0xffffff) & 0xffffffffffffff07". +;; Only do this when running on 64-bit host since the computations are +;; too messy otherwise. + +(define_split + [(set (match_operand:DI 0 "register_operand") + (and:DI (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "const_int_operand")))] + "! and_operand (operands[2], DImode)" + [(set (match_dup 0) (and:DI (match_dup 1) (match_dup 3))) + (set (match_dup 0) (and:DI (match_dup 0) (match_dup 4)))] +{ + unsigned HOST_WIDE_INT mask1 = INTVAL (operands[2]); + unsigned HOST_WIDE_INT mask2 = mask1; + int i; + + /* For each byte that isn't all zeros, make it all ones. */ + for (i = 0; i < 64; i += 8) + if ((mask1 & ((HOST_WIDE_INT) 0xff << i)) != 0) + mask1 |= (HOST_WIDE_INT) 0xff << i; + + /* Now turn on any bits we've just turned off. */ + mask2 |= ~ mask1; + + operands[3] = GEN_INT (mask1); + operands[4] = GEN_INT (mask2); +}) + +(define_insn "zero_extendqi2" + [(set (match_operand:I248MODE 0 "register_operand" "=r,r") + (zero_extend:I248MODE + (match_operand:QI 1 "reg_or_bwx_memory_operand" "r,m")))] + "" + "@ + and %1,0xff,%0 + ldbu%U1 %0,%1" + [(set_attr "type" "ilog,ild") + (set_attr "isa" "*,bwx")]) + +(define_insn "zero_extendhi2" + [(set (match_operand:I48MODE 0 "register_operand" "=r,r") + (zero_extend:I48MODE + (match_operand:HI 1 "reg_or_bwx_memory_operand" "r,m")))] + "" + "@ + zapnot %1,3,%0 + ldhu%U1 %0,%1" + [(set_attr "type" "shift,ild") + (set_attr "isa" "*,bwx")]) + +(define_insn "zero_extendsidi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (match_operand:SI 1 "register_operand" "r")))] + "" + "zapnot %1,15,%0" + [(set_attr "type" "shift")]) + +(define_insn "andnot3" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (and:I48MODE + (not:I48MODE (match_operand:I48MODE 1 "reg_or_8bit_operand" "rI")) + (match_operand:I48MODE 2 "reg_or_0_operand" "rJ")))] + "" + "bic %r2,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*iorsi_internal" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (ior:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:SI 2 "or_operand" "rI,N")))] + "" + "@ + bis %r1,%2,%0 + ornot %r1,%N2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "iordi3" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (ior:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:DI 2 "or_operand" "rI,N")))] + "" + "@ + bis %r1,%2,%0 + ornot %r1,%N2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*one_cmplsi_internal" + [(set (match_operand:SI 0 "register_operand" "=r") + (not:SI (match_operand:SI 1 "reg_or_8bit_operand" "rI")))] + "" + "ornot $31,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "one_cmpldi2" + [(set (match_operand:DI 0 "register_operand" "=r") + 
(not:DI (match_operand:DI 1 "reg_or_8bit_operand" "rI")))] + "" + "ornot $31,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*iornot3" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (ior:I48MODE + (not:I48MODE (match_operand:I48MODE 1 "reg_or_8bit_operand" "rI")) + (match_operand:I48MODE 2 "reg_or_0_operand" "rJ")))] + "" + "ornot %r2,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*xorsi_internal" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (xor:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:SI 2 "or_operand" "rI,N")))] + "" + "@ + xor %r1,%2,%0 + eqv %r1,%N2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "xordi3" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (xor:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:DI 2 "or_operand" "rI,N")))] + "" + "@ + xor %r1,%2,%0 + eqv %r1,%N2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*xornot3" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (not:I48MODE (xor:I48MODE + (match_operand:I48MODE 1 "register_operand" "%rJ") + (match_operand:I48MODE 2 "register_operand" "rI"))))] + "" + "eqv %r1,%2,%0" + [(set_attr "type" "ilog")]) + +;; Handle FFS and related insns iff we support CIX. + +(define_expand "ffsdi2" + [(set (match_dup 2) + (ctz:DI (match_operand:DI 1 "register_operand"))) + (set (match_dup 3) + (plus:DI (match_dup 2) (const_int 1))) + (set (match_operand:DI 0 "register_operand") + (if_then_else:DI (eq (match_dup 1) (const_int 0)) + (const_int 0) (match_dup 3)))] + "" +{ + operands[2] = gen_reg_rtx (DImode); + operands[3] = gen_reg_rtx (DImode); +}) + +(define_insn "clzdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (clz:DI (match_operand:DI 1 "register_operand" "r")))] + "" + "ctlz %1,%0" + [(set_attr "type" "mvi")]) + +(define_insn "ctzdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (ctz:DI (match_operand:DI 1 "register_operand" "r")))] + "" + "cttz %1,%0" + [(set_attr "type" "mvi")]) + +(define_insn "popcountdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (popcount:DI (match_operand:DI 1 "register_operand" "r")))] + "" + "ctpop %1,%0" + [(set_attr "type" "mvi")]) + +(define_insn "popcountsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (popcount:SI (match_operand:SI 1 "register_operand" "r")))] + "" + "zapnot %1,15,%0\;ctpop %0,%0" + [(set_attr "type" "mvi")]) + +(define_expand "bswapsi2" + [(set (match_operand:SI 0 "register_operand") + (bswap:SI (match_operand:SI 1 "register_operand")))] + "!optimize_size" +{ + if (TARGET_SW8A == 0 || flag_sw_rev != 1) + { + rtx t0, t1; + + t0 = gen_reg_rtx (DImode); + t1 = gen_reg_rtx (DImode); + + emit_insn (gen_inslh (t0, gen_lowpart (DImode, operands[1]), GEN_INT (7))); + emit_insn (gen_inswl_const (t1, gen_lowpart (HImode, operands[1]), + GEN_INT (24))); + emit_insn (gen_iordi3 (t1, t0, t1)); + emit_insn (gen_lshrdi3 (t0, t1, GEN_INT (16))); + emit_insn (gen_anddi3 (t1, t1, sw_64_expand_zap_mask (0x5))); + emit_insn (gen_anddi3 (t0, t0, sw_64_expand_zap_mask (0xa))); + emit_insn (gen_addsi3 (operands[0], gen_lowpart (SImode, t0), + gen_lowpart (SImode, t1))); + DONE; + } + else + { + emit_insn (gen_bswapsi2_internal (operands[0], operands[1])); + DONE; + } +}) + +(define_expand "bswapdi2" + [(set (match_operand:DI 0 "register_operand") + (bswap:DI (match_operand:DI 1 "register_operand")))] + "!optimize_size" +{ + if (TARGET_SW8A == 0 || flag_sw_rev != 1) + { + rtx t0, t1; + + t0 = gen_reg_rtx (DImode); + t1 = gen_reg_rtx (DImode); + + /* This 
method of shifting and masking is not specific to Sw_64, but + is only profitable on Sw_64 because of our handy byte zap insn. */ + + emit_insn (gen_lshrdi3 (t0, operands[1], GEN_INT (32))); + emit_insn (gen_ashldi3 (t1, operands[1], GEN_INT (32))); + emit_insn (gen_iordi3 (t1, t0, t1)); + + emit_insn (gen_lshrdi3 (t0, t1, GEN_INT (16))); + emit_insn (gen_ashldi3 (t1, t1, GEN_INT (16))); + emit_insn (gen_anddi3 (t0, t0, sw_64_expand_zap_mask (0xcc))); + emit_insn (gen_anddi3 (t1, t1, sw_64_expand_zap_mask (0x33))); + emit_insn (gen_iordi3 (t1, t0, t1)); + + emit_insn (gen_lshrdi3 (t0, t1, GEN_INT (8))); + emit_insn (gen_ashldi3 (t1, t1, GEN_INT (8))); + emit_insn (gen_anddi3 (t0, t0, sw_64_expand_zap_mask (0xaa))); + emit_insn (gen_anddi3 (t1, t1, sw_64_expand_zap_mask (0x55))); + emit_insn (gen_iordi3 (operands[0], t0, t1)); + DONE; + } + else + { + emit_insn (gen_bswapdi2_internal (operands[0], operands[1])); + DONE; + } +}) + +(define_insn "bswaphi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (bswap:HI (match_operand:HI 1 "register_operand" "r")))] + "TARGET_SW8A && flag_sw_rev == 1" + "revbh %1,%0" + [(set_attr "isa" "sw8a")]) + +(define_insn "bswapsi2_internal" + [(set (match_operand:SI 0 "register_operand" "=r") + (bswap:SI (match_operand:SI 1 "register_operand" "r")))] + "TARGET_SW8A && flag_sw_rev == 1" + "revbw %1,%0" + [(set_attr "isa" "sw8a")]) + +(define_insn "bswapdi2_internal" + [(set (match_operand:DI 0 "register_operand" "=r") + (bswap:DI (match_operand:DI 1 "register_operand" "r")))] + "TARGET_SW8A && flag_sw_rev == 1" + "revbl %1,%0" + [(set_attr "isa" "sw8a")]) + +(define_insn "ldfdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (fix:DI + (unspec:DF [(match_operand:DF 1 "register_operand" "fG")] + FRINT)))] + "TARGET_SW8A && flag_sw_cmov == 1" + "cmov%-l %1, %0" + [(set_attr "type" "frint")]) + +(define_insn "fix_truncdfdi2_8a" + [(set (match_operand:DI 0 "reg_no_subreg_operand" "=&r,&r") + (fix:DI + (match_operand:DF 1 "reg_or_0_operand" "fG,fG")))] + "TARGET_SW8A && TARGET_FP && flag_sw_cmov == 1" + "cmov%-l%T2 %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "c") + (set_attr "trap_suffix" "v_sv_svi") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn "ludfdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (unsigned_fix:DI + (unspec:DF [(match_operand:DF 1 "register_operand" "fG")] + FRINT)))] + "TARGET_SW8A && flag_sw_cmov == 1" + "cmov%-lu %1, %0" + [(set_attr "type" "frint")]) + +(define_insn "fixuns_truncdfdi2_internal" + [(set (match_operand:DI 0 "reg_no_subreg_operand" "=&r,&r") + (unsigned_fix:DI + (match_operand:DF 1 "reg_or_0_operand" "fG,fG")))] + "TARGET_SW8A && TARGET_FP && flag_sw_cmov == 1" + "cmov%-lu%T2 %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "c") + (set_attr "trap_suffix" "v_sv_svi") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn "idfsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (fix:SI + (unspec:DF [(match_operand:DF 1 "register_operand" "fG")] + FRINT)))] + "TARGET_SW8A && flag_sw_cmov == 1" + "cmov%-w %1, %0" + [(set_attr "type" "frint")]) + +;; CMOVDW_Z PART1 +(define_insn "fix_truncdfsi2_8a" + [(set (match_operand:SI 0 "reg_no_subreg_operand" "=&r,&r") + (fix:SI + (match_operand:DF 1 "reg_or_0_operand" "fG,fG")))] 
+ "TARGET_SW8A && TARGET_FP && flag_sw_cmov == 1" + "cmov%-w%T2 %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "c") + (set_attr "trap_suffix" "v_sv_svi") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +;; CMOVDW_Z PART2 +(define_expand "fix_truncdfsi2" + [(set (match_operand:SI 0 "reg_no_subreg_operand") + (fix:SI (match_operand:DF 1 "reg_or_0_operand")))] + "TARGET_SW8A && TARGET_FP && flag_sw_cmov == 1") + +(define_insn "iudfsi2" + [(set (match_operand:SI 0 "register_operand" "=&r,&r") + (unsigned_fix:SI + (unspec:DF [(match_operand:DF 1 "register_operand" "fG,fG")] + FRINT)))] + "TARGET_SW8A && flag_sw_cmov == 1" + "cmov%-wu %1, %0" + [(set_attr "type" "frint")]) + +;; CMOVDWU_Z PART1 +(define_insn "*fixuns_truncdfsi2" + [(set (match_operand:SI 0 "reg_no_subreg_operand" "=&r,&r") + (unsigned_fix:SI + (match_operand:DF 1 "reg_or_0_operand" "fG,fG")))] + "TARGET_SW8A && TARGET_FP && flag_sw_cmov == 1" + "cmov%-wu%T2 %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "c") + (set_attr "trap_suffix" "v_sv_svi") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +;; CMOVDWU_Z PART2 +(define_expand "fixuns_truncdfsi2" + [(set (match_operand:SI 0 "reg_no_subreg_operand") + (unsigned_fix:SI (match_operand:DF 1 "reg_or_0_operand")))] + "TARGET_SW8A && TARGET_FP && flag_sw_cmov == 1") + +(define_insn "floatdisf2_8a" + [(set (match_operand:SF 0 "register_operand" "=&f,&f") + (float:SF (match_operand:DI 1 "reg_no_subreg_operand" "r,r")))] + "TARGET_SW8A && flag_sw_cmov == 1" + "cmovls %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn "floatunsdisf2" + [(set (match_operand:SF 0 "register_operand" "=&f,&f") + (unsigned_float:SF (match_operand:DI 1 "reg_no_subreg_operand" "r,r")))] + "TARGET_SW8A && flag_sw_cmov == 1" + "cmovuls %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn "floatsisf2" + [(set (match_operand:SF 0 "register_operand" "=&f,&f") + (float:SF (match_operand:SI 1 "reg_no_subreg_operand" "r,r")))] + "TARGET_SW8A && flag_sw_cmov == 1" + "cmovws %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn "floatunssisf2" + [(set (match_operand:SF 0 "register_operand" "=&f,&f") + (unsigned_float:SF (match_operand:SI 1 "reg_no_subreg_operand" "r,r")))] + "TARGET_SW8A && flag_sw_cmov == 1" + "cmovuws %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn "floatdidf2_8a" + [(set (match_operand:DF 0 "register_operand" "=&f,&f") + (float:DF 
(match_operand:DI 1 "reg_no_subreg_operand" "r,r")))] + "TARGET_SW8A && flag_sw_cmov == 1 && TARGET_FP" + "cmovl%-%/ %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn "floatunsdidf2" + [(set (match_operand:DF 0 "register_operand" "=&f,&f") + (unsigned_float:DF (match_operand:DI 1 "reg_no_subreg_operand" "r,r")))] + "TARGET_SW8A && flag_sw_cmov == 1" + "cmovuld %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn "floatsidf2" + [(set (match_operand:DF 0 "register_operand" "=&f,&f") + (float:DF (match_operand:SI 1 "reg_no_subreg_operand" "r,r")))] + "TARGET_SW8A && flag_sw_cmov == 1" + "cmovwd %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn "floatunssidf2" + [(set (match_operand:DF 0 "register_operand" "=&f,&f") + (unsigned_float:DF (match_operand:SI 1 "reg_no_subreg_operand" "r,r")))] + "TARGET_SW8A && flag_sw_cmov == 1" + "cmovuwd %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn "builtin_sbt" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (unspec:DI + [(match_operand:DI 2 "reg_or_6bit_operand" "rI")] + UNSPEC_SBT) + (match_operand:DI 1 "register_operand" "r")))] + "flag_sw_bitop" + "sbt %r1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "builtin_cbt" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (unspec:DI + [(match_operand:DI 2 "reg_or_6bit_operand" "rI")] + UNSPEC_CBT) + (match_operand:DI 1 "register_operand" "r")))] + "flag_sw_bitop" + "cbt %r1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "lshrsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (lshiftrt:SI (match_operand:SI 1 "reg_or_0_operand" "rJ") + (match_operand:SI 2 "reg_or_5bit_operand" "rY")))] + "TARGET_SW8A && flag_sw_shift_word == 1" + "srlw %r1,%2,%0" + [(set_attr "type" "shift") + (set_attr "isa" "sw8a")]) + +(define_insn "ashrsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (ashiftrt:SI (match_operand:SI 1 "reg_or_0_operand" "rJ") + (match_operand:SI 2 "reg_or_5bit_operand" "rY")))] + "TARGET_SW8A && flag_sw_shift_word == 1" + "sraw %r1,%2,%0" + [(set_attr "type" "shift") + (set_attr "isa" "sw8a")]) + +(define_insn "rotlsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (subreg:SI + (zero_extend:DI (rotate:SI (match_operand:SI 1 "reg_or_0_operand" "rJ") + (match_operand:SI 2 "reg_or_5bit_operand" "rY"))) 0))] + "TARGET_SW8A && flag_sw_shift_word == 1" + "rolw %r1,%2,%0" + [(set_attr "type" "shift") + (set_attr "isa" "sw8a")]) + +(define_insn "rotldi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (rotate:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "reg_or_6bit_operand" "rS")))] + "TARGET_SW8A && 
flag_sw_shift_word == 1" + "roll %r1,%2,%0" + [(set_attr "type" "shift") + (set_attr "isa" "sw8a")]) + +;; Next come the shifts and the various extract and insert operations. + +(define_insn "ashldi3" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (ashift:DI (match_operand:DI 1 "reg_or_0_operand" "rJ,rJ") + (match_operand:DI 2 "reg_or_6bit_operand" "P,rS")))] + "" +{ + switch (which_alternative) + { + case 0: + if (operands[2] == const1_rtx) + return "addl %r1,%r1,%0"; + else + return "sll %r1,%2,%0"; + case 1: + if (TARGET_SW8A == 0 || flag_sw_shift_word != 1) + return "sll %r1,%2,%0"; + else + return "slll %r1,%2,%0"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "iadd,shift")]) + +(define_expand "ashlsi3" + [(set (match_operand:SI 0 "register_operand") + (ashift:SI (match_operand:SI 1 "reg_or_0_operand") + (match_operand:SI 2 "reg_or_5bit_operand")))]) + +(define_insn "*ashlsi3_sll" + [(set (match_operand:SI 0 "register_operand" "=r,&r") + (ashift:SI (match_operand:SI 1 "reg_or_0_operand" "rJ,rJ") + (match_operand:SI 2 "reg_or_5bit_operand" "P,rS")))] + "TARGET_SW8A == 0 || flag_sw_shift_word != 1" +{ + switch (which_alternative) + { + case 0: + if (operands[2] == const1_rtx) + return "addw %r1,%r1,%0"; + else + return "s%P2addw %r1,0,%0"; + case 1: + if (REG_P (operands[2])) + return "and %2,31,%0\;sll %r1,%0,%0"; + else + return "sll %r1,%2,%0"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "iadd,shift")]) + +(define_insn "*ashlsi3_sllw" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (ashift:SI (match_operand:SI 1 "reg_or_0_operand" "rJ,rJ") + (match_operand:SI 2 "reg_or_5bit_operand" "P,rY")))] + "TARGET_SW8A && flag_sw_shift_word == 1" +{ + switch (which_alternative) + { + case 0: + if (operands[2] == const1_rtx) + return "addw %r1,%r1,%0"; + else + return "s%P2addw %r1,0,%0"; + case 1: + return "sllw %r1,%2,%0"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "iadd,shift") + (set_attr "isa" "*,sw8a")]) + +(define_insn "*ashldi_se" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (subreg:SI (ashift:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "const_int_operand" "P")) + 0)))] + "IN_RANGE (INTVAL (operands[2]), 1, 3)" +{ + if (operands[2] == const1_rtx) + return "addw %r1,%r1,%0"; + else + return "s%P2addw %r1,0,%0"; +} + [(set_attr "type" "iadd")]) + +(define_insn "lshrdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "reg_or_6bit_operand" "rS")))] + "" +{ + if (TARGET_SW8A == 0 || flag_sw_shift_word != 1) + return "srl %r1,%2,%0"; + else + return "srll %r1,%2,%0"; +} + [(set_attr "type" "shift")]) + +(define_insn "ashrdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashiftrt:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "reg_or_6bit_operand" "rS")))] + "" +{ + if (TARGET_SW8A == 0 || flag_sw_shift_word != 1) + return "sra %r1,%2,%0"; + else + return "sral %r1,%2,%0"; +} + [(set_attr "type" "shift")]) + +(define_insn "extendqi2" + [(set (match_operand:I24MODE 0 "register_operand" "=r") + (sign_extend:I24MODE + (match_operand:QI 1 "register_operand" "r")))] + "" + "sextb %1,%0" + [(set_attr "type" "shift")]) + +(define_expand "extendqidi2" + [(set (match_operand:DI 0 "register_operand") + (sign_extend:DI (match_operand:QI 1 "general_operand")))] + "" +{ + operands[1] = force_reg (QImode, operands[1]); +}) + +(define_insn "*extendqidi2_bwx" + [(set 
(match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (match_operand:QI 1 "register_operand" "r")))] + "" + "sextb %1,%0" + [(set_attr "type" "shift")]) + +(define_insn "extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extend:SI (match_operand:HI 1 "register_operand" "r")))] + "" + "sexth %1,%0" + [(set_attr "type" "shift")]) + +(define_expand "extendhidi2" + [(set (match_operand:DI 0 "register_operand") + (sign_extend:DI (match_operand:HI 1 "general_operand")))] + "" +{ + operands[1] = force_reg (HImode, operands[1]); +}) + +(define_insn "*extendhidi2_bwx" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (match_operand:HI 1 "register_operand" "r")))] + "" + "sexth %1,%0" + [(set_attr "type" "shift")]) + +;; Here's how we sign extend an unaligned byte and halfword. Doing this +;; as a pattern saves one instruction. The code is similar to that for +;; the unaligned loads (see below). +;; +;; Operand 1 is the address, operand 0 is the result. + +(define_expand "unaligned_extendqidi" + [(set (match_dup 3) + (mem:DI (and:DI (match_operand:DI 1 "address_operand") (const_int -8)))) + (set (match_dup 4) + (ashift:DI (match_dup 3) + (minus:DI (const_int 64) + (ashift:DI + (and:DI (match_dup 2) (const_int 7)) + (const_int 3))))) + (set (match_operand:QI 0 "register_operand") + (ashiftrt:DI (match_dup 4) (const_int 56)))] + "" +{ + operands[0] = gen_lowpart (DImode, operands[0]); + operands[2] = get_unaligned_offset (operands[1], 1); + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (DImode); +}) + +(define_expand "unaligned_extendhidi" + [(set (match_dup 3) + (mem:DI (and:DI (match_operand:DI 1 "address_operand") (const_int -8)))) + (set (match_dup 4) + (ashift:DI (match_dup 3) + (minus:DI (const_int 64) + (ashift:DI + (and:DI (match_dup 2) (const_int 7)) + (const_int 3))))) + (set (match_operand:HI 0 "register_operand") + (ashiftrt:DI (match_dup 4) (const_int 48)))] + "" +{ + operands[0] = gen_lowpart (DImode, operands[0]); + operands[2] = get_unaligned_offset (operands[1], 2); + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (DImode); +}) + +;; add if condition +(define_insn "*extxl_const" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "mode_width_operand" "n") + (match_operand:DI 3 "mul8_operand" "I")))] + "" +{ + if (INTVAL (operands[2])==8) + return "extlb %r1,%s3,%0"; + else if (INTVAL (operands[2])==16) + return "extlh %r1,%s3,%0"; + else if (INTVAL (operands[2])==32) + return "extlw %r1,%s3,%0"; + else if (INTVAL (operands[2])==64) + return "extll %r1,%s3,%0"; +} + [(set_attr "type" "shift")]) + +;; add if condition +(define_insn "extxl" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extract:DI + (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "mode_width_operand" "n") + (ashift:DI (match_operand:DI 3 "reg_or_8bit_operand" "rI") + (const_int 3))))] + "" +{ + if (INTVAL (operands[2])==8) + return "extlb %r1,%3,%0"; + else if (INTVAL (operands[2])==16) + return "extlh %r1,%3,%0"; + else if (INTVAL (operands[2])==32) + return "extlw %r1,%3,%0"; + else if (INTVAL (operands[2])==64) + return "extll %r1,%3,%0"; +} + [(set_attr "type" "shift")]) + +;; Combine has some strange notion of preserving existing undefined behavior +;; in shifts larger than a word size. So capture these patterns that it +;; should have turned into zero_extracts. 
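+;; Illustrative note (not generated verbatim): in C terms the pattern
+;; below matches an expression of the form
+;;   (x >> (y * 8)) & 0xff
+;; i.e. "extract byte y of x", which maps onto a single extlb; the
+;; 0xffff, 0xffffffff and -1 masks select extlh, extlw and extll in the
+;; same way.  Here x and y stand for operands 1 and 2 of the pattern.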
+ +;; add if condition +(define_insn "*extxl_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (lshiftrt:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 3))) + (match_operand:DI 3 "mode_mask_operand" "n")))] + "" +{ + if (INTVAL (operands[3]) == 0xff) + return "extlb %r1,%2,%0"; + else if (INTVAL (operands[3]) == 0xffff) + return "extlh %r1,%2,%0"; + else if (INTVAL (operands[3]) == 0xffffffff) + return "extlw %r1,%2,%0"; + else if (INTVAL (operands[3]) == -1) + return "extll %r1,%2,%0"; +} + [(set_attr "type" "shift")]) + +(define_insn "*extql_2" + [(set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 3))))] + "" + "extll %1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "extqh" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI + (match_operand:DI 1 "reg_or_0_operand" "rJ") + (minus:DI (const_int 64) + (ashift:DI + (and:DI + (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 7)) + (const_int 3)))))] + "" + "exthl %r1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "extwh" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI + (and:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (const_int 65535)) + (minus:DI (const_int 64) + (ashift:DI + (and:DI + (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 7)) + (const_int 3)))))] + "" + "exthh %r1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "extlh" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI + (and:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (const_int 2147483647)) + (minus:DI (const_int 64) + (ashift:DI + (and:DI + (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 7)) + (const_int 3)))))] + "" + "exthw %r1,%2,%0" + [(set_attr "type" "shift")]) + +;; This converts an extXl into an extXh with an appropriate adjustment +;; to the address calculation. 
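+;; The insert (insXl) and mask (mskXl) patterns that follow are the
+;; building blocks of unaligned stores.  Roughly, storing a byte val to
+;; an unaligned address addr is open-coded as
+;;   t1 = aligned quadword containing addr
+;;   t2 = inslb  val, addr    ; val shifted into the right byte lane
+;;   t3 = masklb t1, addr     ; old byte cleared out of the quadword
+;;   store (t2 | t3) back to the aligned address
+;; This sequence is only a sketch of how the mnemonics defined here are
+;; meant to be combined, not a literal expansion taken from this file.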
+ +(define_insn "insbl_const" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (zero_extend:DI + (match_operand:QI 1 "register_operand" "r")) + (match_operand:DI 2 "mul8_operand" "I")))] + "" + "inslb %1,%s2,%0" + [(set_attr "type" "shift")]) + +(define_insn "inswl_const" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (zero_extend:DI + (match_operand:HI 1 "register_operand" "r")) + (match_operand:DI 2 "mul8_operand" "I")))] + "" + "inslh %1,%s2,%0" + [(set_attr "type" "shift")]) + +(define_insn "insll_const" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (zero_extend:DI + (match_operand:SI 1 "register_operand" "r")) + (match_operand:DI 2 "mul8_operand" "I")))] + "" + "inslw %1,%s2,%0" + [(set_attr "type" "shift")]) + +(define_insn "insbl" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (zero_extend:DI + (match_operand:QI 1 "register_operand" "r")) + (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 3))))] + "" + "inslb %1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "inswl" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (zero_extend:DI + (match_operand:HI 1 "register_operand" "r")) + (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 3))))] + "" + "inslh %1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "insll" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (zero_extend:DI + (match_operand:SI 1 "register_operand" "r")) + (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 3))))] + "" + "inslw %1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "insql" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (match_operand:DI 1 "register_operand" "r") + (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 3))))] + "" + "insll %1,%2,%0" + [(set_attr "type" "shift")]) + +;; Combine has this sometimes habit of moving the and outside of the +;; shift, making life more interesting. + +(define_insn "*insxl" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "mul8_operand" "I")) + (match_operand:DI 3 "const_int_operand" "i")))] + "((unsigned HOST_WIDE_INT) 0xff << INTVAL (operands[2]) + == (unsigned HOST_WIDE_INT) INTVAL (operands[3])) + || ((unsigned HOST_WIDE_INT) 0xffff << INTVAL (operands[2]) + == (unsigned HOST_WIDE_INT) INTVAL (operands[3])) + || ((unsigned HOST_WIDE_INT) 0xffffffff << INTVAL (operands[2]) + == (unsigned HOST_WIDE_INT) INTVAL (operands[3]))" +{ +#if HOST_BITS_PER_WIDE_INT == 64 + if ((unsigned HOST_WIDE_INT) 0xff << INTVAL (operands[2]) + == (unsigned HOST_WIDE_INT) INTVAL (operands[3])) + return "inslb %1,%s2,%0"; + if ((unsigned HOST_WIDE_INT) 0xffff << INTVAL (operands[2]) + == (unsigned HOST_WIDE_INT) INTVAL (operands[3])) + return "inslh %1,%s2,%0"; + if ((unsigned HOST_WIDE_INT) 0xffffffff << INTVAL (operands[2]) + == (unsigned HOST_WIDE_INT) INTVAL (operands[3])) + return "inslw %1,%s2,%0"; +#endif + gcc_unreachable (); +} + [(set_attr "type" "shift")]) + +;; We do not include the insXh insns because they are complex to express +;; and it does not appear that we would ever want to generate them. +;; +;; Since we need them for block moves, though, cop out and use unspec. 
+ +(define_insn "insxh" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "mode_width_operand" "n") + (match_operand:DI 3 "reg_or_8bit_operand" "rI")] + UNSPEC_INSXH))] + "" +{ + if (INTVAL (operands[2])==16) + return "inshh %r1,%3,%0"; + else if (INTVAL (operands[2])==32) + return "inshw %r1,%3,%0"; + else if (INTVAL (operands[2])==64) + return "inshl %r1,%3,%0"; +} + [(set_attr "type" "shift")]) + +(define_insn "mskxl" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (not:DI (ashift:DI + (match_operand:DI 2 "mode_mask_operand" "n") + (ashift:DI + (match_operand:DI 3 "reg_or_8bit_operand" "rI") + (const_int 3)))) + (match_operand:DI 1 "reg_or_0_operand" "rJ")))] + "" +{ + if (INTVAL (operands[2]) == 0xff) + return "masklb %r1,%3,%0"; + else if (INTVAL (operands[2]) == 0xffff) + return "masklh %r1,%3,%0"; + else if (INTVAL (operands[2]) == 0xffffffff) + return "masklw %r1,%3,%0"; + else if (INTVAL (operands[2]) == -1) + return "maskll %r1,%3,%0"; +} + [(set_attr "type" "shift")]) + +;; We do not include the mskXh insns because it does not appear we would +;; ever generate one. +;; +;; Again, we do for block moves and we use unspec again. + +(define_insn "mskxh" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "mode_width_operand" "n") + (match_operand:DI 3 "reg_or_8bit_operand" "rI")] + UNSPEC_MSKXH))] + "" +{ + if (INTVAL (operands[2])==16) + return "maskhh %r1,%3,%0"; + else if (INTVAL (operands[2])==32) + return "maskhw %r1,%3,%0"; + else if (INTVAL (operands[2])==64) + return "maskhl %r1,%3,%0"; +} + [(set_attr "type" "shift")]) + + +(define_insn_and_split "*ze_and_ne" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (const_int 1) + (match_operand 2 "const_int_operand" "I")))] + "(unsigned HOST_WIDE_INT) INTVAL (operands[2]) < 8" + "#" + "(unsigned HOST_WIDE_INT) INTVAL (operands[2]) < 8" + [(set (match_dup 0) + (and:DI (match_dup 1) (match_dup 3))) + (set (match_dup 0) + (ne:DI (match_dup 0) (const_int 0)))] + "operands[3] = GEN_INT (1 << INTVAL (operands[2]));") + +;; Floating-point operations. All the double-precision insns can extend +;; from single, so indicate that. The exception are the ones that simply +;; play with the sign bits; it's not clear what to do there. 
+ +(define_mode_iterator FMODE [SF DF]) + +(define_mode_attr opmode [(SF "si") (DF "di")]) + +(define_insn "abs2" + [(set (match_operand:FMODE 0 "register_operand" "=f") + (abs:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG")))] + "TARGET_FP" + "fcpys $f31,%R1,%0" + [(set_attr "type" "fcpys")]) + +(define_insn "*nabs2" + [(set (match_operand:FMODE 0 "register_operand" "=f") + (neg:FMODE + (abs:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG"))))] + "TARGET_FP" + "fcpysn $f31,%R1,%0" + [(set_attr "type" "fadd")]) + +(define_expand "abstf2" + [(parallel [(set (match_operand:TF 0 "register_operand") + (abs:TF (match_operand:TF 1 "reg_or_0_operand"))) + (use (match_dup 2))])] + "TARGET_HAS_XFLOATING_LIBS" + "operands[2] = force_reg (DImode, GEN_INT (HOST_WIDE_INT_1U << 63));") + +(define_insn_and_split "*abstf_internal" + [(set (match_operand:TF 0 "register_operand" "=r") + (abs:TF (match_operand:TF 1 "reg_or_0_operand" "rG"))) + (use (match_operand:DI 2 "register_operand" "r"))] + "TARGET_HAS_XFLOATING_LIBS" + "#" + "&& reload_completed" + [(const_int 0)] + "sw_64_split_tfmode_frobsign (operands, gen_andnotdi3); DONE;") + +(define_insn "neg2" + [(set (match_operand:FMODE 0 "register_operand" "=f") + (neg:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG")))] + "TARGET_FP" + "fcpysn %R1,%R1,%0" + [(set_attr "type" "fadd")]) + +(define_expand "negtf2" + [(parallel [(set (match_operand:TF 0 "register_operand") + (neg:TF (match_operand:TF 1 "reg_or_0_operand"))) + (use (match_dup 2))])] + "TARGET_HAS_XFLOATING_LIBS" + "operands[2] = force_reg (DImode, GEN_INT ((HOST_WIDE_INT) 1 << 63));") + +(define_insn_and_split "*negtf_internal" + [(set (match_operand:TF 0 "register_operand" "=r") + (neg:TF (match_operand:TF 1 "reg_or_0_operand" "rG"))) + (use (match_operand:DI 2 "register_operand" "r"))] + "TARGET_HAS_XFLOATING_LIBS" + "#" + "&& reload_completed" + [(const_int 0)] + "sw_64_split_tfmode_frobsign (operands, gen_xordi3); DONE;") + +(define_insn "copysign3" + [(set (match_operand:FMODE 0 "register_operand" "=f") + (unspec:FMODE [(match_operand:FMODE 1 "reg_or_0_operand" "fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG")] + UNSPEC_COPYSIGN))] + "TARGET_FP" + "fcpys %R2,%R1,%0" + [(set_attr "type" "fadd")]) + +(define_insn "*ncopysign3" + [(set (match_operand:FMODE 0 "register_operand" "=f") + (neg:FMODE + (unspec:FMODE [(match_operand:FMODE 1 "reg_or_0_operand" "fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG")] + UNSPEC_COPYSIGN)))] + "TARGET_FP" + "fcpysn %R2,%R1,%0" + [(set_attr "type" "fadd")]) + +(define_insn "*add3" + [(set (match_operand:FMODE 0 "register_operand" "=&f,&f") + (plus:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "%fG,fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] + "flag_sw_sdsame == 0" + "fadd%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_insn "*add3_same" + [(set (match_operand:FMODE 0 "register_operand" "=f,f") + (plus:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "%fG,fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] + "flag_sw_sdsame == 1" + "fadd%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < 
SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_expand "add3" + [(set (match_operand:FMODE 0 "register_operand" "=f,f") + (plus:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "%fG,fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] + "TARGET_FP" + "" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn "*fmasf4" + [(set (match_operand:SF 0 "register_operand" "=&f") + (fma:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f") + (match_operand:SF 3 "register_operand" "f")))] + "flag_sw_sdsame == 0" + "fmas %R1,%R2,%R3,%0" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*fmasf4_same" + [(set (match_operand:SF 0 "register_operand" "=f") + (fma:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f") + (match_operand:SF 3 "register_operand" "f")))] + "flag_sw_sdsame == 1" + "fmas %R1,%R2,%R3,%0" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_expand "fmasf4" + [(set (match_operand:SF 0 "register_operand" "=f") + (fma:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f") + (match_operand:SF 3 "register_operand" "f")))] + "flag_sw_fma==1 && TARGET_FP" + "" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*fmadf4" + [(set (match_operand:DF 0 "register_operand" "=&f") + (fma:DF (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "register_operand" "f") + (match_operand:DF 3 "register_operand" "f")))] + "flag_sw_sdsame == 0" + "fmad %R1,%R2,%R3,%0" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*fmadf4_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (fma:DF (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "register_operand" "f") + (match_operand:DF 3 "register_operand" "f")))] + "flag_sw_sdsame == 1" + "fmad %R1,%R2,%R3,%0" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_expand "fmadf4" + [(set (match_operand:DF 0 "register_operand" "=f") + (fma:DF (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "register_operand" "f") + (match_operand:DF 3 "register_operand" "f")))] + "flag_sw_fma==1 && TARGET_FP" + "" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*fmssf4" + [(set (match_operand:SF 0 "register_operand" "=&f") + (fma:SF + (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f") + (neg:SF (match_operand:SF 3 "register_operand" "f"))))] + "flag_sw_sdsame == 0" + "fmss %R1,%R2,%R3,%0" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*fmssf4_same" + [(set (match_operand:SF 0 "register_operand" "=f") + (fma:SF + (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 
"register_operand" "f") + (neg:SF (match_operand:SF 3 "register_operand" "f"))))] + "flag_sw_sdsame == 1" + "fmss %R1,%R2,%R3,%0" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_expand "fmssf4" + [(set (match_operand:SF 0 "register_operand" "=f") + (fma:SF + (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f") + (neg:SF (match_operand:SF 3 "register_operand" "f"))))] + "flag_sw_fma==1 && TARGET_FP" + "" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*fmsdf4" + [(set (match_operand:DF 0 "register_operand" "=&f") + (fma:DF + (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "register_operand" "f") + (neg:DF (match_operand:DF 3 "register_operand" "f"))))] + "flag_sw_sdsame == 0" + "fmsd %R1,%R2,%R3,%0" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*fmsdf4_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (fma:DF + (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "register_operand" "f") + (neg:DF (match_operand:DF 3 "register_operand" "f"))))] + "flag_sw_sdsame == 1" + "fmsd %R1,%R2,%R3,%0" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_expand "fmsdf4" + [(set (match_operand:DF 0 "register_operand" "=f") + (fma:DF + (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "register_operand" "f") + (neg:DF (match_operand:DF 3 "register_operand" "f"))))] + "flag_sw_fma==1 && TARGET_FP" + "" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*fnmasf4" + [(set (match_operand:SF 0 "register_operand" "=&f") + (fma:SF + (neg:SF (match_operand:SF 1 "register_operand" "f")) + (match_operand:SF 2 "register_operand" "f") + (match_operand:SF 3 "register_operand" "f")))] + "flag_sw_sdsame == 0" + "fnmas %R1,%R2,%R3,%0" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*fnmasf4_same" + [(set (match_operand:SF 0 "register_operand" "=f") + (fma:SF + (neg:SF (match_operand:SF 1 "register_operand" "f")) + (match_operand:SF 2 "register_operand" "f") + (match_operand:SF 3 "register_operand" "f")))] + "flag_sw_sdsame == 1" + "fnmas %R1,%R2,%R3,%0" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_expand "fnmasf4" + [(set (match_operand:SF 0 "register_operand" "=f") + (fma:SF + (neg:SF (match_operand:SF 1 "register_operand" "f")) + (match_operand:SF 2 "register_operand" "f") + (match_operand:SF 3 "register_operand" "f")))] + "flag_sw_fma==1 && TARGET_FP" + "" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*fnmadf4" + [(set (match_operand:DF 0 "register_operand" "=&f") + (fma:DF + (neg:DF (match_operand:DF 1 "register_operand" "f")) + (match_operand:DF 2 "register_operand" "f") + (match_operand:DF 3 "register_operand" "f")))] + "flag_sw_sdsame == 0" + "fnmad %R1,%R2,%R3,%0" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") 
+ (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*fnmadf4_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (fma:DF + (neg:DF (match_operand:DF 1 "register_operand" "f")) + (match_operand:DF 2 "register_operand" "f") + (match_operand:DF 3 "register_operand" "f")))] + "flag_sw_sdsame == 1" + "fnmad %R1,%R2,%R3,%0" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_expand "fnmadf4" + [(set (match_operand:DF 0 "register_operand" "=f") + (fma:DF + (neg:DF (match_operand:DF 1 "register_operand" "f")) + (match_operand:DF 2 "register_operand" "f") + (match_operand:DF 3 "register_operand" "f")))] + "flag_sw_fma==1 && TARGET_FP" + "" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*fnmssf4" + [(set (match_operand:SF 0 "register_operand" "=&f") + (fma:SF + (neg:SF (match_operand:SF 1 "register_operand" "f")) + (match_operand:SF 2 "register_operand" "f") + (neg:SF (match_operand:SF 3 "register_operand" "f"))))] + "flag_sw_sdsame == 0" + "fnmss %R1,%R2,%R3,%0" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*fnmssf4_same" + [(set (match_operand:SF 0 "register_operand" "=f") + (fma:SF + (neg:SF (match_operand:SF 1 "register_operand" "f")) + (match_operand:SF 2 "register_operand" "f") + (neg:SF (match_operand:SF 3 "register_operand" "f"))))] + "flag_sw_sdsame == 1" + "fnmss %R1,%R2,%R3,%0" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_expand "fnmssf4" + [(set (match_operand:SF 0 "register_operand" "=f") + (fma:SF + (neg:SF (match_operand:SF 1 "register_operand" "f")) + (match_operand:SF 2 "register_operand" "f") + (neg:SF (match_operand:SF 3 "register_operand" "f"))))] + "flag_sw_fma==1 && TARGET_FP" + "" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*fnmsdf4" + [(set (match_operand:DF 0 "register_operand" "=&f") + (fma:DF + (neg:DF (match_operand:DF 1 "register_operand" "f")) + (match_operand:DF 2 "register_operand" "f") + (neg:DF (match_operand:DF 3 "register_operand" "f"))))] + "flag_sw_sdsame == 0" + "fnmsd %R1,%R2,%R3,%0" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*fnmsdf4_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (fma:DF + (neg:DF (match_operand:DF 1 "register_operand" "f")) + (match_operand:DF 2 "register_operand" "f") + (neg:DF (match_operand:DF 3 "register_operand" "f"))))] + "flag_sw_sdsame == 1" + "fnmsd %R1,%R2,%R3,%0" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_expand "fnmsdf4" + [(set (match_operand:DF 0 "register_operand" "=f") + (fma:DF + (neg:DF (match_operand:DF 1 "register_operand" "f")) + (match_operand:DF 2 "register_operand" "f") + (neg:DF (match_operand:DF 3 "register_operand" "f"))))] + "flag_sw_fma==1 && TARGET_FP" + "" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*adddf_ext1" + [(set (match_operand:DF 0 "register_operand" "=&f") + (plus:DF (float_extend:DF + 
(match_operand:SF 1 "reg_or_0_operand" "fG")) + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" + "fadd%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*adddf_ext1_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (plus:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG")) + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" + "fadd%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*adddf_ext2" + [(set (match_operand:DF 0 "register_operand" "=&f") + (plus:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "%fG")) + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" + "fadd%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*adddf_ext2_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (plus:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "%fG")) + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" + "fadd%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_expand "addtf3" + [(use (match_operand:TF 0 "register_operand")) + (use (match_operand:TF 1 "general_operand")) + (use (match_operand:TF 2 "general_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "sw_64_emit_xfloating_arith (PLUS, operands); DONE;") + +(define_insn "*sub3" + [(set (match_operand:FMODE 0 "register_operand" "=&f,&f") + (minus:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG,fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] + "flag_sw_sdsame == 0" + "fsub%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_insn "*sub3_same" + [(set (match_operand:FMODE 0 "register_operand" "=f,f") + (minus:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG,fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] + "flag_sw_sdsame == 1" + "fsub%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_expand "sub3" + [(set (match_operand:FMODE 0 "register_operand" "=f,f") + (minus:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG,fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] + "TARGET_FP" + "" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn "*subdf_ext1" + [(set (match_operand:DF 0 "register_operand" "=&f") + (minus:DF (float_extend:DF + 
(match_operand:SF 1 "reg_or_0_operand" "fG")) + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" + "fsub%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*subdf_ext1_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (minus:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG")) + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" + "fsub%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*subdf_ext2" + [(set (match_operand:DF 0 "register_operand" "=&f") + (minus:DF (match_operand:DF 1 "reg_or_0_operand" "fG") + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" + "fsub%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*subdf_ext2_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (minus:DF (match_operand:DF 1 "reg_or_0_operand" "fG") + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" + "fsub%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*subdf_ext3" + [(set (match_operand:DF 0 "register_operand" "=&f") + (minus:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG")) + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" + "fsub%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*subdf_ext3_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (minus:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG")) + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" + "fsub%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_expand "subtf3" + [(use (match_operand:TF 0 "register_operand")) + (use (match_operand:TF 1 "general_operand")) + (use (match_operand:TF 2 "general_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "sw_64_emit_xfloating_arith (MINUS, operands); DONE;") + +(define_insn "*mul3" + [(set (match_operand:FMODE 0 "register_operand" "=&f,&f") + (mult:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "%fG,fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] + "flag_sw_sdsame == 0" + "fmul%/ %R1,%R2,%0" + [(set_attr "type" "fmul") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_insn "*mul3_same" + [(set (match_operand:FMODE 0 "register_operand" "=f,f") + (mult:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "%fG,fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] + "flag_sw_sdsame == 1" + "fmul%/ %R1,%R2,%0" 
+ [(set_attr "type" "fmul") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_expand "mul3" + [(set (match_operand:FMODE 0 "register_operand" "=f,f") + (mult:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "%fG,fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] + "TARGET_FP" + "" + [(set_attr "type" "fmul") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn "*muldf_ext1" + [(set (match_operand:DF 0 "register_operand" "=&f") + (mult:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG")) + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" + "fmul%-%/ %R1,%R2,%0" + [(set_attr "type" "fmul") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*muldf_ext1_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (mult:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG")) + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" + "fmul%-%/ %R1,%R2,%0" + [(set_attr "type" "fmul") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*muldf_ext2" + [(set (match_operand:DF 0 "register_operand" "=&f") + (mult:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "%fG")) + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" + "fmul%-%/ %R1,%R2,%0" + [(set_attr "type" "fmul") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*muldf_ext2_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (mult:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "%fG")) + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" + "fmul%-%/ %R1,%R2,%0" + [(set_attr "type" "fmul") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_expand "multf3" + [(use (match_operand:TF 0 "register_operand")) + (use (match_operand:TF 1 "general_operand")) + (use (match_operand:TF 2 "general_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "sw_64_emit_xfloating_arith (MULT, operands); DONE;") + +(define_insn "div3_ieee" + [(set (match_operand:FMODE 0 "register_operand" "=&f,&f") + (div:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG,fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] + "flag_sw_sdsame == 0" + "fdiv%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "opsize" "") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +;; Floating point reciprocal approximation +(define_insn "fre" + [(set (match_operand:SFDF 0 "register_operand" "=f") + (unspec:SFDF [(match_operand:SFDF 1 "register_operand" "f")] + UNSPEC_FRECX))] + "(flag_sw_recip || 
flag_sw_recip_precision) && flag_reciprocal_math && TARGET_SW8A" + "frec %1,%0" + [(set_attr "type" "fp")]) + +(define_insn "*div3" + [(set (match_operand:FMODE 0 "register_operand" "=&f,&f") + (div:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG,fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] + "flag_sw_sdsame == 0" + "fdiv%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "opsize" "") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_insn "*div3_same" + [(set (match_operand:FMODE 0 "register_operand" "=f,f") + (div:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG,fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] + "flag_sw_sdsame == 1" + "fdiv%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "opsize" "") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_expand "div3" + [(set (match_operand:FMODE 0 "register_operand" "=f,f") + (div:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG,fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] + "TARGET_FP" +{ + if ((flag_sw_recip || flag_sw_recip_precision) && flag_reciprocal_math && TARGET_SW8A) + { + if (operands[1] == CONST0_RTX (mode)) + operands[1] = gen_move_reg (operands[1]); + + if (operands[2] == CONST0_RTX (mode)) + operands[2] = gen_move_reg (operands[2]); + + sw_64_emit_swdiv (operands[0], operands[1], operands[2], true); + DONE; + } + +}) + +(define_insn "*div3_fpr" + [(set (match_operand:FMODE 0 "register_operand" "=f") + (div:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG")))] + "TARGET_FP" + "fdiv%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "opsize" "") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*divdf_ext1" + [(set (match_operand:DF 0 "register_operand" "=&f") + (div:DF (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG")) + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" + "fdiv%-%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*divdf_ext1_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (div:DF (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG")) + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" + "fdiv%-%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*divdf_ext2" + [(set (match_operand:DF 0 "register_operand" "=&f") + (div:DF (match_operand:DF 1 "reg_or_0_operand" "fG") + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" + "fdiv%-%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*divdf_ext2_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (div:DF (match_operand:DF 1 
"reg_or_0_operand" "fG") + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" + "fdiv%-%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*divdf_ext3" + [(set (match_operand:DF 0 "register_operand" "=&f") + (div:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG")) + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" + "fdiv%-%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*divdf_ext3_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (div:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG")) + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" + "fdiv%-%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_expand "divtf3" + [(use (match_operand:TF 0 "register_operand")) + (use (match_operand:TF 1 "general_operand")) + (use (match_operand:TF 2 "general_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "sw_64_emit_xfloating_arith (DIV, operands); DONE;") + +;; frint floating-point round to integral standard patterns. +(define_insn "2" + [(set (match_operand:SFDF 0 "register_operand" "=f") + (unspec:SFDF [(match_operand:SFDF 1 "register_operand" "f")] + FRINT))] + "TARGET_SW8A && flag_sw_fprnd" + "fri %1, %0" + [(set_attr "type" "frint")]) + +(define_insn "*sqrt2" + [(set (match_operand:FMODE 0 "register_operand" "=&f,&f") + (sqrt:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG,fG")))] + "flag_sw_sdsame == 0" + "fsqrt%/ %R1,%0" + [(set_attr "type" "fsqrt") + (set_attr "opsize" "") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_insn "*sqrt2_same" + [(set (match_operand:FMODE 0 "register_operand" "=f,f") + (sqrt:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG,fG")))] + "flag_sw_sdsame == 1" + "fsqrt%/ %R1,%0" + [(set_attr "type" "fsqrt") + (set_attr "opsize" "") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_expand "sqrt2" + [(set (match_operand:FMODE 0 "register_operand" "=f,f") + (sqrt:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG,fG")))] + "TARGET_FP" + "" + [(set_attr "type" "fsqrt") + (set_attr "opsize" "") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +;; Define conversion operators between DFmode and SImode, using the cvtql +;; instruction. To allow combine et al to do useful things, we keep the +;; operation as a unit until after reload, at which point we split the +;; instructions. +;; +;; Note that we (attempt to) only consider this optimization when the +;; ultimate destination is memory. 
If we will be doing further integer +;; processing, it is cheaper to do the truncation in the int regs. + +(define_insn "*cvtql" + [(set (match_operand:SF 0 "register_operand" "=&f") + (unspec:SF [(match_operand:DI 1 "reg_or_0_operand" "fG")] + UNSPEC_CVTQL))] + "TARGET_FP && flag_sw_sdsame == 0" + "fcvtlw%/ %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "v_sv")]) +(define_insn "*cvtql_same" + [(set (match_operand:SF 0 "register_operand" "=f") + (unspec:SF [(match_operand:DI 1 "reg_or_0_operand" "fG")] + UNSPEC_CVTQL))] + "TARGET_FP && flag_sw_sdsame == 1" + "fcvtlw%/ %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "v_sv")]) + +(define_insn_and_split "*fix_truncdfsi_ieee" + [(set (match_operand:SI 0 "memory_operand" "=m") + (subreg:SI + (match_operator:DI 4 "fix_operator" + [(match_operand:DF 1 "reg_or_0_operand" "fG")]) 0)) + (clobber (match_scratch:DI 2 "=&f")) + (clobber (match_scratch:SF 3 "=&f"))] + "TARGET_FP && sw_64_fptm >= SW_64_FPTM_SU && flag_sw_cmov == 0" + "#" + "&& reload_completed" + [(set (match_dup 2) (match_op_dup 4 [(match_dup 1)])) + (set (match_dup 3) (unspec:SF [(match_dup 2)] UNSPEC_CVTQL)) + (set (match_dup 5) (match_dup 3))] +{ + operands[5] = adjust_address (operands[0], SFmode, 0); +} + [(set_attr "type" "fadd") + (set_attr "trap" "yes")]) + +;; mieee-opt +(define_insn_and_split "*fix_truncdfsi_internal" + [(set (match_operand:SI 0 "memory_operand" "=m") + (subreg:SI + (match_operator:DI 4 "fix_operator" + [(match_operand:DF 1 "reg_or_0_operand" "fG")]) 0)) + (clobber (match_scratch:DI 2 "=&f")) + (clobber (match_scratch:SF 3 "=&f"))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU" + "#" + "&& reload_completed" + [(set (match_dup 2) (match_op_dup 4 [(match_dup 1)])) + (set (match_dup 3) (unspec:SF [(match_dup 2)] UNSPEC_CVTQL)) + (set (match_dup 5) (match_dup 3))] +{ + //operands[4] = gen_rtx_REG (SFmode, REGNO (operands[2])); + operands[5] = adjust_address (operands[0], SFmode, 0); +} + [(set_attr "type" "fadd") + (set_attr "trap" "yes")]) + +(define_insn "*fix_truncdfdi2" + [(set (match_operand:DI 0 "reg_no_subreg_operand" "=&f,&f") + (match_operator:DI 2 "fix_operator" + [(match_operand:DF 1 "reg_or_0_operand" "fG,fG")]))] + "TARGET_FP && ((flag_sw_sdsame == 0 && flag_sw_cmov == 0) || TARGET_SW6B)" + "fcvt%-l%T2 %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "c") + (set_attr "trap_suffix" "v_sv_svi") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_insn "*fix_truncdfdi2_same" + [(set (match_operand:DI 0 "reg_no_subreg_operand" "=f,f") + (match_operator:DI 2 "fix_operator" + [(match_operand:DF 1 "reg_or_0_operand" "fG,fG")]))] + "TARGET_FP && ((flag_sw_sdsame == 1 && flag_sw_cmov == 0) || TARGET_SW6B)" + "fcvt%-l%T2 %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "c") + (set_attr "trap_suffix" "v_sv_svi") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_expand "fix_truncdfdi2" + [(set (match_operand:DI 0 "reg_no_subreg_operand") + (fix:DI (match_operand:DF 1 "reg_or_0_operand")))] + "TARGET_FP") + +(define_expand "fixuns_truncdfdi2" + [(set (match_operand:DI 0 "reg_no_subreg_operand") + (unsigned_fix:DI (match_operand:DF 1 "reg_or_0_operand")))] + "TARGET_FP" +{ + if ((TARGET_SW8A == 1 && flag_sw_cmov != 1) || 
TARGET_SW6B) + { + rtx reg1 = gen_reg_rtx (DFmode); + rtx reg2 = gen_reg_rtx (DFmode); + rtx reg3 = gen_reg_rtx (DImode); + rtx_code_label *label1 = gen_label_rtx (); + rtx_code_label *label2 = gen_label_rtx (); + rtx test; + REAL_VALUE_TYPE offset; + + real_2expN (&offset, 63, DFmode); + + emit_move_insn (reg1, const_double_from_real_value (offset, DFmode)); + do_pending_stack_adjust (); + + test = gen_rtx_GE (VOIDmode, operands[1], reg1); + emit_jump_insn (gen_cbranchdf4 (test, operands[1], reg1, label1)); + + emit_insn (gen_fix_truncdfdi2 (operands[0], operands[1])); + emit_jump_insn (gen_rtx_SET (pc_rtx, gen_rtx_LABEL_REF (VOIDmode, label2))); + emit_barrier (); + + emit_label (label1); + emit_move_insn (reg2, gen_rtx_MINUS (DFmode, operands[1], reg1)); + emit_move_insn (reg3, GEN_INT (BITMASK_HIGH)); + emit_insn (gen_ashldi3 (reg3, reg3, GEN_INT (32))); + + emit_insn (gen_fix_truncdfdi2 (operands[0], reg2)); + emit_insn (gen_iordi3 (operands[0], operands[0], reg3)); + + emit_label (label2); + + /* Allow REG_NOTES to be set on last insn (labels don't have enough + fields, and can't be used for REG_NOTES anyway). */ + emit_use (stack_pointer_rtx); + DONE; + } + else + { + emit_insn (gen_fixuns_truncdfdi2_internal (operands[0], operands[1])); + DONE; + } +}) + + +;; Likewise between SFmode and SImode. + +(define_insn_and_split "*fix_truncsfsi_ieee" + [(set (match_operand:SI 0 "memory_operand" "=m") + (subreg:SI + (match_operator:DI 4 "fix_operator" + [(float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG"))]) 0)) + (clobber (match_scratch:DI 2 "=&f")) + (clobber (match_scratch:SF 3 "=&f"))] + "TARGET_FP && ((sw_64_fptm >= SW_64_FPTM_SU && flag_sw_cmov == 0) || TARGET_SW6B)" + "#" + "&& reload_completed" + [(set (match_dup 2) (match_op_dup 4 [(float_extend:DF (match_dup 1))])) + (set (match_dup 3) (unspec:SF [(match_dup 2)] UNSPEC_CVTQL)) + (set (match_dup 5) (match_dup 3))] + "operands[5] = adjust_address (operands[0], SFmode, 0);" + [(set_attr "type" "fadd") + (set_attr "trap" "yes")]) + +;; mieee-opt +(define_insn_and_split "*fix_truncsfsi_internal" + [(set (match_operand:SI 0 "memory_operand" "=m") + (subreg:SI + (match_operator:DI 4 "fix_operator" + [(float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG"))]) 0)) + (clobber (match_scratch:DI 2 "=&f")) + (clobber (match_scratch:SF 3 "=&f"))] + "TARGET_FP && ((sw_64_fptm < SW_64_FPTM_SU && flag_sw_cmov == 0) || TARGET_SW6B)" + "#" + "&& reload_completed" + [(set (match_dup 2) (match_op_dup 4 [(float_extend:DF (match_dup 1))])) + (set (match_dup 3) (unspec:SF [(match_dup 2)] UNSPEC_CVTQL)) + (set (match_dup 5) (match_dup 3))] +{ + // operands[4] = gen_rtx_REG (SFmode, REGNO (operands[2])); + operands[5] = adjust_address (operands[0], SFmode, 0); +} + [(set_attr "type" "fadd") + (set_attr "trap" "yes")]) + +(define_insn "*fix_truncsfdi2" + [(set (match_operand:DI 0 "reg_no_subreg_operand" "=&f,&f") + (match_operator:DI 2 "fix_operator" + [(float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG,fG"))]))] + "TARGET_FP && ((flag_sw_sdsame == 0 && flag_sw_cmov == 0) || TARGET_SW6B)" + "fcvt%-l%T2 %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "c") + (set_attr "trap_suffix" "v_sv_svi") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_insn "*fix_truncsfdi2_same" + [(set (match_operand:DI 0 "reg_no_subreg_operand" "=f,f") + (match_operator:DI 2 "fix_operator" + [(float_extend:DF 
(match_operand:SF 1 "reg_or_0_operand" "fG,fG"))]))] + "TARGET_FP && ((flag_sw_sdsame == 1 && flag_sw_cmov == 0) || TARGET_SW6B)" + "fcvt%-l%T2 %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "c") + (set_attr "trap_suffix" "v_sv_svi") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_expand "fix_truncsfdi2" + [(set (match_operand:DI 0 "reg_no_subreg_operand") + (fix:DI (float_extend:DF (match_operand:SF 1 "reg_or_0_operand"))))] + "TARGET_FP && flag_sw_cmov == 0") + +(define_expand "fixuns_truncsfdi2" + [(set (match_operand:DI 0 "reg_no_subreg_operand") + (unsigned_fix:DI + (float_extend:DF (match_operand:SF 1 "reg_or_0_operand"))))] + "TARGET_FP" +{ + if ( (TARGET_SW8A == 1 && flag_sw_cmov != 1) || TARGET_SW6B) + { + rtx reg1 = gen_reg_rtx (SFmode); + rtx reg2 = gen_reg_rtx (DFmode); + rtx reg3 = gen_reg_rtx (DImode); + rtx reg4 = gen_reg_rtx (DFmode); + rtx reg5 = gen_reg_rtx (DFmode); + rtx_code_label *label1 = gen_label_rtx (); + rtx_code_label *label2 = gen_label_rtx (); + rtx test; + REAL_VALUE_TYPE offset; + + real_2expN (&offset, 63, SFmode); + + emit_move_insn (reg1, const_double_from_real_value (offset, SFmode)); + do_pending_stack_adjust (); + + test = gen_rtx_GE (SFmode, operands[1], reg1); + emit_insn (gen_extendsfdf2 (reg4, reg1)); + emit_insn (gen_extendsfdf2 (reg2, operands[1])); + emit_jump_insn (gen_cbranchdf4 (test, reg2, reg4, label1)); + + emit_insn (gen_fix_truncdfdi2 (operands[0], reg2)); + emit_jump_insn (gen_rtx_SET (pc_rtx, gen_rtx_LABEL_REF (VOIDmode, label2))); + emit_barrier (); + + emit_label (label1); + emit_move_insn (reg5, gen_rtx_MINUS (DFmode, reg2, reg4)); + emit_move_insn (reg3, GEN_INT (BITMASK_HIGH)); + emit_insn (gen_ashldi3 (reg3, reg3, GEN_INT (32))); + + emit_insn (gen_fix_truncdfdi2 (operands[0], reg5)); + emit_insn (gen_iordi3 (operands[0], operands[0], reg3)); + + emit_label (label2); + + /* Allow REG_NOTES to be set on last insn (labels don't have enough + fields, and can't be used for REG_NOTES anyway). 
*/ + emit_use (stack_pointer_rtx); + DONE; + } + else + { + rtx reg2 = gen_reg_rtx (DFmode); + emit_insn (gen_extendsfdf2 (reg2, operands[1])); + emit_insn (gen_fixuns_truncdfdi2_internal (operands[0], reg2)); + DONE; + } +}) + + + +(define_expand "fix_trunctfdi2" + [(use (match_operand:DI 0 "register_operand")) + (use (match_operand:TF 1 "general_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "sw_64_emit_xfloating_cvt (FIX, operands); DONE;") + +(define_expand "fixuns_trunctfdi2" + [(use (match_operand:DI 0 "register_operand")) + (use (match_operand:TF 1 "general_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "sw_64_emit_xfloating_cvt (UNSIGNED_FIX, operands); DONE;") + +(define_insn "*floatdisf2" + [(set (match_operand:SF 0 "register_operand" "=&f,&f") + (float:SF (match_operand:DI 1 "reg_no_subreg_operand" "f,f")))] + "(flag_sw_sdsame == 0 && flag_sw_cmov == 0) || TARGET_SW6B" + "fcvtl%,%/ %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_insn "*floatdisf2_same" + [(set (match_operand:SF 0 "register_operand" "=f,f") + (float:SF (match_operand:DI 1 "reg_no_subreg_operand" "f,f")))] + "(flag_sw_sdsame == 1 && flag_sw_cmov == 0) || TARGET_SW6B" + "fcvtl%,%/ %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_expand "floatdisf2" + [(set (match_operand:SF 0 "register_operand" "=f,f") + (float:SF (match_operand:DI 1 "reg_no_subreg_operand" "f,f")))] + "TARGET_FP" + "" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn_and_split "*floatsisf2_ieee" + [(set (match_operand:SF 0 "register_operand" "=&f") + (float:SF (match_operand:SI 1 "memory_operand" "m"))) + (clobber (match_scratch:DI 2 "=&f")) + (clobber (match_scratch:SF 3 "=&f"))] + "TARGET_FP && sw_64_fptm >= SW_64_FPTM_SU && flag_sw_cmov == 0" + "#" + "&& reload_completed" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 2) (unspec:DI [(match_dup 3)] UNSPEC_CVTLQ)) + (set (match_dup 0) (float:SF (match_dup 2)))] + "operands[1] = adjust_address (operands[1], SFmode, 0);") + +;; mieee-opt +(define_insn_and_split "*floatsisf2" + [(set (match_operand:SF 0 "register_operand" "=&f") + (float:SF (match_operand:SI 1 "memory_operand" "m"))) + (clobber (match_scratch:DI 2 "=&f")) + (clobber (match_scratch:SF 3 "=&f"))] + "TARGET_FP && flag_sw_cmov == 0 && ! 
TARGET_SW8A" + "#" + "&& reload_completed" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 2) (unspec:DI [(match_dup 3)] UNSPEC_CVTLQ)) + (set (match_dup 0) (float:SF (match_dup 2)))] +{ + operands[1] = adjust_address (operands[1], SFmode, 0); + //operands[2] = gen_rtx_REG (DImode, REGNO (operands[0])); +}) + +(define_insn "*floatdidf2" + [(set (match_operand:DF 0 "register_operand" "=&f,&f") + (float:DF (match_operand:DI 1 "reg_no_subreg_operand" "f,f")))] + "flag_sw_sdsame == 0 " + "fcvtl%-%/ %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_insn "*floatdidf2_same" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (float:DF (match_operand:DI 1 "reg_no_subreg_operand" "f,f")))] + "flag_sw_sdsame == 1 " + "fcvtl%-%/ %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_expand "floatdidf2" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (float:DF (match_operand:DI 1 "reg_no_subreg_operand" "f,f")))] + "TARGET_FP " + "" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn_and_split "*floatsidf2_ieee" + [(set (match_operand:DF 0 "register_operand" "=&f") + (float:DF (match_operand:SI 1 "memory_operand" "m"))) + (clobber (match_scratch:DI 2 "=&f")) + (clobber (match_scratch:SF 3 "=&f"))] + "TARGET_FP && sw_64_fptm >= SW_64_FPTM_SU && flag_sw_cmov == 0" + "#" + "&& reload_completed" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 2) (unspec:DI [(match_dup 3)] UNSPEC_CVTLQ)) + (set (match_dup 0) (float:DF (match_dup 2)))] + "operands[1] = adjust_address (operands[1], SFmode, 0);") + +;; mieee-opt +(define_insn_and_split "*floatsidf2" + [(set (match_operand:DF 0 "register_operand" "=&f") + (float:DF (match_operand:SI 1 "memory_operand" "m"))) + (clobber (match_scratch:DI 2 "=&f")) + (clobber (match_scratch:SF 3 "=&f"))] + "TARGET_FP && flag_sw_cmov == 0 && ! 
TARGET_SW8A" + "#" + "&& reload_completed" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 2) (unspec:DI [(match_dup 3)] UNSPEC_CVTLQ)) + (set (match_dup 0) (float:DF (match_dup 2)))] +{ + operands[1] = adjust_address (operands[1], SFmode, 0); +// operands[2] = gen_rtx_REG (DImode, REGNO (operands[0])); + // operands[3] = gen_rtx_REG (SFmode, REGNO (operands[0])); +}) + +(define_expand "floatditf2" + [(use (match_operand:TF 0 "register_operand")) + (use (match_operand:DI 1 "general_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "sw_64_emit_xfloating_cvt (FLOAT, operands); DONE;") + +(define_expand "floatunsditf2" + [(use (match_operand:TF 0 "register_operand")) + (use (match_operand:DI 1 "general_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "sw_64_emit_xfloating_cvt (UNSIGNED_FLOAT, operands); DONE;") + +(define_expand "extendsfdf2" + [(set (match_operand:DF 0 "register_operand") + (float_extend:DF (match_operand:SF 1 "nonimmediate_operand")))] + "TARGET_FP" +{ + if (sw_64_fptm >= SW_64_FPTM_SU) + operands[1] = force_reg (SFmode, operands[1]); +}) + +;; The Unicos/Mk assembler doesn't support cvtst, but we've already +;; asserted that sw_64_fptm == SW_64_FPTM_N. + +(define_insn "*cmpsf_internal" + [(set (match_operand:SF 0 "register_operand" "=&f,&f") + (match_operator:SF 1 "sw_64_fp_comparison_operator" + [(match_operand:SF 2 "reg_or_0_operand" "fG,fG") + (match_operand:SF 3 "reg_or_0_operand" "fG,fG")]))] + "TARGET_FP && flag_sw_sdsame == 0 && flag_sw_sf_cmpsel" + "fcmp%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_insn "*cmpsf_internal_same" + [(set (match_operand:SF 0 "register_operand" "=f,f") + (match_operator:SF 1 "sw_64_fp_comparison_operator" + [(match_operand:SF 2 "reg_or_0_operand" "fG,fG") + (match_operand:SF 3 "reg_or_0_operand" "fG,fG")]))] + "TARGET_FP && flag_sw_sdsame == 1 && flag_sw_sf_cmpsel" + "fcmp%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn "*extendsfdf2_ieee" + [(set (match_operand:DF 0 "register_operand" "=&f") + (float_extend:DF (match_operand:SF 1 "register_operand" "f")))] + "TARGET_FP && sw_64_fptm >= SW_64_FPTM_SU && flag_sw_sdsame == 0" + "fcvtsd %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes")]) +(define_insn "*extendsfdf2_ieee_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (float_extend:DF (match_operand:SF 1 "register_operand" "f")))] + "TARGET_FP && sw_64_fptm >= SW_64_FPTM_SU && flag_sw_sdsame == 1" + "fcvtsd %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes")]) + +(define_insn "*extendsfdf2_internal_1" + [(set (match_operand:DF 0 "register_operand" "=&f,f,m") + (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "f,m,f")))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_nofcpys == 1" + "@ + fcvtsd %1,%0 + fld%,%U1 %0,%1 + fst%-%U0 %1,%0" + [(set_attr "type" "fcpys,fld,fst")]) + +(define_insn "*extendsfdf2_internal_2" + [(set (match_operand:DF 0 "register_operand" "=&f,f,m") + (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "f,m,f")))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_nofcpys == 0" + "@ + fcvtsd %1,%0 \;fcpys %0,%0,%0 + fld%, %0,%1 + fst%- %1,%0" + [(set_attr "type" 
"fcpys,fld,fst")]) + +;; Use register_operand for operand 1 to prevent compress_float_constant +;; from doing something silly. When optimizing we'll put things back +;; together anyway. +(define_expand "extendsftf2" + [(use (match_operand:TF 0 "register_operand")) + (use (match_operand:SF 1 "register_operand"))] + "TARGET_HAS_XFLOATING_LIBS" +{ + rtx tmp = gen_reg_rtx (DFmode); + emit_insn (gen_extendsfdf2 (tmp, operands[1])); + emit_insn (gen_extenddftf2 (operands[0], tmp)); + DONE; +}) + +(define_expand "extenddftf2" + [(use (match_operand:TF 0 "register_operand")) + (use (match_operand:DF 1 "register_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "sw_64_emit_xfloating_cvt (FLOAT_EXTEND, operands); DONE;") + +(define_insn "*truncdfsf2" + [(set (match_operand:SF 0 "register_operand" "=&f,&f") + (float_truncate:SF (match_operand:DF 1 "reg_or_0_operand" "fG,fG")))] + "flag_sw_sdsame == 0" + "fcvt%-%,%/ %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_insn "*truncdfsf2_same" + [(set (match_operand:SF 0 "register_operand" "=f,f") + (float_truncate:SF (match_operand:DF 1 "reg_or_0_operand" "fG,fG")))] + "flag_sw_sdsame == 1" + "fcvt%-%,%/ %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_expand "truncdfsf2" + [(set (match_operand:SF 0 "register_operand" "=f,f") + (float_truncate:SF (match_operand:DF 1 "reg_or_0_operand" "fG,fG")))] + "TARGET_FP" + "" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_expand "trunctfdf2" + [(use (match_operand:DF 0 "register_operand")) + (use (match_operand:TF 1 "general_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "sw_64_emit_xfloating_cvt (FLOAT_TRUNCATE, operands); DONE;") + +(define_expand "trunctfsf2" + [(use (match_operand:SF 0 "register_operand")) + (use (match_operand:TF 1 "general_operand"))] + "TARGET_FP && TARGET_HAS_XFLOATING_LIBS" +{ + rtx tmpf, sticky, arg, lo, hi; + + tmpf = gen_reg_rtx (DFmode); + sticky = gen_reg_rtx (DImode); + arg = copy_to_mode_reg (TFmode, operands[1]); + lo = gen_lowpart (DImode, arg); + hi = gen_highpart (DImode, arg); + + /* Convert the low word of the TFmode value into a sticky rounding bit, + then or it into the low bit of the high word. This leaves the sticky + bit at bit 48 of the fraction, which is representable in DFmode, + which prevents rounding error in the final conversion to SFmode. */ + + emit_insn (gen_rtx_SET (sticky, gen_rtx_NE (DImode, lo, const0_rtx))); + emit_insn (gen_iordi3 (hi, hi, sticky)); + emit_insn (gen_trunctfdf2 (tmpf, arg)); + emit_insn (gen_truncdfsf2 (operands[0], tmpf)); + DONE; +}) + +;; Next are all the integer comparisons, and conditional moves and branches +;; and some of the related define_expand's and define_split's. 
+ +(define_insn "*setcc_internal" + [(set (match_operand 0 "register_operand" "=r") + (match_operator 1 "sw_64_comparison_operator" + [(match_operand:DI 2 "register_operand" "r") + (match_operand:DI 3 "reg_or_8bit_operand" "rI")]))] + "GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT + && GET_MODE_SIZE (GET_MODE (operands[0])) <= 8 + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "cmp%C1 %2,%3,%0" + [(set_attr "type" "icmp")]) + +;; Yes, we can technically support reg_or_8bit_operand in operand 2, +;; but that's non-canonical rtl and allowing that causes inefficiencies +;; from cse on. +(define_insn "*setcc_swapped_internal" + [(set (match_operand 0 "register_operand" "=r") + (match_operator 1 "sw_64_swapped_comparison_operator" + [(match_operand:DI 2 "register_operand" "r") + (match_operand:DI 3 "reg_or_0_operand" "rJ")]))] + "GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT + && GET_MODE_SIZE (GET_MODE (operands[0])) <= 8 + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "cmp%c1 %r3,%2,%0" + [(set_attr "type" "icmp")]) + +;; Use match_operator rather than ne directly so that we can match +;; multiple integer modes. +(define_insn "*setne_internal" + [(set (match_operand 0 "register_operand" "=r") + (match_operator 1 "signed_comparison_operator" + [(match_operand:DI 2 "register_operand" "r") + (const_int 0)]))] + "GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT + && GET_MODE_SIZE (GET_MODE (operands[0])) <= 8 + && GET_CODE (operands[1]) == NE + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "cmpult $31,%2,%0" + [(set_attr "type" "icmp")]) + +;; The mode folding trick can't be used with const_int operands, since +;; reload needs to know the proper mode. +;; +;; Use add_operand instead of the more seemingly natural reg_or_8bit_operand +;; in order to create more pairs of constants. As long as we're allowing +;; two constants at the same time, and will have to reload one of them... + +(define_insn "*movcc_internal" + [(set (match_operand:IMODE 0 "register_operand" "=r,r,r,r") + (if_then_else:IMODE + (match_operator 2 "signed_comparison_operator" + [(match_operand:DI 3 "reg_or_0_operand" "rJ,rJ,J,J") + (match_operand:DI 4 "reg_or_0_operand" "J,J,rJ,rJ")]) + (match_operand:IMODE 1 "add_operand" "rI,0,rI,0") + (match_operand:IMODE 5 "add_operand" "0,rI,0,rI")))] + "(operands[3] == const0_rtx) ^ (operands[4] == const0_rtx)" + "@ + sel%C2 %r3,%1,%0,%0 + sel%D2 %r3,%5,%0,%0 + sel%c2 %r4,%1,%0,%0 + sel%d2 %r4,%5,%0,%0" + [(set_attr "type" "icmov")]) + +(define_insn "*movcc_lbc" + [(set (match_operand:IMODE 0 "register_operand" "=r,r") + (if_then_else:IMODE + (eq (zero_extract:DI (match_operand:DI 2 "reg_or_0_operand" "rJ,rJ") + (const_int 1) + (const_int 0)) + (const_int 0)) + (match_operand:IMODE 1 "reg_or_8bit_operand" "rI,0") + (match_operand:IMODE 3 "reg_or_8bit_operand" "0,rI")))] + "" + "@ + sellbc %r2,%1,%0,%0 + sellbs %r2,%3,%0,%0" + [(set_attr "type" "icmov")]) + +(define_insn "*movcc_lbs" + [(set (match_operand:IMODE 0 "register_operand" "=r,r") + (if_then_else:IMODE + (ne (zero_extract:DI (match_operand:DI 2 "reg_or_0_operand" "rJ,rJ") + (const_int 1) + (const_int 0)) + (const_int 0)) + (match_operand:IMODE 1 "reg_or_8bit_operand" "rI,0") + (match_operand:IMODE 3 "reg_or_8bit_operand" "0,rI")))] + "" + "@ + sellbs %r2,%1,%0,%0 + sellbc %r2,%3,%0,%0" + [(set_attr "type" "icmov")]) + +;; For ABS, we have two choices, depending on whether the input and output +;; registers are the same or not. 
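Written as C, a rough sketch (an assumption, not the documented expansion) of the two sequences the absdi2 expander below chooses between, depending on whether the source and destination registers coincide:

    long abs_same (long x)           /* source and destination are the same  */
    {
      long t = -x;                   /* scratch = -x                         */
      return x >= 0 ? x : t;         /* conditional move keyed on x          */
    }

    long abs_diff (long src)         /* distinct source and destination      */
    {
      long dst = -src;               /* dst = -src                           */
      return src < 0 ? dst : src;    /* conditional move keyed on src        */
    }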
+(define_expand "absdi2" + [(set (match_operand:DI 0 "register_operand") + (abs:DI (match_operand:DI 1 "register_operand")))] + "" +{ + if (rtx_equal_p (operands[0], operands[1])) + emit_insn (gen_absdi2_same (operands[0], gen_reg_rtx (DImode))); + else + emit_insn (gen_absdi2_diff (operands[0], operands[1])); + DONE; +}) + +(define_expand "absdi2_same" + [(set (match_operand:DI 1 "register_operand") + (neg:DI (match_operand:DI 0 "register_operand"))) + (set (match_dup 0) + (if_then_else:DI (ge (match_dup 0) (const_int 0)) + (match_dup 0) + (match_dup 1)))]) + +(define_expand "absdi2_diff" + [(set (match_operand:DI 0 "register_operand") + (neg:DI (match_operand:DI 1 "register_operand"))) + (set (match_dup 0) + (if_then_else:DI (lt (match_dup 1) (const_int 0)) + (match_dup 0) + (match_dup 1)))]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (abs:DI (match_dup 0))) + (clobber (match_operand:DI 1 "register_operand"))] + "" + [(set (match_dup 1) (neg:DI (match_dup 0))) + (set (match_dup 0) (if_then_else:DI (ge (match_dup 0) (const_int 0)) + (match_dup 0) (match_dup 1)))]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (abs:DI (match_operand:DI 1 "register_operand")))] + "! rtx_equal_p (operands[0], operands[1])" + [(set (match_dup 0) (neg:DI (match_dup 1))) + (set (match_dup 0) (if_then_else:DI (lt (match_dup 1) (const_int 0)) + (match_dup 0) (match_dup 1)))]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (neg:DI (abs:DI (match_dup 0)))) + (clobber (match_operand:DI 1 "register_operand"))] + "" + [(set (match_dup 1) (neg:DI (match_dup 0))) + (set (match_dup 0) (if_then_else:DI (le (match_dup 0) (const_int 0)) + (match_dup 0) (match_dup 1)))]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (neg:DI (abs:DI (match_operand:DI 1 "register_operand"))))] + "! 
rtx_equal_p (operands[0], operands[1])" + [(set (match_dup 0) (neg:DI (match_dup 1))) + (set (match_dup 0) (if_then_else:DI (gt (match_dup 1) (const_int 0)) + (match_dup 0) (match_dup 1)))]) + +(define_insn "3" + [(set (match_operand:I12MODE 0 "register_operand" "=r") + (any_maxmin:I12MODE + (match_operand:I12MODE 1 "reg_or_0_operand" "%rJ") + (match_operand:I12MODE 2 "reg_or_8bit_operand" "rI")))] + "TARGET_MAX" + " %r1,%2,%0" + [(set_attr "type" "mvi")]) + +(define_expand "smaxdi3" + [(set (match_dup 3) + (le:DI (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand"))) + (set (match_operand:DI 0 "register_operand") + (if_then_else:DI (eq (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "" + "operands[3] = gen_reg_rtx (DImode);") + +(define_split + [(set (match_operand:DI 0 "register_operand") + (smax:DI (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand"))) + (clobber (match_operand:DI 3 "register_operand"))] + "operands[2] != const0_rtx" + [(set (match_dup 3) (le:DI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (if_then_else:DI (eq (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))]) + +(define_insn "*smax_const0" + [(set (match_operand:DI 0 "register_operand" "=r") + (smax:DI (match_operand:DI 1 "register_operand" "0") + (const_int 0)))] + "" + "sellt %0,0,%0,%0" + [(set_attr "type" "icmov")]) + +(define_expand "smindi3" + [(set (match_dup 3) + (lt:DI (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand"))) + (set (match_operand:DI 0 "register_operand") + (if_then_else:DI (ne (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "" + "operands[3] = gen_reg_rtx (DImode);") + +(define_split + [(set (match_operand:DI 0 "register_operand") + (smin:DI (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand"))) + (clobber (match_operand:DI 3 "register_operand"))] + "operands[2] != const0_rtx" + [(set (match_dup 3) (lt:DI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (if_then_else:DI (ne (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))]) + +(define_insn "*smin_const0" + [(set (match_operand:DI 0 "register_operand" "=r") + (smin:DI (match_operand:DI 1 "register_operand" "0") + (const_int 0)))] + "" + "selgt %0,0,%0,%0" + [(set_attr "type" "icmov")]) + +(define_expand "umaxdi3" + [(set (match_dup 3) + (leu:DI (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand"))) + (set (match_operand:DI 0 "register_operand") + (if_then_else:DI (eq (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "" + "operands[3] = gen_reg_rtx (DImode);") + +(define_split + [(set (match_operand:DI 0 "register_operand") + (umax:DI (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand"))) + (clobber (match_operand:DI 3 "register_operand"))] + "operands[2] != const0_rtx" + [(set (match_dup 3) (leu:DI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (if_then_else:DI (eq (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))]) + +(define_expand "umindi3" + [(set (match_dup 3) + (ltu:DI (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand"))) + (set (match_operand:DI 0 "register_operand") + (if_then_else:DI (ne (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "" + "operands[3] = gen_reg_rtx (DImode);") + +(define_split + [(set (match_operand:DI 0 "register_operand") + (umin:DI (match_operand:DI 1 "reg_or_0_operand") + 
(match_operand:DI 2 "reg_or_8bit_operand"))) + (clobber (match_operand:DI 3 "register_operand"))] + "operands[2] != const0_rtx" + [(set (match_dup 3) (ltu:DI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (if_then_else:DI (ne (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))]) + +(define_insn "*bcc_normal" + [(set (pc) + (if_then_else + (match_operator 1 "signed_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (label_ref (match_operand 0)) + (pc)))] + "" + "b%C1 %r2,%0" + [(set_attr "type" "ibr")]) + +(define_insn_and_split "*branchcombine" + [(set (pc) + (if_then_else (match_operator 1 "sw_64_branch_combination" + [(match_operand:DI 2 "register_operand") + (match_operand:DI 3 "reg_or_8bit_operand")]) + (label_ref (match_operand 0)) + (pc)))] +"flag_sw_branch_combination==1 + && (can_create_pseudo_p ()) && operands[3]!=CONST0_RTX (DImode)" +"#" +"&& 1" + [(parallel + [(set (pc) + (if_then_else + (match_op_dup 1 + [(match_dup 2) + (match_dup 3)]) + (label_ref (match_dup 0)) + (pc))) + (clobber (match_dup 4))])] +{ + operands[4]=gen_reg_rtx (DImode); +}) + +(define_insn "bcc_ne" + [(parallel + [(set (pc) + (if_then_else + (match_operator 1 "sw_64_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (match_operand:DI 3 "reg_or_8bit_operand" "rI")]) + (label_ref (match_operand 0)) + (pc))) + (clobber (match_operand:DI 4 "register_operand" "=r"))])] + "flag_sw_branch_combination==1" + "cmp%C1 %r2,%3,%r4 + bne %r4,%0" + [(set_attr "type" "ibr")]) + +(define_insn "bcc_eq" + [(parallel + [(set (pc) + (if_then_else + (match_operator 1 "sw_64_swapped_branch_combination" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (match_operand:DI 3 "reg_or_8bit_operand" "rI")]) + (label_ref (match_operand 0)) + (pc))) + (clobber (match_operand:DI 4 "register_operand" "=r"))])] + "flag_sw_branch_combination==1" + "cmp%D1 %r2,%3,%r4 + beq %r4,%0" + [(set_attr "type" "ibr")]) + +(define_insn "*bcc_reverse" + [(set (pc) + (if_then_else + (match_operator 1 "signed_comparison_operator" + [(match_operand:DI 2 "register_operand" "r") + (const_int 0)]) + + (pc) + (label_ref (match_operand 0))))] + "" + "b%c1 %2,%0" + [(set_attr "type" "ibr")]) + +(define_insn "*blbs_normal" + [(set (pc) + (if_then_else + (ne (zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (const_int 1) + (const_int 0)) + (const_int 0)) + (label_ref (match_operand 0)) + (pc)))] + "" + "blbs %r1,%0" + [(set_attr "type" "ibr")]) + +(define_insn "*blbc_normal" + [(set (pc) + (if_then_else + (eq (zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (const_int 1) + (const_int 0)) + (const_int 0)) + (label_ref (match_operand 0)) + (pc)))] + "" + "blbc %r1,%0" + [(set_attr "type" "ibr")]) + +(define_split + [(parallel + [(set (pc) + (if_then_else + (match_operator 1 "comparison_operator" + [(zero_extract:DI (match_operand:DI 2 "register_operand") + (const_int 1) + (match_operand:DI 3 "const_int_operand")) + (const_int 0)]) + (label_ref (match_operand 0)) + (pc))) + (clobber (match_operand:DI 4 "register_operand"))])] + "INTVAL (operands[3]) != 0" + [(set (match_dup 4) + (lshiftrt:DI (match_dup 2) (match_dup 3))) + (set (pc) + (if_then_else (match_op_dup 1 + [(zero_extract:DI (match_dup 4) + (const_int 1) + (const_int 0)) + (const_int 0)]) + (label_ref (match_dup 0)) + (pc)))] +) + + +;; The following are the corresponding floating-point insns. Recall +;; we need to have variants that expand the arguments from SFmode +;; to DFmode. 
+ +(define_insn "*cmpdf_internal" + [(set (match_operand:DF 0 "register_operand" "=&f,&f") + (match_operator:DF 1 "sw_64_fp_comparison_operator" + [(match_operand:DF 2 "reg_or_0_operand" "fG,fG") + (match_operand:DF 3 "reg_or_0_operand" "fG,fG")]))] + "TARGET_FP && flag_sw_sdsame == 0" + "fcmp%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_insn "*cmpdf_internal_same" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (match_operator:DF 1 "sw_64_fp_comparison_operator" + [(match_operand:DF 2 "reg_or_0_operand" "fG,fG") + (match_operand:DF 3 "reg_or_0_operand" "fG,fG")]))] + "TARGET_FP && flag_sw_sdsame == 1" + "fcmp%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn "*cmpdf_ext1" + [(set (match_operand:DF 0 "register_operand" "=&f") + (match_operator:DF 1 "sw_64_fp_comparison_operator" + [(float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG")) + (match_operand:DF 3 "reg_or_0_operand" "fG")]))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" + "fcmp%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su")]) +(define_insn "*cmpdf_ext1_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (match_operator:DF 1 "sw_64_fp_comparison_operator" + [(float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG")) + (match_operand:DF 3 "reg_or_0_operand" "fG")]))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" + "fcmp%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su")]) + +(define_insn "*cmpdf_ext2" + [(set (match_operand:DF 0 "register_operand" "=&f") + (match_operator:DF 1 "sw_64_fp_comparison_operator" + [(match_operand:DF 2 "reg_or_0_operand" "fG") + (float_extend:DF + (match_operand:SF 3 "reg_or_0_operand" "fG"))]))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" + "fcmp%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su")]) +(define_insn "*cmpdf_ext2_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (match_operator:DF 1 "sw_64_fp_comparison_operator" + [(match_operand:DF 2 "reg_or_0_operand" "fG") + (float_extend:DF + (match_operand:SF 3 "reg_or_0_operand" "fG"))]))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" + "fcmp%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su")]) + +(define_insn "*cmpdf_ext3" + [(set (match_operand:DF 0 "register_operand" "=&f") + (match_operator:DF 1 "sw_64_fp_comparison_operator" + [(float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG")) + (float_extend:DF + (match_operand:SF 3 "reg_or_0_operand" "fG"))]))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" + "fcmp%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su")]) +(define_insn "*cmpdf_ext3_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (match_operator:DF 1 "sw_64_fp_comparison_operator" + [(float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG")) + (float_extend:DF + (match_operand:SF 3 "reg_or_0_operand" "fG"))]))] 
+ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" + "fcmp%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su")]) + +(define_insn "*movcc_internal" + [(set (match_operand:FMODE 0 "register_operand" "=f,f") + (if_then_else:FMODE + (match_operator 3 "signed_comparison_operator" + [(match_operand:DF 4 "reg_or_0_operand" "fG,fG") + (match_operand:DF 2 "const0_operand" "G,G")]) + (match_operand:FMODE 1 "reg_or_0_operand" "fG,0") + (match_operand:FMODE 5 "reg_or_0_operand" "0,fG")))] + "TARGET_FP" + "@ + fsel%C3 %R4,%R1,%0,%0 + fsel%D3 %R4,%R5,%0,%0" + [(set_attr "type" "fcmov")]) + +(define_insn "*movdfcc_ext1" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (if_then_else:DF + (match_operator 3 "signed_comparison_operator" + [(match_operand:DF 4 "reg_or_0_operand" "fG,fG") + (match_operand:DF 2 "const0_operand" "G,G")]) + (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG,0")) + (match_operand:DF 5 "reg_or_0_operand" "0,fG")))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU" + "@ + fsel%C3 %R4,%R1,%0,%0 + fsel%D3 %R4,%R5,%0,%0" + [(set_attr "type" "fcmov")]) + +(define_insn "*movdfcc_ext2" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (if_then_else:DF + (match_operator 3 "signed_comparison_operator" + [(float_extend:DF + (match_operand:SF 4 "reg_or_0_operand" "fG,fG")) + (match_operand:DF 2 "const0_operand" "G,G")]) + (match_operand:DF 1 "reg_or_0_operand" "fG,0") + (match_operand:DF 5 "reg_or_0_operand" "0,fG")))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU" + "@ + fsel%C3 %R4,%R1,%0,%0 + fsel%D3 %R4,%R5,%0,%0" + [(set_attr "type" "fcmov")]) + +(define_insn "*movdfcc_ext3" + [(set (match_operand:SF 0 "register_operand" "=f,f") + (if_then_else:SF + (match_operator 3 "signed_comparison_operator" + [(float_extend:DF + (match_operand:SF 4 "reg_or_0_operand" "fG,fG")) + (match_operand:DF 2 "const0_operand" "G,G")]) + (match_operand:SF 1 "reg_or_0_operand" "fG,0") + (match_operand:SF 5 "reg_or_0_operand" "0,fG")))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU" + "@ + fsel%C3 %R4,%R1,%0,%0 + fsel%D3 %R4,%R5,%0,%0" + [(set_attr "type" "fcmov")]) + +(define_insn "*movdfcc_ext4" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (if_then_else:DF + (match_operator 3 "signed_comparison_operator" + [(float_extend:DF + (match_operand:SF 4 "reg_or_0_operand" "fG,fG")) + (match_operand:DF 2 "const0_operand" "G,G")]) + (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG,0")) + (match_operand:DF 5 "reg_or_0_operand" "0,fG")))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU" + "@ + fsel%C3 %R4,%R1,%0,%0 + fsel%D3 %R4,%R5,%0,%0" + [(set_attr "type" "fcmov")]) + +(define_expand "smaxdf3" + [(set (match_dup 3) + (le:DF (match_operand:DF 1 "reg_or_0_operand") + (match_operand:DF 2 "reg_or_0_operand"))) + (set (match_operand:DF 0 "register_operand") + (if_then_else:DF (eq (match_dup 3) (match_dup 4)) + (match_dup 1) (match_dup 2)))] + "TARGET_FP" +{ + operands[3] = gen_reg_rtx (DFmode); + operands[4] = CONST0_RTX (DFmode); +}) + +(define_expand "smindf3" + [(set (match_dup 3) + (lt:DF (match_operand:DF 1 "reg_or_0_operand") + (match_operand:DF 2 "reg_or_0_operand"))) + (set (match_operand:DF 0 "register_operand") + (if_then_else:DF (ne (match_dup 3) (match_dup 4)) + (match_dup 1) (match_dup 2)))] + "TARGET_FP" +{ + operands[3] = gen_reg_rtx (DFmode); + operands[4] = CONST0_RTX (DFmode); +}) + +(define_expand "smaxsf3" + [(set (match_dup 3) + (le:DF (float_extend:DF (match_operand:SF 1 "reg_or_0_operand")) + 
(float_extend:DF (match_operand:SF 2 "reg_or_0_operand")))) + (set (match_operand:SF 0 "register_operand") + (if_then_else:SF (eq (match_dup 3) (match_dup 4)) + (match_dup 1) (match_dup 2)))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU" +{ + operands[3] = gen_reg_rtx (DFmode); + operands[4] = CONST0_RTX (DFmode); +}) + +(define_expand "sminsf3" + [(set (match_dup 3) + (lt:DF (float_extend:DF (match_operand:SF 1 "reg_or_0_operand")) + (float_extend:DF (match_operand:SF 2 "reg_or_0_operand")))) + (set (match_operand:SF 0 "register_operand") + (if_then_else:SF (ne (match_dup 3) (match_dup 4)) + (match_dup 1) (match_dup 2)))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU" +{ + operands[3] = gen_reg_rtx (DFmode); + operands[4] = CONST0_RTX (DFmode); +}) + +(define_insn "*fbcc_normal" + [(set (pc) + (if_then_else + (match_operator 1 "signed_comparison_operator" + [(match_operand:DF 2 "reg_or_0_operand" "fG") + (match_operand:DF 3 "const0_operand" "G")]) + (label_ref (match_operand 0)) + (pc)))] + "TARGET_FP" + "fb%C1 %R2,%0" + [(set_attr "type" "fbr")]) + +(define_insn "*fbcc_ext_normal" + [(set (pc) + (if_then_else + (match_operator 1 "signed_comparison_operator" + [(float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG")) + (match_operand:DF 3 "const0_operand" "G")]) + (label_ref (match_operand 0)) + (pc)))] + "TARGET_FP" + "fb%C1 %R2,%0" + [(set_attr "type" "fbr")]) + +;; These are the main define_expand's used to make conditional branches +;; and compares. + +(define_expand "cbranchsf4" + [(use (match_operator 0 "sw_64_cbranch_operator" + [(match_operand:SF 1 "reg_or_0_operand") + (match_operand:SF 2 "reg_or_0_operand")])) + (use (match_operand 3))] + "TARGET_FP && flag_sw_sf_cmpsel" + "sw_64_emit_conditional_branch (operands, SFmode); DONE;") + +(define_insn "*sfbcc_normal" + [(set (pc) + (if_then_else + (match_operator 1 "signed_comparison_operator" + [(match_operand:SF 2 "reg_or_0_operand" "fG") + (match_operand:SF 3 "const0_operand" "G")]) + (label_ref (match_operand 0)) + (pc)))] + "TARGET_FP && flag_sw_sf_cmpsel" + "fb%C1 %R2,%0" + [(set_attr "type" "fbr")]) + +(define_insn "*movsfcc_internal" + [(set (match_operand:FMODE 0 "register_operand" "=f,f") + (if_then_else:FMODE + (match_operator 3 "signed_comparison_operator" + [(match_operand:SF 4 "reg_or_0_operand" "fG,fG") + (match_operand:SF 2 "const0_operand" "G,G")]) + (match_operand:FMODE 1 "reg_or_0_operand" "fG,0") + (match_operand:FMODE 5 "reg_or_0_operand" "0,fG")))] + "TARGET_FP && flag_sw_sf_cmpsel" + "@ + fsel%C3 %R4,%R1,%0,%0 + fsel%D3 %R4,%R5,%0,%0" + [(set_attr "type" "fcmov")]) + +(define_expand "cbranchdf4" + [(use (match_operator 0 "sw_64_cbranch_operator" + [(match_operand:DF 1 "reg_or_0_operand") + (match_operand:DF 2 "reg_or_0_operand")])) + (use (match_operand 3))] + "TARGET_FP" + "sw_64_emit_conditional_branch (operands, DFmode); DONE;") + +(define_expand "cbranchtf4" + [(use (match_operator 0 "sw_64_cbranch_operator" + [(match_operand:TF 1 "general_operand") + (match_operand:TF 2 "general_operand")])) + (use (match_operand 3))] + "TARGET_HAS_XFLOATING_LIBS" + "sw_64_emit_conditional_branch (operands, TFmode); DONE;") + +(define_expand "cbranchdi4" + [(use (match_operator 0 "sw_64_cbranch_operator" + [(match_operand:DI 1 "general_operand") + (match_operand:DI 2 "general_operand")])) + (use (match_operand 3))] + "" + "sw_64_emit_conditional_branch (operands, DImode); DONE;") + +(define_expand "cstoredf4" + [(use (match_operator:DI 1 "sw_64_cbranch_operator" + [(match_operand:DF 2 "reg_or_0_operand") + 
(match_operand:DF 3 "reg_or_0_operand")])) + (clobber (match_operand:DI 0 "register_operand"))] + "TARGET_FP" +{ + if (sw_64_emit_setcc (operands, DFmode)) + DONE; + else + FAIL; +}) + +(define_expand "cstoretf4" + [(use (match_operator:DI 1 "sw_64_cbranch_operator" + [(match_operand:TF 2 "general_operand") + (match_operand:TF 3 "general_operand")])) + (clobber (match_operand:DI 0 "register_operand"))] + "TARGET_HAS_XFLOATING_LIBS" +{ + if (sw_64_emit_setcc (operands, TFmode)) + DONE; + else + FAIL; +}) + +(define_expand "cstoredi4" + [(use (match_operator:DI 1 "sw_64_cbranch_operator" + [(match_operand:DI 2 "general_operand") + (match_operand:DI 3 "general_operand")])) + (clobber (match_operand:DI 0 "register_operand"))] + "" +{ + if (sw_64_emit_setcc (operands, DImode)) + DONE; + else + FAIL; +}) + +;; These are the main define_expand's used to make conditional moves. + +(define_expand "movcc" + [(set (match_operand:I48MODE 0 "register_operand") + (if_then_else:I48MODE + (match_operand 1 "comparison_operator") + (match_operand:I48MODE 2 "reg_or_8bit_operand") + (match_operand:I48MODE 3 "reg_or_8bit_operand")))] + "" +{ + operands[1] = sw_64_emit_conditional_move (operands[1], mode); + if (operands[1] == 0) + FAIL; +}) + +(define_expand "movcc" + [(set (match_operand:FMODE 0 "register_operand") + (if_then_else:FMODE + (match_operand 1 "comparison_operator") + (match_operand:FMODE 2 "reg_or_8bit_operand") + (match_operand:FMODE 3 "reg_or_8bit_operand")))] + "" +{ + operands[1] = sw_64_emit_conditional_move (operands[1], mode); + if (operands[1] == 0) + FAIL; +}) + +;; These define_split definitions are used in cases when comparisons have +;; not be stated in the correct way and we need to reverse the second +;; comparison. For example, x >= 7 has to be done as x < 6 with the +;; comparison that tests the result being reversed. We have one define_split +;; for each use of a comparison. They do not match valid insns and need +;; not generate valid insns. +;; +;; We can also handle equality comparisons (and inequality comparisons in +;; cases where the resulting add cannot overflow) by doing an add followed by +;; a comparison with zero. This is faster since the addition takes one +;; less cycle than a compare when feeding into a conditional move. +;; For this case, we also have an SImode pattern since we can merge the add +;; and sign extend and the order doesn't matter. +;; +;; We do not do this for floating-point, since it isn't clear how the "wrong" +;; operation could have been generated. + +(define_split + [(set (match_operand:DI 0 "register_operand") + (if_then_else:DI + (match_operator 1 "comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand") + (match_operand:DI 3 "reg_or_cint_operand")]) + (match_operand:DI 4 "reg_or_cint_operand") + (match_operand:DI 5 "reg_or_cint_operand"))) + (clobber (match_operand:DI 6 "register_operand"))] + "operands[3] != const0_rtx" + [(set (match_dup 6) (match_dup 7)) + (set (match_dup 0) + (if_then_else:DI (match_dup 8) (match_dup 4) (match_dup 5)))] +{ + enum rtx_code code = GET_CODE (operands[1]); + int unsignedp = (code == GEU || code == LEU || code == GTU || code == LTU); + + /* If we are comparing for equality with a constant and that constant + appears in the arm when the register equals the constant, use the + register since that is more likely to match (and to produce better code + if both would). 
*/ + + if (code == EQ && CONST_INT_P (operands[3]) + && rtx_equal_p (operands[4], operands[3])) + operands[4] = operands[2]; + + else if (code == NE && CONST_INT_P (operands[3]) + && rtx_equal_p (operands[5], operands[3])) + operands[5] = operands[2]; + + if (code == NE || code == EQ + || (extended_count (operands[2], DImode, unsignedp) >= 1 + && extended_count (operands[3], DImode, unsignedp) >= 1)) + { + if (CONST_INT_P (operands[3])) + operands[7] = gen_rtx_PLUS (DImode, operands[2], + GEN_INT (- INTVAL (operands[3]))); + else + operands[7] = gen_rtx_MINUS (DImode, operands[2], operands[3]); + + operands[8] = gen_rtx_fmt_ee (code, VOIDmode, operands[6], const0_rtx); + } + + else if (code == EQ || code == LE || code == LT + || code == LEU || code == LTU) + { + operands[7] = gen_rtx_fmt_ee (code, DImode, operands[2], operands[3]); + operands[8] = gen_rtx_NE (VOIDmode, operands[6], const0_rtx); + } + else + { + operands[7] = gen_rtx_fmt_ee (reverse_condition (code), DImode, + operands[2], operands[3]); + operands[8] = gen_rtx_EQ (VOIDmode, operands[6], const0_rtx); + } +}) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (if_then_else:DI + (match_operator 1 "comparison_operator" + [(match_operand:SI 2 "reg_or_0_operand") + (match_operand:SI 3 "reg_or_cint_operand")]) + (match_operand:DI 4 "reg_or_8bit_operand") + (match_operand:DI 5 "reg_or_8bit_operand"))) + (clobber (match_operand:DI 6 "register_operand"))] + "operands[3] != const0_rtx + && (GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE)" + [(set (match_dup 6) (match_dup 7)) + (set (match_dup 0) + (if_then_else:DI (match_dup 8) (match_dup 4) (match_dup 5)))] +{ + enum rtx_code code = GET_CODE (operands[1]); + int unsignedp = (code == GEU || code == LEU || code == GTU || code == LTU); + rtx tem; + + if ((code != NE && code != EQ + && ! (extended_count (operands[2], DImode, unsignedp) >= 1 + && extended_count (operands[3], DImode, unsignedp) >= 1))) + FAIL; + + if (CONST_INT_P (operands[3])) + tem = gen_rtx_PLUS (SImode, operands[2], + GEN_INT (- INTVAL (operands[3]))); + else + tem = gen_rtx_MINUS (SImode, operands[2], operands[3]); + + operands[7] = gen_rtx_SIGN_EXTEND (DImode, tem); + operands[8] = gen_rtx_fmt_ee (GET_CODE (operands[1]), VOIDmode, + operands[6], const0_rtx); +}) + +;; Prefer to use cmp and arithmetic when possible instead of a cmove. + +(define_split + [(set (match_operand 0 "register_operand") + (if_then_else (match_operator 1 "signed_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand") + (const_int 0)]) + (match_operand 3 "const_int_operand") + (match_operand 4 "const_int_operand")))] + "" + [(const_int 0)] +{ + if (sw_64_split_conditional_move (GET_CODE (operands[1]), operands[0], + operands[2], operands[3], operands[4])) + DONE; + else + FAIL; +}) + +;; ??? Why combine is allowed to create such non-canonical rtl, I don't know. +;; Oh well, we match it in movcc, so it must be partially our fault. 
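A rough C-level picture (an assumption, not a dump of actual RTL) of the add-then-compare-with-zero rewrite described in the comment above, for an equality test against a non-zero constant:

    long before (long x, long a, long b)
    {
      return x == 7 ? a : b;        /* as written: a compare feeding a cmove  */
    }

    long after (long x, long a, long b)
    {
      long t = x + (-7);            /* scratch (operand 6): the addition is
                                       one cycle cheaper than a compare here  */
      return t == 0 ? a : b;        /* conditional move tests t against zero  */
    }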
+(define_split + [(set (match_operand 0 "register_operand") + (if_then_else (match_operator 1 "signed_comparison_operator" + [(const_int 0) + (match_operand:DI 2 "reg_or_0_operand")]) + (match_operand 3 "const_int_operand") + (match_operand 4 "const_int_operand")))] + "" + [(const_int 0)] +{ + if (sw_64_split_conditional_move (swap_condition (GET_CODE (operands[1])), + operands[0], operands[2], operands[3], + operands[4])) + DONE; + else + FAIL; +}) + +(define_insn_and_split "*cmp_sadd_di" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (if_then_else:DI + (match_operator 1 "sw_64_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:DI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:DI 4 "sext_add_operand" "rIO"))) + (clobber (match_scratch:DI 5 "=r"))] + "" + "#" + "" + [(set (match_dup 5) + (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (plus:DI (mult:DI (match_dup 5) (match_dup 3)) + (match_dup 4)))] +{ + if (can_create_pseudo_p ()) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + operands[5] = operands[0]; +}) + +(define_insn_and_split "*cmp_sadd_si" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (if_then_else:SI + (match_operator 1 "sw_64_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:SI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:SI 4 "sext_add_operand" "rIO"))) + (clobber (match_scratch:DI 5 "=r"))] + "" + "#" + "" + [(set (match_dup 5) + (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (plus:SI (mult:SI (match_dup 6) (match_dup 3)) + (match_dup 4)))] +{ + if (can_create_pseudo_p ()) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + operands[5] = gen_lowpart (DImode, operands[0]); + + operands[6] = gen_lowpart (SImode, operands[5]); +}) + +(define_insn_and_split "*cmp_sadd_sidi" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (plus:SI (if_then_else:SI + (match_operator 1 "sw_64_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:SI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:SI 4 "sext_add_operand" "rIO")))) + (clobber (match_scratch:DI 5 "=r"))] + "" + "#" + "" + [(set (match_dup 5) + (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (sign_extend:DI (plus:SI (mult:SI (match_dup 6) (match_dup 3)) + (match_dup 4))))] +{ + if (can_create_pseudo_p ()) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + operands[5] = operands[0]; + + operands[6] = gen_lowpart (SImode, operands[5]); +}) + +(define_insn_and_split "*cmp_ssub_di" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI (if_then_else:DI + (match_operator 1 "sw_64_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:DI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:DI 4 "reg_or_8bit_operand" "rI"))) + (clobber (match_scratch:DI 5 "=r"))] + "" + "#" + "" + [(set (match_dup 5) + (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (minus:DI (mult:DI (match_dup 5) (match_dup 3)) + (match_dup 4)))] +{ + if (can_create_pseudo_p ()) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + 
operands[5] = operands[0]; +}) + +(define_insn_and_split "*cmp_ssub_si" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (if_then_else:SI + (match_operator 1 "sw_64_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:SI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:SI 4 "reg_or_8bit_operand" "rI"))) + (clobber (match_scratch:DI 5 "=r"))] + "" + "#" + "" + [(set (match_dup 5) + (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (minus:SI (mult:SI (match_dup 6) (match_dup 3)) + (match_dup 4)))] +{ + if (can_create_pseudo_p ()) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + operands[5] = gen_lowpart (DImode, operands[0]); + + operands[6] = gen_lowpart (SImode, operands[5]); +}) + +(define_insn_and_split "*cmp_ssub_sidi" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (minus:SI (if_then_else:SI + (match_operator 1 "sw_64_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:SI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:SI 4 "reg_or_8bit_operand" "rI")))) + (clobber (match_scratch:DI 5 "=r"))] + "" + "#" + "" + [(set (match_dup 5) + (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (sign_extend:DI (minus:SI (mult:SI (match_dup 6) (match_dup 3)) + (match_dup 4))))] +{ + if (can_create_pseudo_p ()) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + operands[5] = operands[0]; + + operands[6] = gen_lowpart (SImode, operands[5]); +}) + +;; Here are the CALL and unconditional branch insns. Calls on NT and SYSV +;; work differently, so we have different patterns for each. + +(define_expand "call" + [(use (match_operand:DI 0)) + (use (match_operand 1)) + (use (match_operand 2)) + (use (match_operand 3))] + "" +{ + emit_call_insn (gen_call_osf (operands[0], operands[1])); + DONE; +}) + +(define_expand "sibcall" + [(parallel [(call (mem:DI (match_operand 0)) + (match_operand 1)) + (unspec [(reg:DI 29)] UNSPEC_SIBCALL)])] + "" +{ + gcc_assert (MEM_P (operands[0])); + operands[0] = XEXP (operands[0], 0); +}) + +(define_expand "call_osf" + [(parallel [(call (mem:DI (match_operand 0)) + (match_operand 1)) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "" +{ + gcc_assert (MEM_P (operands[0])); + + operands[0] = XEXP (operands[0], 0); + if (! call_operand (operands[0], Pmode)) + operands[0] = copy_to_mode_reg (Pmode, operands[0]); +}) + + +(define_expand "call_value" + [(use (match_operand 0)) + (use (match_operand:DI 1)) + (use (match_operand 2)) + (use (match_operand 3)) + (use (match_operand 4))] + "" +{ + emit_call_insn (gen_call_value_osf (operands[0], operands[1], + operands[2])); + DONE; +}) + +(define_expand "sibcall_value" + [(parallel [(set (match_operand 0) + (call (mem:DI (match_operand 1)) + (match_operand 2))) + (unspec [(reg:DI 29)] UNSPEC_SIBCALL)])] + "" +{ + gcc_assert (MEM_P (operands[1])); + operands[1] = XEXP (operands[1], 0); +}) + +(define_expand "call_value_osf" + [(parallel [(set (match_operand 0) + (call (mem:DI (match_operand 1)) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "" +{ + gcc_assert (MEM_P (operands[1])); + + operands[1] = XEXP (operands[1], 0); + if (! 
call_operand (operands[1], Pmode)) + operands[1] = copy_to_mode_reg (Pmode, operands[1]); +}) + +(define_insn "*call_osf_1_er_noreturn" + [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s")) + (match_operand 1)) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS + && find_reg_note (insn, REG_NORETURN, NULL_RTX)" + "@ + call $26,($27),0 + bsr $26,%0\t\t!samegp + ldl $27,%0($29)\t\t!literal!%#\;call $26,($27),%0\t\t!lituse_jsr!%#" + [(set_attr "type" "call") + (set_attr "length" "*,*,8")]) + +(define_insn "*call_osf_1_er_setfpec0" + [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s")) + (match_operand 1)) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && flag_fpcr_set == 3" + "@ + call $26,(%0),0\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%* + bsr $26,%0\t\t!samegp + ldl $27,%0($29)\t\t!literal!%#\;call $26,($27),%0\t\t!lituse_jsr!%#\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%*" + [(set_attr "type" "call") + (set_attr "length" "12,*,16")]) + +(define_insn "*call_osf_1_er_setfpec1" + [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s")) + (match_operand 1)) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && flag_fpcr_set == 1 " + "@ + call $26,(%0),0\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%* + bsr $26,%0\t\t!samegp + ldl $27,%0($29)\t\t!literal!%#\;call $26,($27),%0\t\t!lituse_jsr!%#\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%*" + [(set_attr "type" "call") + (set_attr "length" "12,*,16")]) + +(define_insn "*call_osf_1_er" + [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s")) + (match_operand 1)) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS" + "@ + call $26,(%0),0\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%* + bsr $26,%0\t\t!samegp + ldl $27,%0($29)\t\t!literal!%#\;call $26,($27),%0\t\t!lituse_jsr!%#\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%*" + [(set_attr "type" "call") + (set_attr "length" "12,*,16")]) + +;; We must use peep2 instead of a split because we need accurate life +;; information for $gp. Consider the case of { bar (); while (1); }. +;;(define_peephole2 +;; [(parallel [(call (mem:DI (match_operand:DI 0 "call_operand")) +;; (match_operand 1)) +;; (use (reg:DI 29)) +;; (clobber (reg:DI 26))])] +;; "TARGET_EXPLICIT_RELOCS && reload_completed +;; && ! samegp_function_operand (operands[0], Pmode) +;; && (peep2_regno_dead_p (1, 29) +;; || find_reg_note (insn, REG_NORETURN, NULL_RTX))" +;; [(parallel [(call (mem:DI (match_dup 2)) +;; (match_dup 1)) +;; (use (reg:DI 29)) +;; (use (match_dup 0)) +;; (use (match_dup 3)) +;; (clobber (reg:DI 26))])] +;;{ +;; if (CONSTANT_P (operands[0])) +;; { +;; operands[2] = gen_rtx_REG (Pmode, 27); +;; operands[3] = GEN_INT (sw_64_next_sequence_number++); +;; emit_insn (gen_movdi_er_high_g (operands[2], pic_offset_table_rtx, +;; operands[0], operands[3])); +;; } +;; else +;; { +;; operands[2] = operands[0]; +;; operands[0] = const0_rtx; +;; operands[3] = const0_rtx; +;; } +;;}) + +;;(define_peephole2 +;; [(parallel [(call (mem:DI (match_operand:DI 0 "call_operand")) +;; (match_operand 1)) +;; (use (reg:DI 29)) +;; (clobber (reg:DI 26))])] +;; "TARGET_EXPLICIT_RELOCS && reload_completed +;; && ! samegp_function_operand (operands[0], Pmode) +;; && ! 
(peep2_regno_dead_p (1, 29) +;; || find_reg_note (insn, REG_NORETURN, NULL_RTX))" +;; [(parallel [(call (mem:DI (match_dup 2)) +;; (match_dup 1)) +;; (set (match_dup 5) +;; (unspec:DI [(match_dup 5) (match_dup 3)] UNSPEC_LDGP1)) +;; (use (match_dup 0)) +;; (use (match_dup 4)) +;; (clobber (reg:DI 26))]) +;; (set (match_dup 5) +;; (unspec:DI [(match_dup 5) (match_dup 3)] UNSPEC_LDGP2))] +;;{ +;; if (CONSTANT_P (operands[0])) +;; { +;; operands[2] = gen_rtx_REG (Pmode, 27); +;; operands[4] = GEN_INT (sw_64_next_sequence_number++); +;; emit_insn (gen_movdi_er_high_g (operands[2], pic_offset_table_rtx, +;; operands[0], operands[4])); +;; } +;; else +;; { +;; operands[2] = operands[0]; +;; operands[0] = const0_rtx; +;; operands[4] = const0_rtx; +;; } +;; operands[3] = GEN_INT (sw_64_next_sequence_number++); +;; operands[5] = pic_offset_table_rtx; +;;}) + + +(define_insn "*call_osf_2_er_nogp" + [(call (mem:DI (match_operand:DI 0 "register_operand" "c")) + (match_operand 1)) + (use (reg:DI 29)) + (use (match_operand 2)) + (use (match_operand 3 "const_int_operand")) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS" + "call $26,(%0),%2%J3" + [(set_attr "type" "call")]) + + +(define_insn "*call_osf_2_er_setfpec0" + [(call (mem:DI (match_operand:DI 0 "register_operand" "c")) + (match_operand 1)) + (set (reg:DI 29) + (unspec:DI [(reg:DI 29) (match_operand 4 "const_int_operand")] + UNSPEC_LDGP1)) + (use (match_operand 2)) + (use (match_operand 3 "const_int_operand")) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && flag_fpcr_set == 3 " + "call $26,(%0),%2%J3\;ldih $29,0($26)\t\t!gpdisp!%4" + [(set_attr "type" "call") + (set_attr "cannot_copy" "true") + (set_attr "length" "8")]) + +(define_insn "*call_osf_2_er_setfpec1" + [(call (mem:DI (match_operand:DI 0 "register_operand" "c")) + (match_operand 1)) + (set (reg:DI 29) + (unspec:DI [(reg:DI 29) (match_operand 4 "const_int_operand")] + UNSPEC_LDGP1)) + (use (match_operand 2)) + (use (match_operand 3 "const_int_operand")) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && flag_fpcr_set == 1 " + "call $26,(%0),%2%J3\;ldih $29,0($26)\t\t!gpdisp!%4" + [(set_attr "type" "call") + (set_attr "cannot_copy" "true") + (set_attr "length" "8")]) + +(define_insn "*call_osf_2_er" + [(call (mem:DI (match_operand:DI 0 "register_operand" "c")) + (match_operand 1)) + (set (reg:DI 29) + (unspec:DI [(reg:DI 29) (match_operand 4 "const_int_operand")] + UNSPEC_LDGP1)) + (use (match_operand 2)) + (use (match_operand 3 "const_int_operand")) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS" + "call $26,(%0),%2%J3\;ldih $29,0($26)\t\t!gpdisp!%4" + [(set_attr "type" "call") + (set_attr "cannot_copy" "true") + (set_attr "length" "8")]) + +(define_insn "*call_osf_1_noreturn" + [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s")) + (match_operand 1)) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "! TARGET_EXPLICIT_RELOCS + && find_reg_note (insn, REG_NORETURN, NULL_RTX)" + "@ + call $26,($27),0 + bsr $26,$%0..ng + call $26,%0" + [(set_attr "type" "call") + (set_attr "length" "*,*,8")]) + +(define_insn "*call_osf_1" + [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s")) + (match_operand 1)) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "! 
TARGET_EXPLICIT_RELOCS" + "@ + call $26,($27),0\;ldgp $29,0($26) + bsr $26,$%0..ng + call $26,%0\;ldgp $29,0($26)" + [(set_attr "type" "call") + (set_attr "length" "12,*,16")]) + +(define_insn "*sibcall_osf_1_er" + [(call (mem:DI (match_operand:DI 0 "symbolic_operand" "R,s")) + (match_operand 1)) + (unspec [(reg:DI 29)] UNSPEC_SIBCALL)] + "TARGET_EXPLICIT_RELOCS" + "@ + br $31,%0\t\t!samegp + ldl $27,%0($29)\t\t!literal!%#\;jmp $31,($27),%0\t\t!lituse_jsr!%#" + [(set_attr "type" "call") + (set_attr "length" "*,8")]) + +;; Note that the assembler expands "jmp foo" with $at, which +;; doesn't do what we want. +(define_insn "*sibcall_osf_1" + [(call (mem:DI (match_operand:DI 0 "symbolic_operand" "R,s")) + (match_operand 1)) + (unspec [(reg:DI 29)] UNSPEC_SIBCALL)] + "! TARGET_EXPLICIT_RELOCS" + "@ + br $31,$%0..ng + ldi $27,%0\;jmp $31,($27),%0" + [(set_attr "type" "call") + (set_attr "length" "*,8")]) + +;; Call subroutine returning any type. + +(define_expand "untyped_call" + [(parallel [(call (match_operand 0) + (const_int 0)) + (match_operand 1) + (match_operand 2)])] + "" +{ + int i; + + emit_call_insn (gen_call (operands[0], const0_rtx, NULL, const0_rtx)); + + for (i = 0; i < XVECLEN (operands[2], 0); i++) + { + rtx set = XVECEXP (operands[2], 0, i); + emit_move_insn (SET_DEST (set), SET_SRC (set)); + } + + /* The optimizer does not know that the call sets the function value + registers we stored in the result block. We avoid problems by + claiming that all hard registers are used and clobbered at this + point. */ + emit_insn (gen_blockage ()); + + DONE; +}) + +;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and +;; all of memory. This blocks insns from being moved across this point. + +(define_insn "blockage" + [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)] + "" + "" + [(set_attr "length" "0") + (set_attr "type" "none")]) + +(define_insn "jump" + [(set (pc) + (label_ref (match_operand 0)))] + "" + "br $31,%l0" + [(set_attr "type" "ibr")]) + +;; "ret $31,($26),1" +(define_expand "return" + [(return)] + "direct_return ()") + +(define_insn "*return_internal" + [(return)] + "reload_completed" +{ + return "ret $31,($26),1"; +} + + [(set_attr "type" "ibr")]) + +(define_insn "indirect_jump" + [(set (pc) (match_operand:DI 0 "register_operand" "r"))] + "" + "jmp $31,(%0),0" + [(set_attr "type" "ibr")]) + +(define_expand "tablejump" + [(parallel [(set (pc) + (match_operand 0 "register_operand")) + (use (label_ref:DI (match_operand 1)))])] + "" +{ + rtx dest = gen_reg_rtx (DImode); + emit_insn (gen_extendsidi2 (dest, operands[0])); + emit_insn (gen_adddi3 (dest, pic_offset_table_rtx, dest)); + operands[0] = dest; +}) + +(define_insn "*tablejump_internal" + [(set (pc) + (match_operand:DI 0 "register_operand" "r")) + (use (label_ref (match_operand 1)))] + "" + "jmp $31,(%0),0" + [(set_attr "type" "ibr")]) + +;; call_pal->sys_call 0x86 +;; Cache flush. Used by sw_64_trampoline_init. 0x86 is PAL_imb, but we don't +;; want to have to include pal.h in our .s file. +(define_insn "imb" + [(unspec_volatile [(const_int 0)] UNSPECV_IMB)] + "" + "sys_call 0x86" + [(set_attr "type" "callpal")]) + +(define_expand "clear_cache" + [(match_operand:DI 0) ; region start + (match_operand:DI 1)] ; region end + "" +{ + emit_insn (gen_imb ()); + DONE; +}) + +;; call_pal ->sys_call 0x80 +;; BUGCHK is documented common to SYSV PALcode. 
+(define_insn "trap" + [(trap_if (const_int 1) (const_int 0)) + (use (reg:DI 29))] + "" + "sys_call 0x80" + [(set_attr "type" "callpal")]) + +;; For userland, we load the thread pointer from the TCB. +;; For the kernel, we load the per-cpu private value. + +;; call_pal->sys_call xx +(define_insn "get_thread_pointerdi" + [(set (match_operand:DI 0 "register_operand" "=v") + (unspec:DI [(const_int 0)] UNSPEC_TP))] + "" +{ + if (TARGET_TLS_KERNEL) + return "sys_call 0x32"; + else if (flag_sw_rtid == 1) + return "rtid %0"; + else + return "sys_call 0x9e"; + ;;return "rtid %0"; +} + [(set_attr "type" "callpal")]) + +;; For completeness, and possibly a __builtin function, here's how to +;; set the thread pointer. Since we don't describe enough of this +;; quantity for CSE, we have to use a volatile unspec, and then there's +;; not much point in creating an R16_REG register class. + +(define_expand "set_thread_pointerdi" + [(set (reg:DI 16) (match_operand:DI 0 "input_operand")) + (unspec_volatile [(reg:DI 16)] UNSPECV_SET_TP)] + "") + +;; call_pal->sys_call xx +(define_insn "*set_tp" + [(unspec_volatile [(reg:DI 16)] UNSPECV_SET_TP)] + "" +{ + if (TARGET_TLS_KERNEL) + return "sys_call 0x31"; + else + return "sys_call 0x9f"; +} + [(set_attr "type" "callpal")]) + + +;; Finally, we have the basic data motion insns. The byte and word insns +;; are done via define_expand. Start with the floating-point insns, since +;; they are simpler. + +(define_expand "movsf" + [(set (match_operand:SF 0 "nonimmediate_operand") + (match_operand:SF 1 "general_operand"))] + "" +{ + if (MEM_P (operands[0]) + && ! reg_or_0_operand (operands[1], SFmode)) + operands[1] = force_reg (SFmode, operands[1]); +}) + +(define_insn "*movsf" + [(set (match_operand:SF 0 "nonimmediate_operand" "=f,f,*r,*r,m,m,f,*r") + (match_operand:SF 1 "input_operand" "fG,m,*rG,m,fG,*r,*r,f"))] + "register_operand (operands[0], SFmode) + || reg_or_0_operand (operands[1], SFmode)" + "@ + fcpys %R1,%R1,%0 + fld%,%U1 %0,%1 + bis $31,%r1,%0 + ldw %0,%1 + fst%,%U0 %R1,%0 + stw %r1,%0 + ifmovs %1,%0 + fimovs %1,%0" + [(set_attr "type" "fcpys,fld,ilog,ild,fst,ist,itof,ftoi") + (set_attr "isa" "*,*,*,*,*,*,fix,fix")]) + +(define_expand "movdf" + [(set (match_operand:DF 0 "nonimmediate_operand") + (match_operand:DF 1 "general_operand"))] + "" +{ + if (MEM_P (operands[0]) + && ! reg_or_0_operand (operands[1], DFmode)) + operands[1] = force_reg (DFmode, operands[1]); +}) +(define_insn "*movdf" + [(set (match_operand:DF 0 "nonimmediate_operand" "=f,f,*r,*r,m,m,f,*r") + (match_operand:DF 1 "input_operand" "fG,m,*rG,m,fG,*r,*r,f"))] + "register_operand (operands[0], DFmode) + || reg_or_0_operand (operands[1], DFmode)" + "@ + fcpys %R1,%R1,%0 + fld%-%U1 %0,%1 + bis $31,%r1,%0 + ldl %0,%1 + fst%-%U0 %R1,%0 + stl %r1,%0 + ifmovd %1,%0 + fimovd %1,%0" + [(set_attr "type" "fcpys,fld,ilog,ild,fst,ist,itof,ftoi") + (set_attr "isa" "*,*,*,*,*,*,fix,fix")]) + +;; Subregs suck for register allocation. Pretend we can move TFmode +;; data between general registers until after reload. +;; ??? Is this still true now that we have the lower-subreg pass? + +(define_expand "movtf" + [(set (match_operand:TF 0 "nonimmediate_operand") + (match_operand:TF 1 "general_operand"))] + "" +{ + if (MEM_P (operands[0]) + && ! 
reg_or_0_operand (operands[1], TFmode)) + operands[1] = force_reg (TFmode, operands[1]); +}) + +(define_insn_and_split "*movtf_internal" + [(set (match_operand:TF 0 "nonimmediate_operand" "=r,m") + (match_operand:TF 1 "input_operand" "rmG,rG"))] + "register_operand (operands[0], TFmode) + || reg_or_0_operand (operands[1], TFmode)" + "#" + "reload_completed" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 1) (match_dup 3))] + "sw_64_split_tmode_pair (operands, TFmode, true);") + +;; We do two major things here: handle mem->mem and construct long +;; constants. + +(define_expand "movsi" + [(set (match_operand:SI 0 "nonimmediate_operand") + (match_operand:SI 1 "general_operand"))] + "" +{ + if (sw_64_expand_mov (SImode, operands)) + DONE; +}) + +(define_insn "*movsi" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,r,r,r,r,m,r") + (match_operand:SI 1 "input_operand" "rJ,K,L,T,s,n,m,rJ,s"))] + "register_operand (operands[0], SImode) + || reg_or_0_operand (operands[1], SImode)" + "@ + bis $31,%r1,%0 + ldi %0,%1($31) + ldih %0,%h1($31) + # + # + # + ldw%U1 %0,%1 + stw%U0 %r1,%0 + ldi %0,%1" + [(set_attr "type" "ilog,iadd,iadd,iadd,iadd,multi,ild,ist,ldsym") + (set_attr "isa" "*,*,*,*,*,*,*,*,vms")]) + +;; Split a load of a large constant into the appropriate two-insn +;; sequence. + +(define_split + [(set (match_operand:SI 0 "register_operand") + (match_operand:SI 1 "non_add_const_operand"))] + "" + [(const_int 0)] +{ + if (sw_64_split_const_mov (SImode, operands)) + DONE; + else + FAIL; +}) + +(define_insn "*movdi_er_low_l" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "local_symbolic_operand")))] + "TARGET_EXPLICIT_RELOCS" +{ + if (true_regnum (operands[1]) == 29) + return "ldi %0,%2(%1)\t\t!gprel"; + else + return "ldi %0,%2(%1)\t\t!gprellow"; +} + [(set_attr "usegp" "yes")]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (match_operand:DI 1 "small_symbolic_operand"))] + "TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 0) + (lo_sum:DI (match_dup 2) (match_dup 1)))] + "operands[2] = pic_offset_table_rtx;") + +(define_split + [(set (match_operand:DI 0 "register_operand") + (match_operand:DI 1 "local_symbolic_operand"))] + "TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 0) + (plus:DI (match_dup 2) (high:DI (match_dup 1)))) + (set (match_dup 0) + (lo_sum:DI (match_dup 0) (match_dup 1)))] + "operands[2] = pic_offset_table_rtx;") + +(define_split + [(match_operand 0 "some_small_symbolic_operand")] + "" + [(match_dup 0)] + "operands[0] = split_small_symbolic_operand (operands[0]);") + +;; Accepts any symbolic, not just global, since function calls that +;; don't go via bsr still use !literal in hopes of linker relaxation. 
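;; Illustrative aside: the !literal, !lituse_jsr and !gpdisp annotations
;; used throughout this file follow the Alpha-style explicit-relocation
;; scheme.  A GOT-indirect call, for example, comes out roughly as
;;     ldl  $27,foo($29)        !literal!N
;;     call $26,($27),foo       !lituse_jsr!N
;;     ldih $29,0($26)          !gpdisp!M
;;     ldi  $29,0($29)          !gpdisp!M
;; ("foo" is just a placeholder symbol).  The shared sequence number N lets
;; the linker relax the literal load plus indirect call into a direct bsr,
;; and M pairs the two instructions that recompute $gp from the return
;; address.  The !samegp form skips that gp reload for callees known to
;; share the caller's GP.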
+(define_insn "movdi_er_high_g" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "symbolic_operand") + (match_operand 3 "const_int_operand")] + UNSPEC_LITERAL))] + "TARGET_EXPLICIT_RELOCS" +{ + if (INTVAL (operands[3]) == 0) + return "ldl %0,%2(%1)\t\t!literal"; + else + return "ldl %0,%2(%1)\t\t!literal!%3"; +} + [(set_attr "type" "ldsym")]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (match_operand:DI 1 "global_symbolic_operand"))] + "TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 0) + (unspec:DI [(match_dup 2) + (match_dup 1) + (const_int 0)] UNSPEC_LITERAL))] + "operands[2] = pic_offset_table_rtx;") + +(define_insn "movdi_er_tlsgd" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "symbolic_operand") + (match_operand 3 "const_int_operand")] + UNSPEC_TLSGD))] + "HAVE_AS_TLS" +{ + if (INTVAL (operands[3]) == 0) + return "ldi %0,%2(%1)\t\t!tlsgd"; + else + return "ldi %0,%2(%1)\t\t!tlsgd!%3"; +} +[(set_attr "cannot_copy" "true")]) + + +(define_insn "*movdi_er_tlsrelgot" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "symbolic_operand") + (match_operand 3 "const_int_operand")] + UNSPEC_TLSRELGOT))] + "HAVE_AS_TLS" +{ + if (INTVAL (operands[3]) == 0) + return "ldih %0,%2(%1)\t\t!tlsrel_got"; + else + return "ldih %0,%2(%1)\t\t!tlsrel_got!%3"; +} +[(set_attr "cannot_copy" "true")]) + + +(define_insn "movdi_er_tlsldm" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPEC_TLSLDM))] + "HAVE_AS_TLS" +{ + if (INTVAL (operands[2]) == 0) + return "ldi %0,%&(%1)\t\t!tlsldm"; + else + return "ldi %0,%&(%1)\t\t!tlsldm!%2"; +} +[(set_attr "cannot_copy" "true")]) + +;; insert ldih insn with tlsrelgot relocation before ldl insn with gotdtprel relocation. 
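;; Illustrative aside: taken together, the UNSPEC_TLS* patterns in this
;; area implement the usual ELF TLS models.  A general-dynamic access of a
;; variable `x', for instance, is assumed to expand along the lines of
;;     ldi  $16,x($29)                  !tlsgd!N
;;     ldl  $27,__tls_get_addr($29)     !literal!N
;;     call $26,($27),__tls_get_addr    !lituse_tlsgd!N
;; (movdi_er_tlsgd plus the TLS call_value_osf pattern near the end of the
;; file), while initial-exec accesses combine the !gottprel loads below
;; with the thread pointer read by get_thread_pointerdi.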
+(define_insn "*movdi_er_gotdtprel" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "symbolic_operand") + (match_operand 3 "const_int_operand")] + UNSPEC_GOTDTPREL))] + "HAVE_AS_TLS" +{ + if (INTVAL (operands[3]) == 0) + return "ldl %0,%2(%1)\t\t!gotdtprel"; + else + return "ldl %0,%2(%1)\t\t!gotdtprel!%3"; +} +[(set_attr "type" "ild") + (set_attr "usegp" "yes")]) + +(define_insn "*movdi_er_gotdtp" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "symbolic_operand")] + UNSPEC_DTPREL))] + "HAVE_AS_TLS" + "ldl %0,%2(%1)\t\t!gotdtprel" + [(set_attr "type" "ild") + (set_attr "usegp" "yes")]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (match_operand:DI 1 "gotdtp_symbolic_operand"))] + "HAVE_AS_TLS && reload_completed" + [(set (match_dup 0) + (unspec:DI [(match_dup 2) + (match_dup 1)] UNSPEC_DTPREL))] +{ + operands[1] = XVECEXP (XEXP (operands[1], 0), 0, 0); + operands[2] = pic_offset_table_rtx; +}) + +(define_insn "*movdi_er_gottprel" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "symbolic_operand") + (match_operand 3 "const_int_operand")] + UNSPEC_TPREL))] + "HAVE_AS_TLS" +{ + if (INTVAL (operands[3]) == 0) + return "ldl %0,%2(%1)\t\t!gottprel"; + else + return "ldl %0,%2(%1)\t\t!gottprel!%3"; +} +[(set_attr "type" "ild") + (set_attr "usegp" "yes")]) + +(define_insn "*movdi_er_gottp" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "symbolic_operand")] + UNSPEC_TPREL))] + "HAVE_AS_TLS" + "ldl %0,%2(%1)\t\t!gottprel" + [(set_attr "type" "ild") + (set_attr "usegp" "yes")]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (match_operand:DI 1 "gottp_symbolic_operand"))] + "HAVE_AS_TLS && reload_completed" + [(set (match_dup 0) + (unspec:DI [(match_dup 2) + (match_dup 1)] UNSPEC_TPREL))] +{ + operands[1] = XVECEXP (XEXP (operands[1], 0), 0, 0); + operands[2] = pic_offset_table_rtx; +}) +(define_insn "*movdi" + [(set (match_operand:DI 0 "nonimmediate_operand" + "=r,r,r,r,r,r,r,r, m, *f,*f, Q, r,*f") + (match_operand:DI 1 "input_operand" + "rJ,K,L,T,s,n,s,m,rJ,*fJ, Q,*f,*f, r"))] + "register_operand (operands[0], DImode) + || reg_or_0_operand (operands[1], DImode)" + "@ + mov %r1,%0 + ldi %0,%1($31) + ldih %0,%h1($31) + # + # + # + ldi %0,%1 + ldl%A1%U1 %0,%1 + stl%A0%U0 %r1,%0 + fmov %R1,%0 + fldd%U1 %0,%1 + fstd%U0 %R1,%0 + fimovd %1,%0 + ifmovd %1,%0" + [(set_attr "type" "ilog,iadd,iadd,iadd,ldsym,multi,ldsym,ild,ist,fcpys,fld,fst,ftoi,itof") + (set_attr "isa" "*,*,*,er,er,*,ner,*,*,*,*,*,fix,fix") + (set_attr "usegp" "*,*,*,yes,*,*,*,*,*,*,*,*,*,*")]) + +(define_insn "force_movdi" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r")] + UNSPECV_FORCE_MOV))] + "" + "mov %1,%0" + [(set_attr "type" "ilog")]) + +;; We do three major things here: handle mem->mem, put 64-bit constants in +;; memory, and construct long 32-bit constants. + +(define_expand "movdi" + [(set (match_operand:DI 0 "nonimmediate_operand") + (match_operand:DI 1 "general_operand"))] + "" +{ + if (sw_64_expand_mov (DImode, operands)) + DONE; +}) + +;; Split a load of a large constant into the appropriate two-insn +;; sequence. 
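;; Illustrative aside: the two-insn sequence referred to above is the
;; usual ldih/ldi decomposition.  Since ldi sign-extends its 16-bit
;; displacement, the high half is biased by one whenever bit 15 of the
;; constant is set.  For 0x12348765, for instance, the low 16 bits
;; sign-extend to -0x789b, so the split emits roughly
;;     ldih $1,0x1235($31)      ; 0x1235 << 16        = 0x12350000
;;     ldi  $1,-0x789b($1)      ; 0x12350000 - 0x789b = 0x12348765
;; (register number and exact operand syntax are only for illustration).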
+ +(define_split + [(set (match_operand:DI 0 "register_operand") + (match_operand:DI 1 "non_add_const_operand"))] + "" + [(const_int 0)] +{ + if (sw_64_split_const_mov (DImode, operands)) + DONE; + else + FAIL; +}) + +;; We need to prevent reload from splitting TImode moves, because it +;; might decide to overwrite a pointer with the value it points to. +;; In that case we have to do the loads in the appropriate order so +;; that the pointer is not destroyed too early. + +(define_insn_and_split "*movti_internal" + [(set (match_operand:TI 0 "nonimmediate_operand" "=r,m") + (match_operand:TI 1 "input_operand" "rmJ,rJ"))] + "(register_operand (operands[0], TImode) + /* Prevent rematerialization of constants. */ + && ! CONSTANT_P (operands[1])) + || reg_or_0_operand (operands[1], TImode)" + "#" + "reload_completed" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 1) (match_dup 3))] + "sw_64_split_tmode_pair (operands, TImode, true);") + +(define_expand "movti" + [(set (match_operand:TI 0 "nonimmediate_operand") + (match_operand:TI 1 "general_operand"))] + "" +{ + if (MEM_P (operands[0]) + && ! reg_or_0_operand (operands[1], TImode)) + operands[1] = force_reg (TImode, operands[1]); + + if (operands[1] == const0_rtx) + ; + /* We must put 64-bit constants in memory. We could keep the + 32-bit constants in TImode and rely on the splitter, but + this doesn't seem to be worth the pain. */ + else if (CONST_SCALAR_INT_P (operands[1])) + { + rtx in[2], out[2], target; + + gcc_assert (can_create_pseudo_p ()); + + split_double (operands[1], &in[0], &in[1]); + + if (in[0] == const0_rtx) + out[0] = const0_rtx; + else + { + out[0] = gen_reg_rtx (DImode); + emit_insn (gen_movdi (out[0], in[0])); + } + + if (in[1] == const0_rtx) + out[1] = const0_rtx; + else + { + out[1] = gen_reg_rtx (DImode); + emit_insn (gen_movdi (out[1], in[1])); + } + + if (!REG_P (operands[0])) + target = gen_reg_rtx (TImode); + else + target = operands[0]; + + emit_insn (gen_movdi (operand_subword (target, 0, 0, TImode), out[0])); + emit_insn (gen_movdi (operand_subword (target, 1, 0, TImode), out[1])); + + if (target != operands[0]) + emit_insn (gen_rtx_SET (operands[0], target)); + + DONE; + } +}) + +;; These are the partial-word cases. +;; +;; First we have the code to load an aligned word. Operand 0 is the register +;; in which to place the result. It's mode is QImode or HImode. Operand 1 +;; is an SImode MEM at the low-order byte of the proper word. Operand 2 is the +;; number of bits within the word that the value is. Operand 3 is an SImode +;; scratch register. If operand 0 is a hard register, operand 3 may be the +;; same register. It is allowed to conflict with operand 1 as well. + +(define_expand "aligned_loadqi" + [(set (match_operand:SI 3 "register_operand") + (match_operand:SI 1 "memory_operand")) + (set (match_operand:DI 0 "register_operand") + (zero_extract:DI (subreg:DI (match_dup 3) 0) + (const_int 8) + (match_operand:DI 2 "const_int_operand")))]) + +(define_expand "aligned_loadhi" + [(set (match_operand:SI 3 "register_operand") + (match_operand:SI 1 "memory_operand")) + (set (match_operand:DI 0 "register_operand") + (zero_extract:DI (subreg:DI (match_dup 3) 0) + (const_int 16) + (match_operand:DI 2 "const_int_operand")))]) + +;; Similar for unaligned loads, where we use the sequence from the +;; Sw_64 Architecture manual. We have to distinguish between little-endian +;; and big-endian systems as the sequences are different. +;; +;; Operand 1 is the address. 
Operands 2 and 3 are temporaries, where +;; operand 3 can overlap the input and output registers. + +(define_expand "unaligned_loadqi" + [(set (match_operand:DI 2 "register_operand") + (mem:DI (and:DI (match_operand:DI 1 "address_operand") + (const_int -8)))) + (set (match_operand:DI 3 "register_operand") + (match_dup 1)) + (set (match_operand:DI 0 "register_operand") + (zero_extract:DI (match_dup 2) + (const_int 8) + (ashift:DI (match_dup 3) (const_int 3))))]) + +(define_expand "unaligned_loadhi" + [(set (match_operand:DI 2 "register_operand") + (mem:DI (and:DI (match_operand:DI 1 "address_operand") + (const_int -8)))) + (set (match_operand:DI 3 "register_operand") + (match_dup 1)) + (set (match_operand:DI 0 "register_operand") + (zero_extract:DI (match_dup 2) + (const_int 16) + (ashift:DI (match_dup 3) (const_int 3))))]) + +;; Storing an aligned byte or word requires two temporaries. Operand 0 is the +;; aligned SImode MEM. Operand 1 is the register containing the +;; byte or word to store. Operand 2 is the number of bits within the word that +;; the value should be placed. Operands 3 and 4 are SImode temporaries. + +(define_expand "aligned_store" + [(set (match_operand:SI 3 "register_operand") + (match_operand:SI 0 "memory_operand")) + (set (subreg:DI (match_dup 3) 0) + (and:DI (subreg:DI (match_dup 3) 0) (match_dup 5))) + (set (subreg:DI (match_operand:SI 4 "register_operand") 0) + (ashift:DI (zero_extend:DI (match_operand 1 "register_operand")) + (match_operand:DI 2 "const_int_operand"))) + (set (subreg:DI (match_dup 4) 0) + (ior:DI (subreg:DI (match_dup 4) 0) (subreg:DI (match_dup 3) 0))) + (set (match_dup 0) (match_dup 4))] + "" +{ + operands[5] = GEN_INT (~ (GET_MODE_MASK (GET_MODE (operands[1])) + << INTVAL (operands[2]))); +}) + +;; For the unaligned byte and halfword cases, we use code similar to that +;; in the ;; Architecture book, but reordered to lower the number of registers +;; required. Operand 0 is the address. Operand 1 is the data to store. +;; Operands 2, 3, and 4 are DImode temporaries, where operands 2 and 4 may +;; be the same temporary, if desired. If the address is in a register, +;; operand 2 can be that register. + +(define_expand "@unaligned_store" + [(set (match_operand:DI 3 "register_operand") + (mem:DI (and:DI (match_operand:DI 0 "address_operand") + (const_int -8)))) + (set (match_operand:DI 2 "register_operand") + (match_dup 0)) + (set (match_dup 3) + (and:DI (not:DI (ashift:DI (match_dup 5) + (ashift:DI (match_dup 2) (const_int 3)))) + (match_dup 3))) + (set (match_operand:DI 4 "register_operand") + (ashift:DI (zero_extend:DI + (match_operand:I12MODE 1 "register_operand")) + (ashift:DI (match_dup 2) (const_int 3)))) + (set (match_dup 4) (ior:DI (match_dup 4) (match_dup 3))) + (set (mem:DI (and:DI (match_dup 0) (const_int -8))) + (match_dup 4))] + "" + "operands[5] = GEN_INT (GET_MODE_MASK (mode));") + +;; Here are the define_expand's for QI and HI moves that use the above +;; patterns. We have the normal sets, plus the ones that need scratch +;; registers for reload. 
+ +(define_expand "mov" + [(set (match_operand:I12MODE 0 "nonimmediate_operand") + (match_operand:I12MODE 1 "general_operand"))] + "" +{ + if (sw_64_expand_mov (mode, operands)) + DONE; +}) + +(define_insn "*movqi" + [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,m") + (match_operand:QI 1 "input_operand" "rJ,n,m,rJ"))] + "register_operand (operands[0], QImode) + || reg_or_0_operand (operands[1], QImode)" + "@ + bis $31,%r1,%0 + ldi %0,%L1($31) + ldbu%U1 %0,%1 + stb%U0 %r1,%0" + [(set_attr "type" "ilog,iadd,ild,ist") + (set_attr "isa" "*,*,bwx,bwx")]) + +(define_insn "*movhi" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m") + (match_operand:HI 1 "input_operand" "rJ,n,m,rJ"))] + "register_operand (operands[0], HImode) + || reg_or_0_operand (operands[1], HImode)" + "@ + bis $31,%r1,%0 + ldi %0,%L1($31) + ldhu%U1 %0,%1 + sth%U0 %r1,%0" + [(set_attr "type" "ilog,iadd,ild,ist") + (set_attr "isa" "*,*,bwx,bwx")]) + +;; Helpers for the above. The way reload is structured, we can't +;; always get a proper address for a stack slot during reload_foo +;; expansion, so we must delay our address manipulations until after. + +(define_insn_and_split "@reload_in_aligned" + [(set (match_operand:I12MODE 0 "register_operand" "=r") + (match_operand:I12MODE 1 "memory_operand" "m"))] + "!TARGET_BWX && (reload_in_progress || reload_completed)" + "#" + "!TARGET_BWX && reload_completed" + [(const_int 0)] +{ + rtx aligned_mem, bitnum; + get_aligned_mem (operands[1], &aligned_mem, &bitnum); + emit_insn (gen_aligned_load + (gen_lowpart (DImode, operands[0]), aligned_mem, bitnum, + gen_rtx_REG (SImode, REGNO (operands[0])))); + DONE; +}) + +(define_mode_iterator VEC [V8QI V4HI V2SI]) +(define_mode_iterator VEC12 [V8QI V4HI]) + +(define_expand "mov" + [(set (match_operand:VEC 0 "nonimmediate_operand") + (match_operand:VEC 1 "general_operand"))] + "" +{ + if (sw_64_expand_mov (mode, operands)) + DONE; +}) + +(define_split + [(set (match_operand:VEC 0 "register_operand") + (match_operand:VEC 1 "non_zero_const_operand"))] + "" + [(const_int 0)] +{ + if (sw_64_split_const_mov (mode, operands)) + DONE; + else + FAIL; +}) + + +(define_expand "movmisalign" + [(set (match_operand:VEC 0 "nonimmediate_operand") + (match_operand:VEC 1 "general_operand"))] + "flag_sw_unalign_byte != 1 || !TARGET_SW8A" +{ + sw_64_expand_movmisalign (mode, operands); + DONE; +}) + +(define_insn "*mov_fix" + [(set (match_operand:VEC 0 "nonimmediate_operand" "=r,r,r,m,*f,*f,m,r,*f") + (match_operand:VEC 1 "input_operand" "rW,i,m,rW,*fW,m,*f,*f,r"))] + "register_operand (operands[0], mode) + || reg_or_0_operand (operands[1], mode)" + "@ + bis $31,%r1,%0 + # + ldl%A1%U1 %0,%1 + stl%A0%U0 %r1,%0 + fcpys %R1,%R1,%0 + fldd%U1 %0,%1 + fstd%U0 %R1,%0 + fimovd %1,%0 + ifmovd %1,%0" + [(set_attr "type" "ilog,multi,ild,ist,fcpys,fld,fst,ftoi,itof") + (set_attr "isa" "*,*,*,*,*,*,*,fix,fix")]) + +(define_insn "3" + [(set (match_operand:VEC12 0 "register_operand" "=r") + (any_maxmin:VEC12 + (match_operand:VEC12 1 "reg_or_0_operand" "rW") + (match_operand:VEC12 2 "reg_or_0_operand" "rW")))] + "TARGET_MAX" + " %r1,%r2,%0" + [(set_attr "type" "mvi")]) + +(define_insn "one_cmpl2" + [(set (match_operand:VEC 0 "register_operand" "=r") + (not:VEC (match_operand:VEC 1 "register_operand" "r")))] + "" + "ornot $31,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "and3" + [(set (match_operand:VEC 0 "register_operand" "=r") + (and:VEC (match_operand:VEC 1 "register_operand" "r") + (match_operand:VEC 2 "register_operand" "r")))] + "" + "and 
%1,%2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*andnot3" + [(set (match_operand:VEC 0 "register_operand" "=r") + (and:VEC (not:VEC (match_operand:VEC 1 "register_operand" "r")) + (match_operand:VEC 2 "register_operand" "r")))] + "" + "bic %2,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "ior3" + [(set (match_operand:VEC 0 "register_operand" "=r") + (ior:VEC (match_operand:VEC 1 "register_operand" "r") + (match_operand:VEC 2 "register_operand" "r")))] + "" + "bis %1,%2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*iornot3" + [(set (match_operand:VEC 0 "register_operand" "=r") + (ior:VEC (not:DI (match_operand:VEC 1 "register_operand" "r")) + (match_operand:VEC 2 "register_operand" "r")))] + "" + "ornot %2,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "xor3" + [(set (match_operand:VEC 0 "register_operand" "=r") + (xor:VEC (match_operand:VEC 1 "register_operand" "r") + (match_operand:VEC 2 "register_operand" "r")))] + "" + "xor %1,%2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*xornot3" + [(set (match_operand:VEC 0 "register_operand" "=r") + (not:VEC (xor:VEC (match_operand:VEC 1 "register_operand" "r") + (match_operand:VEC 2 "register_operand" "r"))))] + "" + "eqv %1,%2,%0" + [(set_attr "type" "ilog")]) + +(define_expand "vec_shl_" + [(set (match_operand:VEC 0 "register_operand") + (ashift:DI (match_operand:VEC 1 "register_operand") + (match_operand:DI 2 "reg_or_6bit_operand")))] + "" +{ + operands[0] = gen_lowpart (DImode, operands[0]); + operands[1] = gen_lowpart (DImode, operands[1]); +}) + +(define_expand "vec_shr_" + [(set (match_operand:VEC 0 "register_operand") + (lshiftrt:DI (match_operand:VEC 1 "register_operand") + (match_operand:DI 2 "reg_or_6bit_operand")))] + "" +{ + operands[0] = gen_lowpart (DImode, operands[0]); + operands[1] = gen_lowpart (DImode, operands[1]); +}) + +;; Bit field extract patterns which use ext[wlq][lh] + +(define_expand "extvmisaligndi" + [(set (match_operand:DI 0 "register_operand") + (sign_extract:DI (match_operand:BLK 1 "memory_operand") + (match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "const_int_operand")))] + "" +{ + /* We can do 16, 32 and 64 bit fields, if aligned on byte boundaries. */ + if (INTVAL (operands[3]) % 8 != 0 + || (INTVAL (operands[2]) != 16 + && INTVAL (operands[2]) != 32 + && INTVAL (operands[2]) != 64)) + FAIL; + + sw_64_expand_unaligned_load (operands[0], operands[1], + INTVAL (operands[2]) / 8, + INTVAL (operands[3]) / 8, 1); + DONE; +}) + +(define_expand "extzvdi" + [(set (match_operand:DI 0 "register_operand") + (zero_extract:DI (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "const_int_operand")))] + "" +{ + /* We can do 8, 16, 32 and 64 bit fields, if aligned on byte boundaries. */ + if (INTVAL (operands[3]) % 8 != 0 + || (INTVAL (operands[2]) != 8 + && INTVAL (operands[2]) != 16 + && INTVAL (operands[2]) != 32 + && INTVAL (operands[2]) != 64)) + FAIL; +}) + +(define_expand "extzvmisaligndi" + [(set (match_operand:DI 0 "register_operand") + (zero_extract:DI (match_operand:BLK 1 "memory_operand") + (match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "const_int_operand")))] + "" +{ + /* We can do 16, 32 and 64 bit fields, if aligned on byte boundaries. + We fail 8-bit fields, falling back on a simple byte load. 
*/ + if (INTVAL (operands[3]) % 8 != 0 + || (INTVAL (operands[2]) != 16 + && INTVAL (operands[2]) != 32 + && INTVAL (operands[2]) != 64)) + FAIL; + + sw_64_expand_unaligned_load (operands[0], operands[1], + INTVAL (operands[2]) / 8, + INTVAL (operands[3]) / 8, 0); + DONE; +}) + +(define_expand "insvmisaligndi" + [(set (zero_extract:DI (match_operand:BLK 0 "memory_operand") + (match_operand:DI 1 "const_int_operand") + (match_operand:DI 2 "const_int_operand")) + (match_operand:DI 3 "register_operand"))] + "" +{ + /* We can do 16, 32 and 64 bit fields, if aligned on byte boundaries. */ + if (INTVAL (operands[2]) % 8 != 0 + || (INTVAL (operands[1]) != 16 + && INTVAL (operands[1]) != 32 + && INTVAL (operands[1]) != 64)) + FAIL; + + sw_64_expand_unaligned_store (operands[0], operands[3], + INTVAL (operands[1]) / 8, + INTVAL (operands[2]) / 8); + DONE; +}) + +;; Block move/clear, see sw_64.c for more details. +;; Argument 0 is the destination +;; Argument 1 is the source +;; Argument 2 is the length +;; Argument 3 is the alignment + +(define_expand "cpymemqi" + [(parallel [(set (match_operand:BLK 0 "memory_operand") + (match_operand:BLK 1 "memory_operand")) + (use (match_operand:DI 2 "immediate_operand")) + (use (match_operand:DI 3 "immediate_operand"))])] + "flag_sw_unalign_byte != 1 || !TARGET_SW8A" +{ + if (sw_64_expand_block_move (operands)) + DONE; + else + FAIL; +}) + +(define_expand "setmemqi" + [(parallel [(set (match_operand:BLK 0 "memory_operand") + (match_operand 2 "const_int_operand")) + (use (match_operand:DI 1 "immediate_operand")) + (use (match_operand:DI 3 "immediate_operand"))])] + "flag_sw_unalign_byte != 1 || !TARGET_SW8A" +{ + /* If value to set is not zero, use the library routine. */ + if (operands[2] != const0_rtx) + FAIL; + + if (sw_64_expand_block_clear (operands)) + DONE; + else + FAIL; +}) + +;; Subroutine of stack space allocation. Perform a stack probe. +(define_expand "stack_probe_internal" + [(set (match_dup 1) (match_operand:DI 0 "const_int_operand"))] + "" +{ + operands[1] = gen_rtx_MEM (DImode, plus_constant (Pmode, stack_pointer_rtx, + INTVAL (operands[0]))); + MEM_VOLATILE_P (operands[1]) = 1; + + operands[0] = const0_rtx; +}) + +;; This is how we allocate stack space. If we are allocating a +;; constant amount of space and we know it is less than 4096 +;; bytes, we need do nothing. +;; +;; If it is more than 4096 bytes, we need to probe the stack +;; periodically. +(define_expand "allocate_stack" + [(set (reg:DI 30) + (plus:DI (reg:DI 30) + (match_operand:DI 1 "reg_or_cint_operand"))) + (set (match_operand:DI 0 "register_operand" "=r") + (match_dup 2))] + "" +{ + if (CONST_INT_P (operands[1]) + && INTVAL (operands[1]) < 32768) + { + if (INTVAL (operands[1]) >= 4096) + { + /* We do this the same way as in the prologue and generate explicit + probes. Then we update the stack by the constant. 
*/ + + int probed = 4096; + + emit_insn (gen_stack_probe_internal (GEN_INT (- probed))); + while (probed + 8192 < INTVAL (operands[1])) + emit_insn (gen_stack_probe_internal + (GEN_INT (- (probed += 8192)))); + + if (probed + 4096 < INTVAL (operands[1])) + emit_insn (gen_stack_probe_internal + (GEN_INT (- INTVAL (operands[1])))); + } + + operands[1] = GEN_INT (- INTVAL (operands[1])); + operands[2] = virtual_stack_dynamic_rtx; + } + else + { + rtx_code_label *out_label = 0; + rtx_code_label *loop_label = gen_label_rtx (); + rtx want = gen_reg_rtx (Pmode); + rtx tmp = gen_reg_rtx (Pmode); + rtx memref, test; + + emit_insn (gen_subdi3 (want, stack_pointer_rtx, + force_reg (Pmode, operands[1]))); + + if (!CONST_INT_P (operands[1])) + { + rtx limit = GEN_INT (4096); + out_label = gen_label_rtx (); + test = gen_rtx_LTU (VOIDmode, operands[1], limit); + emit_jump_insn + (gen_cbranchdi4 (test, operands[1], limit, out_label)); + } + + emit_insn (gen_adddi3 (tmp, stack_pointer_rtx, GEN_INT (-4096))); + emit_label (loop_label); + memref = gen_rtx_MEM (DImode, tmp); + MEM_VOLATILE_P (memref) = 1; + emit_move_insn (memref, const0_rtx); + emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (-8192))); + test = gen_rtx_GTU (VOIDmode, tmp, want); + emit_jump_insn (gen_cbranchdi4 (test, tmp, want, loop_label)); + + memref = gen_rtx_MEM (DImode, want); + MEM_VOLATILE_P (memref) = 1; + emit_move_insn (memref, const0_rtx); + + if (out_label) + emit_label (out_label); + + emit_move_insn (stack_pointer_rtx, want); + emit_move_insn (operands[0], virtual_stack_dynamic_rtx); + DONE; + } +}) + +;; This is used by sw_64_expand_prolog to do the same thing as above, +;; except we cannot at that time generate new basic blocks, so we hide +;; the loop in this one insn. + +(define_insn "prologue_stack_probe_loop" + [(unspec_volatile [(match_operand:DI 0 "register_operand" "r") + (match_operand:DI 1 "register_operand" "r")] + UNSPECV_PSPL)] + "" +{ + operands[2] = gen_label_rtx (); + (*targetm.asm_out.internal_label) (asm_out_file, "L", + CODE_LABEL_NUMBER (operands[2])); + + return "stl $31,-8192(%1)\;subl %0,1,%0\;ldi %1,-8192(%1)\;bne %0,%l2"; +} + [(set_attr "length" "16") + (set_attr "type" "multi")]) + +(define_expand "prologue" + [(const_int 0)] + "" +{ + sw_64_expand_prologue (); + DONE; +}) + +;; These take care of emitting the ldgp insn in the prologue. This will be +;; an ldi/ldih pair and we want to align them properly. So we have two +;; unspec_volatile insns, the first of which emits the ldgp assembler macro +;; and the second of which emits nothing. However, both are marked as type +;; IADD (the default) so the alignment code in sw_64.c does the right thing +;; with them. + +(define_expand "prologue_ldgp" + [(set (match_dup 0) + (unspec_volatile:DI [(match_dup 1) (match_dup 2)] UNSPECV_LDGP1)) + (set (match_dup 0) + (unspec_volatile:DI [(match_dup 0) (match_dup 2)] UNSPECV_PLDGP2))] + "" +{ + operands[0] = pic_offset_table_rtx; + operands[1] = gen_rtx_REG (Pmode, 27); + operands[2] = (TARGET_EXPLICIT_RELOCS + ? 
GEN_INT (sw_64_next_sequence_number++) + : const0_rtx); +}) + +(define_insn "*ldgp_er_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPECV_LDGP1))] + "TARGET_EXPLICIT_RELOCS" + "ldih %0,0(%1)\t\t!gpdisp!%2" + [(set_attr "cannot_copy" "true")]) + +(define_insn "*ldgp_er_2" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPEC_LDGP2))] + "TARGET_EXPLICIT_RELOCS" + "ldi %0,0(%1)\t\t!gpdisp!%2" + [(set_attr "cannot_copy" "true")]) + +(define_insn "*exc_ldgp_er_2" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPECV_LDGP2))] + "TARGET_EXPLICIT_RELOCS" + "ldi %0,0(%1)\t\t!gpdisp!%2" + [(set_attr "cannot_copy" "true")]) + +(define_insn "*prologue_ldgp_er_2" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPECV_PLDGP2))] + "TARGET_EXPLICIT_RELOCS" +{ + return "ldi %0,0(%1)\t\t!gpdisp!%2\n$%~..ng:"; +} + [(set_attr "cannot_copy" "true")]) + +(define_insn "*prologue_ldgp_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPECV_LDGP1))] + "" +{ + return "ldgp %0,0(%1)\n$%~..ng:"; +} + [(set_attr "cannot_copy" "true")]) + +(define_insn "*prologue_ldgp_2" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPECV_PLDGP2))] + "" +) + +(define_insn "hardware_prefetch_use_syscall" +[(unspec_volatile [ +(match_operand:DI 0 "register_operand" "=r") +(match_operand:DI 1 "register_operand" "=r") +] UNSPECV_HARDWARE_PREFETCH_CNT)] +"" +{ + return "ldi $16,110($31)\;ldi $18,1($31)\;ldi $19,120($30)\;\ +stl %0,120($30)\;\ +ldl $27,syscall($29)\t\t!literal!%#\;call $26,($27),syscall\t\t!lituse_jsr!%#\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%*\;" + ; +} +[(set_attr "type" "multi") + (set_attr "length" "8")]) + +;; The _mcount profiling hook has special calling conventions, and +;; does not clobber all the registers that a normal call would. So +;; hide the fact this is a call at all. + +(define_insn "prologue_mcount" + [(unspec_volatile [(const_int 0)] UNSPECV_MCOUNT)] + "" +{ + if (TARGET_EXPLICIT_RELOCS) + /* Note that we cannot use a lituse_jsr reloc, since _mcount + cannot be called via the PLT. 
*/ + return "ldl $28,_mcount($29)\t\t!literal\;call $28,($28),_mcount"; + else + return "ldi $28,_mcount\;call $28,($28),_mcount"; +} + [(set_attr "type" "multi") + (set_attr "length" "8")]) + +(define_insn "init_fp" + [(set (match_operand:DI 0 "register_operand" "=r") + (match_operand:DI 1 "register_operand" "r")) + (clobber (mem:BLK (match_operand:DI 2 "register_operand" "=r")))] + "" + "bis $31,%1,%0") + +(define_expand "epilogue" + [(return)] + "" + "sw_64_expand_epilogue ();") + +(define_expand "sibcall_epilogue" + [(return)] + "" +{ + sw_64_expand_epilogue (); + DONE; +}) + +(define_expand "builtin_longjmp" + [(use (match_operand:DI 0 "register_operand" "r"))] + "" +{ + /* The elements of the buffer are, in order: */ + rtx fp = gen_rtx_MEM (Pmode, operands[0]); + rtx lab = gen_rtx_MEM (Pmode, plus_constant (Pmode, operands[0], 8)); + rtx stack = gen_rtx_MEM (Pmode, plus_constant (Pmode, operands[0], 16)); + rtx pv = gen_rtx_REG (Pmode, 27); + + /* This bit is the same as expand_builtin_longjmp. */ + emit_move_insn (pv, lab); + emit_stack_restore (SAVE_NONLOCAL, stack); + emit_use (hard_frame_pointer_rtx); + emit_use (stack_pointer_rtx); + + emit_move_insn (hard_frame_pointer_rtx, fp); + /* Load the label we are jumping through into $27 so that we know + where to look for it when we get back to setjmp's function for + restoring the gp. */ + emit_jump_insn (gen_builtin_longjmp_internal (pv)); + emit_barrier (); + DONE; +}) + +;; This is effectively a copy of indirect_jump, but constrained such +;; that register renaming cannot foil our cunning plan with $27. +(define_insn "builtin_longjmp_internal" + [(set (pc) + (unspec_volatile [(match_operand:DI 0 "register_operand" "c")] + UNSPECV_LONGJMP))] + "" + "jmp $31,(%0),0" + [(set_attr "type" "ibr")]) + +(define_expand "builtin_setjmp_receiver" + [(unspec_volatile [(label_ref (match_operand 0))] UNSPECV_SETJMPR)] + "") + +(define_insn_and_split "*builtin_setjmp_receiver_1" + [(unspec_volatile [(match_operand 0)] UNSPECV_SETJMPR)] + "" +{ + if (TARGET_EXPLICIT_RELOCS) + return "#"; + else + return "br $27,$LSJ%=\n$LSJ%=:\;ldgp $29,0($27)"; +} + "&& TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 1) + (unspec_volatile:DI [(match_dup 2) (match_dup 3)] UNSPECV_LDGP1)) + (set (match_dup 1) + (unspec:DI [(match_dup 1) (match_dup 3)] UNSPEC_LDGP2))] +{ + if (prev_nonnote_insn (curr_insn) != XEXP (operands[0], 0)) + emit_insn (gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, operands[0]), + UNSPECV_SETJMPR_ER)); + operands[1] = pic_offset_table_rtx; + operands[2] = gen_rtx_REG (Pmode, 27); + operands[3] = GEN_INT (sw_64_next_sequence_number++); +} + [(set_attr "length" "12") + (set_attr "type" "multi")]) + +(define_insn "*builtin_setjmp_receiver_er_sl_1" + [(unspec_volatile [(match_operand 0)] UNSPECV_SETJMPR_ER)] + "TARGET_EXPLICIT_RELOCS" + "ldi $27,$LSJ%=-%l0($27)\n$LSJ%=:") + +;; When flag_reorder_blocks_and_partition is in effect, compiler puts +;; exception landing pads in a cold section. To prevent inter-section offset +;; calculation, a jump to original landing pad is emitted in the place of the +;; original landing pad. Since landing pad is moved, RA-relative GP +;; calculation in the prologue of landing pad breaks. To solve this problem, +;; we use alternative GP load approach. 
+ +(define_expand "exception_receiver" + [(unspec_volatile [(match_dup 0)] UNSPECV_EHR)] + "" +{ + if (flag_reorder_blocks_and_partition) + operands[0] = copy_rtx (sw_64_gp_save_rtx ()); + else + operands[0] = const0_rtx; +}) + +(define_insn "*exception_receiver_2" + [(unspec_volatile [(match_operand:DI 0 "memory_operand" "m")] UNSPECV_EHR)] + "flag_reorder_blocks_and_partition" + "ldl $29,%0" + [(set_attr "type" "ild")]) + +(define_insn_and_split "*exception_receiver_1" + [(unspec_volatile [(const_int 0)] UNSPECV_EHR)] + "" +{ + if (TARGET_EXPLICIT_RELOCS) + return "#"; + else + return "ldgp $29,0($26)"; +} + "&& TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 0) + (unspec_volatile:DI [(match_dup 1) (match_dup 2)] UNSPECV_LDGP1)) + (set (match_dup 0) + (unspec_volatile:DI [(match_dup 0) (match_dup 2)] UNSPECV_LDGP2))] +{ + operands[0] = pic_offset_table_rtx; + operands[1] = gen_rtx_REG (Pmode, 26); + operands[2] = GEN_INT (sw_64_next_sequence_number++); +} + [(set_attr "length" "8") + (set_attr "type" "multi")]) + +;; Prefetch data. +;; +;; +;; On SW6, these become official prefetch instructions. + +(define_insn "prefetch" + [(prefetch (match_operand:DI 0 "address_operand" "p") + (match_operand:DI 1 "const_int_operand" "n") + (match_operand:DI 2 "const_int_operand" "n"))] + "sw_64_cpu == PROCESSOR_SW6 || sw_64_cpu == PROCESSOR_SW8" +{ + /* Interpret "no temporal locality" as this data should be evicted once + it is used. The "evict next" alternatives load the data into the cache + and leave the LRU eviction counter pointing to that block. */ + static const char * alt[2][2] ; + if (flag_sw_prefetch_l1) + { + alt[0][0] = "fillcs_e %a0" ; /* read, evict next. */ + alt[0][1] = "fillcs %a0" ; /* read, evict next. */ + alt[1][0] = "fillde_e %a0" ; /* write, evict next. */ + alt[1][1] = "fillde %a0" ; /* write, evict next. */ + + } + else + { + alt[0][0] = "s_fillde %a0" ; /* read, evict next. */ + alt[0][1] = "s_fillcs %a0" ; /* read, evict next. */ + alt[1][0] = "fillde_e %a0" ; /* write, evict next. */ + alt[1][1] = "fillde %a0" ; /* write, evict next. */ + } + + bool write = INTVAL (operands[1]) != 0; + bool lru = INTVAL (operands[2]) != 0; + + return alt[write][lru]; +} + [(set_attr "type" "ild")]) + + +;; Close the trap shadow of preceding instructions. This is generated +;; by sw_64_reorg. + +(define_insn "trapb" + [(unspec_volatile [(const_int 0)] UNSPECV_TRAPB)] + "" + "memb" + [(set_attr "type" "misc")]) + +;; No-op instructions used by machine-dependent reorg to preserve +;; alignment for instruction issue. +;; The Unicos/Mk assembler does not support these opcodes. + +(define_insn "nop" + [(const_int 0)] + "" + "nop" + [(set_attr "type" "ilog")]) + +(define_insn "fnop" + [(const_int 1)] + "TARGET_FP" + "fcpys $f31,$f31,$f31" + [(set_attr "type" "fcpys")]) + +(define_insn "unop" + [(const_int 2)] + "" + "ldl_u $31,0($30)") + +(define_insn "realign" + [(unspec_volatile [(match_operand 0 "immediate_operand" "i")] + UNSPECV_REALIGN)] + "" + ".align %0 #realign") + +;; Instructions to be emitted from __builtins. + +(define_insn "builtin_cmpbge" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "reg_or_8bit_operand" "rI")] + UNSPEC_CMPBGE))] + "" + "cmpgeb %r1,%2,%0" + ;; The SW6 data sheets list this as ILOG. OTOH, SW6 doesn't + ;; actually differentiate between ILOG and ICMP in the schedule. 
+ [(set_attr "type" "icmp")]) + +(define_expand "extbl" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_extxl (operands[0], operands[1], GEN_INT (8), operands[2])); + DONE; +}) + +(define_expand "extwl" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_extxl (operands[0], operands[1], GEN_INT (16), operands[2])); + DONE; +}) + +(define_expand "extll" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_extxl (operands[0], operands[1], GEN_INT (32), operands[2])); + DONE; +}) + +(define_expand "extql" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_extxl (operands[0], operands[1], GEN_INT (64), operands[2])); + DONE; +}) + +(define_expand "builtin_insbl" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + operands[1] = gen_lowpart (QImode, operands[1]); + emit_insn (gen_insbl (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "builtin_inswl" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + operands[1] = gen_lowpart (HImode, operands[1]); + emit_insn (gen_inswl (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "builtin_insll" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + operands[1] = gen_lowpart (SImode, operands[1]); + emit_insn (gen_insll (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "inswh" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_insxh (operands[0], operands[1], GEN_INT (16), operands[2])); + DONE; +}) + +(define_expand "inslh" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_insxh (operands[0], operands[1], GEN_INT (32), operands[2])); + DONE; +}) + +(define_expand "insqh" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_insxh (operands[0], operands[1], GEN_INT (64), operands[2])); + DONE; +}) + +(define_expand "mskbl" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + rtx mask = GEN_INT (0xff); + emit_insn (gen_mskxl (operands[0], operands[1], mask, operands[2])); + DONE; +}) + +(define_expand "mskwl" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + rtx mask = GEN_INT (0xffff); + emit_insn (gen_mskxl (operands[0], operands[1], mask, operands[2])); + DONE; +}) + +(define_expand "mskll" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + rtx mask = gen_int_mode (0xffffffff, DImode); + emit_insn (gen_mskxl (operands[0], operands[1], mask, 
operands[2])); + DONE; +}) + +(define_expand "mskql" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + rtx mask = constm1_rtx; + emit_insn (gen_mskxl (operands[0], operands[1], mask, operands[2])); + DONE; +}) + +(define_expand "mskwh" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_mskxh (operands[0], operands[1], GEN_INT (16), operands[2])); + DONE; +}) + +(define_expand "msklh" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_mskxh (operands[0], operands[1], GEN_INT (32), operands[2])); + DONE; +}) + +(define_expand "mskqh" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_mskxh (operands[0], operands[1], GEN_INT (64), operands[2])); + DONE; +}) + +(define_expand "builtin_zap" + [(set (match_operand:DI 0 "register_operand") + (and:DI (unspec:DI + [(match_operand:DI 2 "reg_or_cint_operand")] + UNSPEC_ZAP) + (match_operand:DI 1 "reg_or_cint_operand")))] + "" +{ + if (CONST_INT_P (operands[2])) + { + rtx mask = sw_64_expand_zap_mask (INTVAL (operands[2])); + + if (mask == const0_rtx) + { + emit_move_insn (operands[0], const0_rtx); + DONE; + } + if (mask == constm1_rtx) + { + emit_move_insn (operands[0], operands[1]); + DONE; + } + + operands[1] = force_reg (DImode, operands[1]); + emit_insn (gen_anddi3 (operands[0], operands[1], mask)); + DONE; + } + + operands[1] = force_reg (DImode, operands[1]); + operands[2] = gen_lowpart (QImode, operands[2]); +}) + +(define_insn "*builtin_zap_1" + [(set (match_operand:DI 0 "register_operand" "=r,r,r,r") + (and:DI (unspec:DI + [(match_operand:QI 2 "reg_or_cint_operand" "n,n,r,r")] + UNSPEC_ZAP) + (match_operand:DI 1 "reg_or_cint_operand" "n,r,J,r")))] + "" + "@ + # + # + bis $31,$31,%0 + zap %r1,%2,%0" + [(set_attr "type" "shift,shift,ilog,shift")]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (and:DI (unspec:DI + [(match_operand:QI 2 "const_int_operand")] + UNSPEC_ZAP) + (match_operand:DI 1 "const_int_operand")))] + "" + [(const_int 0)] +{ + rtx mask = sw_64_expand_zap_mask (INTVAL (operands[2])); + + operands[1] = gen_int_mode (INTVAL (operands[1]) & INTVAL (mask), DImode); + emit_move_insn (operands[0], operands[1]); + DONE; +}) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (and:DI (unspec:DI + [(match_operand:QI 2 "const_int_operand")] + UNSPEC_ZAP) + (match_operand:DI 1 "register_operand")))] + "" + [(set (match_dup 0) + (and:DI (match_dup 1) (match_dup 2)))] +{ + operands[2] = sw_64_expand_zap_mask (INTVAL (operands[2])); + if (operands[2] == const0_rtx) + { + emit_move_insn (operands[0], const0_rtx); + DONE; + } + if (operands[2] == constm1_rtx) + { + emit_move_insn (operands[0], operands[1]); + DONE; + } +}) + +(define_expand "builtin_zapnot" + [(set (match_operand:DI 0 "register_operand") + (and:DI (unspec:DI + [(not:QI (match_operand:DI 2 "reg_or_cint_operand"))] + UNSPEC_ZAP) + (match_operand:DI 1 "reg_or_cint_operand")))] + "" +{ + if (CONST_INT_P (operands[2])) + { + rtx mask = sw_64_expand_zap_mask (~ INTVAL (operands[2])); + + if (mask == const0_rtx) + { + emit_move_insn (operands[0], const0_rtx); + DONE; + } + if (mask == constm1_rtx) + { + emit_move_insn (operands[0], operands[1]); + 
DONE; + } + + operands[1] = force_reg (DImode, operands[1]); + emit_insn (gen_anddi3 (operands[0], operands[1], mask)); + DONE; + } + + operands[1] = force_reg (DImode, operands[1]); + operands[2] = gen_lowpart (QImode, operands[2]); +}) + +(define_insn "*builtin_zapnot_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (unspec:DI + [(not:QI (match_operand:QI 2 "register_operand" "r"))] + UNSPEC_ZAP) + (match_operand:DI 1 "reg_or_0_operand" "rJ")))] + "" + "zapnot %r1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "builtin_amask" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "reg_or_8bit_operand" "rI")] + UNSPEC_AMASK))] + "" + "amask %1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "builtin_implver" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(const_int 0)] UNSPEC_IMPLVER))] + "" + "implver %0" + [(set_attr "type" "ilog")]) + +(define_insn "builtin_rpcc" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(const_int 0)] UNSPECV_RPCC))] + "" + "rtc %0" + [(set_attr "type" "ilog")]) + +(define_expand "builtin_minub8" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_0_operand")] + "TARGET_MAX" +{ + sw_64_expand_builtin_vector_binop (gen_uminv8qi3, V8QImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_minsb8" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_0_operand")] + "TARGET_MAX" +{ + sw_64_expand_builtin_vector_binop (gen_sminv8qi3, V8QImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_minuw4" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_0_operand")] + "TARGET_MAX" +{ + sw_64_expand_builtin_vector_binop (gen_uminv4hi3, V4HImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_minsw4" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_0_operand")] + "TARGET_MAX" +{ + sw_64_expand_builtin_vector_binop (gen_sminv4hi3, V4HImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_maxub8" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_0_operand")] + "TARGET_MAX" +{ + sw_64_expand_builtin_vector_binop (gen_umaxv8qi3, V8QImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_maxsb8" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_0_operand")] + "TARGET_MAX" +{ + sw_64_expand_builtin_vector_binop (gen_smaxv8qi3, V8QImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_maxuw4" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_0_operand")] + "TARGET_MAX" +{ + sw_64_expand_builtin_vector_binop (gen_umaxv4hi3, V4HImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_maxsw4" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_0_operand")] + "TARGET_MAX" +{ + sw_64_expand_builtin_vector_binop (gen_smaxv4hi3, V4HImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_insn "builtin_perr" + [(set (match_operand:DI 0 
"register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "%rJ") + (match_operand:DI 2 "reg_or_8bit_operand" "rJ")] + UNSPEC_PERR))] + "TARGET_MAX" + "perr %r1,%r2,%0" + [(set_attr "type" "mvi")]) + +(define_expand "builtin_pklb" + [(set (match_operand:DI 0 "register_operand") + (vec_concat:V8QI + (vec_concat:V4QI + (truncate:V2QI (match_operand:DI 1 "register_operand")) + (match_dup 2)) + (match_dup 3)))] + "TARGET_MAX" +{ + operands[0] = gen_lowpart (V8QImode, operands[0]); + operands[1] = gen_lowpart (V2SImode, operands[1]); + operands[2] = CONST0_RTX (V2QImode); + operands[3] = CONST0_RTX (V4QImode); +}) + +(define_insn "*pklb" + [(set (match_operand:V8QI 0 "register_operand" "=r") + (vec_concat:V8QI + (vec_concat:V4QI + (truncate:V2QI (match_operand:V2SI 1 "register_operand" "r")) + (match_operand:V2QI 2 "const0_operand")) + (match_operand:V4QI 3 "const0_operand")))] + "TARGET_MAX" + "pklb %r1,%0" + [(set_attr "type" "mvi")]) + +(define_expand "builtin_pkwb" + [(set (match_operand:DI 0 "register_operand") + (vec_concat:V8QI + (truncate:V4QI (match_operand:DI 1 "register_operand")) + (match_dup 2)))] + "TARGET_MAX" +{ + operands[0] = gen_lowpart (V8QImode, operands[0]); + operands[1] = gen_lowpart (V4HImode, operands[1]); + operands[2] = CONST0_RTX (V4QImode); +}) + +(define_insn "*pkwb" + [(set (match_operand:V8QI 0 "register_operand" "=r") + (vec_concat:V8QI + (truncate:V4QI (match_operand:V4HI 1 "register_operand" "r")) + (match_operand:V4QI 2 "const0_operand")))] + "TARGET_MAX" + "pkwb %r1,%0" + [(set_attr "type" "mvi")]) + +(define_expand "builtin_unpkbl" + [(set (match_operand:DI 0 "register_operand") + (zero_extend:V2SI + (vec_select:V2QI (match_operand:DI 1 "register_operand") + (parallel [(const_int 0) (const_int 1)]))))] + "TARGET_MAX" +{ + operands[0] = gen_lowpart (V2SImode, operands[0]); + operands[1] = gen_lowpart (V8QImode, operands[1]); +}) + +(define_insn "*unpkbl" + [(set (match_operand:V2SI 0 "register_operand" "=r") + (zero_extend:V2SI + (vec_select:V2QI (match_operand:V8QI 1 "reg_or_0_operand" "rW") + (parallel [(const_int 0) (const_int 1)]))))] + "TARGET_MAX" + "unpkbl %r1,%0" + [(set_attr "type" "mvi")]) + +(define_expand "builtin_unpkbw" + [(set (match_operand:DI 0 "register_operand") + (zero_extend:V4HI + (vec_select:V4QI (match_operand:DI 1 "register_operand") + (parallel [(const_int 0) + (const_int 1) + (const_int 2) + (const_int 3)]))))] + "TARGET_MAX" +{ + operands[0] = gen_lowpart (V4HImode, operands[0]); + operands[1] = gen_lowpart (V8QImode, operands[1]); +}) + +(define_insn "*unpkbw" + [(set (match_operand:V4HI 0 "register_operand" "=r") + (zero_extend:V4HI + (vec_select:V4QI (match_operand:V8QI 1 "reg_or_0_operand" "rW") + (parallel [(const_int 0) + (const_int 1) + (const_int 2) + (const_int 3)]))))] + "TARGET_MAX" + "unpkbw %r1,%0" + [(set_attr "type" "mvi")]) + +(include "sync.md") + +;; The call patterns are at the end of the file because their +;; wildcard operand0 interferes with nice recognition. 
+ +(define_insn "*call_value_osf_1_er_noreturn" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS + && find_reg_note (insn, REG_NORETURN, NULL_RTX)" + "@ + call $26,($27),0 + bsr $26,%1\t\t!samegp + ldl $27,%1($29)\t\t!literal!%#\;call $26,($27),%1\t\t!lituse_jsr!%#" + [(set_attr "type" "call") + (set_attr "length" "*,*,8")]) + +(define_insn "*call_value_osf_1_er_setfpec0" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && flag_fpcr_set == 3 " + "@ + call $26,(%1),0\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%* + bsr $26,%1\t\t!samegp + ldl $27,%1($29)\t\t!literal!%#\;call $26,($27),0\t\t!lituse_jsr!%#\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%*" + [(set_attr "type" "call") + (set_attr "length" "12,*,16")]) + +(define_insn "*call_value_osf_1_er_setfpec1" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && flag_fpcr_set == 1" + "@ + call $26,(%1),0\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%* + bsr $26,%1\t\t!samegp + ldl $27,%1($29)\t\t!literal!%#\;call $26,($27),0\t\t!lituse_jsr!%#\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%*" + [(set_attr "type" "call") + (set_attr "length" "12,*,16")]) + +(define_insn "*call_value_osf_1_er" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS" + "@ + call $26,(%1),0\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%* + bsr $26,%1\t\t!samegp + ldl $27,%1($29)\t\t!literal!%#\;call $26,($27),0\t\t!lituse_jsr!%#\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%*" + [(set_attr "type" "call") + (set_attr "length" "12,*,16")]) + +;; We must use peep2 instead of a split because we need accurate life +;; information for $gp. Consider the case of { bar (); while (1); }. +(define_peephole2 + [(parallel [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "TARGET_EXPLICIT_RELOCS && reload_completed + && ! samegp_function_operand (operands[1], Pmode) + && (peep2_regno_dead_p (1, 29) + || find_reg_note (insn, REG_NORETURN, NULL_RTX))" + [(parallel [(set (match_dup 0) + (call (mem:DI (match_dup 3)) + (match_dup 2))) + (use (reg:DI 29)) + (use (match_dup 1)) + (use (match_dup 4)) + (clobber (reg:DI 26))])] +{ + if (CONSTANT_P (operands[1])) + { + operands[3] = gen_rtx_REG (Pmode, 27); + operands[4] = GEN_INT (sw_64_next_sequence_number++); + emit_insn (gen_movdi_er_high_g (operands[3], pic_offset_table_rtx, + operands[1], operands[4])); + } + else + { + operands[3] = operands[1]; + operands[1] = const0_rtx; + operands[4] = const0_rtx; + } +}) + +(define_peephole2 + [(parallel [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "TARGET_EXPLICIT_RELOCS && reload_completed + && ! samegp_function_operand (operands[1], Pmode) + && ! 
(peep2_regno_dead_p (1, 29) + || find_reg_note (insn, REG_NORETURN, NULL_RTX)) + && !enable_asan_check_stack ()" + [(parallel [(set (match_dup 0) + (call (mem:DI (match_dup 3)) + (match_dup 2))) + (set (match_dup 6) + (unspec:DI [(match_dup 6) (match_dup 4)] UNSPEC_LDGP1)) + (use (match_dup 1)) + (use (match_dup 5)) + (clobber (reg:DI 26))]) + (set (match_dup 6) + (unspec:DI [(match_dup 6) (match_dup 4)] UNSPEC_LDGP2))] +{ + if (CONSTANT_P (operands[1])) + { + operands[3] = gen_rtx_REG (Pmode, 27); + operands[5] = GEN_INT (sw_64_next_sequence_number++); + emit_insn (gen_movdi_er_high_g (operands[3], pic_offset_table_rtx, + operands[1], operands[5])); + } + else + { + operands[3] = operands[1]; + operands[1] = const0_rtx; + operands[5] = const0_rtx; + } + operands[4] = GEN_INT (sw_64_next_sequence_number++); + operands[6] = pic_offset_table_rtx; +}) + +(define_insn "*call_value_osf_2_er_nogp" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "register_operand" "c")) + (match_operand 2))) + (use (reg:DI 29)) + (use (match_operand 3)) + (use (match_operand 4)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS" + "call $26,(%1),%3%J4" + [(set_attr "type" "call")]) + +(define_insn "*call_value_osf_2_er" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "register_operand" "c")) + (match_operand 2))) + (set (reg:DI 29) + (unspec:DI [(reg:DI 29) (match_operand 5 "const_int_operand")] + UNSPEC_LDGP1)) + (use (match_operand 3)) + (use (match_operand 4)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS" + { + return "call $26,(%1),%3%J4\;ldih $29,0($26)\t\t!gpdisp!%5"; + } + [(set_attr "type" "call") + (set_attr "cannot_copy" "true") + (set_attr "length" "8")]) + +(define_insn "*call_value_osf_1_noreturn" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "! TARGET_EXPLICIT_RELOCS + && find_reg_note (insn, REG_NORETURN, NULL_RTX)" + "@ + call $26,($27),0 + bsr $26,$%1..ng + call $26,%1" + [(set_attr "type" "call") + (set_attr "length" "*,*,8")]) + +(define_int_iterator TLS_CALL + [UNSPEC_TLSGD_CALL + UNSPEC_TLSLDM_CALL]) + +(define_int_attr tls + [(UNSPEC_TLSGD_CALL "tlsgd") + (UNSPEC_TLSLDM_CALL "tlsldm")]) + +(define_insn "call_value_osf_" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "symbolic_operand")) + (const_int 0))) + (unspec [(match_operand:DI 2 "const_int_operand")] TLS_CALL) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "HAVE_AS_TLS" + "ldl $27,%1($29)\t\t!literal!%2\;call $26,($27),%1\t\t!lituse_!%2\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%*" + [(set_attr "type" "call") + (set_attr "cannot_copy" "true") + (set_attr "length" "16")]) + +;; We must use peep2 instead of a split because we need accurate life +;; information for $gp. 
+(define_peephole2 + [(parallel + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "symbolic_operand")) + (const_int 0))) + (unspec [(match_operand:DI 2 "const_int_operand")] TLS_CALL) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "HAVE_AS_TLS && reload_completed + && peep2_regno_dead_p (1, 29)" + [(set (match_dup 3) + (unspec:DI [(match_dup 5) + (match_dup 1) + (match_dup 2)] UNSPEC_LITERAL)) + (parallel [(set (match_dup 0) + (call (mem:DI (match_dup 3)) + (const_int 0))) + (use (match_dup 5)) + (use (match_dup 1)) + (use (unspec [(match_dup 2)] TLS_CALL)) + (clobber (reg:DI 26))]) + (set (match_dup 5) + (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP2))] +{ + operands[3] = gen_rtx_REG (Pmode, 27); + operands[4] = GEN_INT (sw_64_next_sequence_number++); + operands[5] = pic_offset_table_rtx; +}) + +(define_peephole2 + [(parallel + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "symbolic_operand")) + (const_int 0))) + (unspec [(match_operand:DI 2 "const_int_operand")] TLS_CALL) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "HAVE_AS_TLS && reload_completed + && !peep2_regno_dead_p (1, 29) + && !find_reg_note (insn, REG_EH_REGION, NULL_RTX)" + [(set (match_dup 3) + (unspec:DI [(match_dup 5) + (match_dup 1) + (match_dup 2)] UNSPEC_LITERAL)) + (parallel [(set (match_dup 0) + (call (mem:DI (match_dup 3)) + (const_int 0))) + (set (match_dup 5) + (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP1)) + (use (match_dup 1)) + (use (unspec [(match_dup 2)] TLS_CALL)) + (clobber (reg:DI 26))]) + (set (match_dup 5) + (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP2))] +{ + operands[3] = gen_rtx_REG (Pmode, 27); + operands[4] = GEN_INT (sw_64_next_sequence_number++); + operands[5] = pic_offset_table_rtx; +}) + + +(define_insn "*call_value_osf_1_setfpec0" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "! TARGET_EXPLICIT_RELOCS && flag_fpcr_set == 3" + "@ + call $26,($27),0\;ldgp $29,0($26) + bsr $26,$%1..ng + call $26,%1\;ldgp $29,0($26)" + [(set_attr "type" "call") + (set_attr "length" "12,*,16")]) + +(define_insn "*call_value_osf_1_setfpec1" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "! TARGET_EXPLICIT_RELOCS && flag_fpcr_set == 1" + "@ + call $26,($27),0\;ldgp $29,0($26) + bsr $26,$%1..ng + call $26,%1\;ldgp $29,0($26)" + [(set_attr "type" "call") + (set_attr "length" "12,*,16")]) + +(define_insn "*call_value_osf_1" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "! TARGET_EXPLICIT_RELOCS" + "@ + call $26,($27),0\;ldgp $29,0($26) + bsr $26,$%1..ng + call $26,%1\;ldgp $29,0($26)" + [(set_attr "type" "call") + (set_attr "length" "12,*,16")]) + +(define_insn "*sibcall_value_osf_1_er" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "symbolic_operand" "R,s")) + (match_operand 2))) + (unspec [(reg:DI 29)] UNSPEC_SIBCALL)] + "TARGET_EXPLICIT_RELOCS" + "@ + br $31,%1\t\t!samegp + ldl $27,%1($29)\t\t!literal!%#\;jmp $31,($27),%1\t\t!lituse_jsr!%#" + [(set_attr "type" "call") + (set_attr "length" "*,8")]) + +(define_insn "*sibcall_value_osf_1" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "symbolic_operand" "R,s")) + (match_operand 2))) + (unspec [(reg:DI 29)] UNSPEC_SIBCALL)] + "! 
TARGET_EXPLICIT_RELOCS" + "@ + br $31,$%1..ng + ldi $27,%1\;jmp $31,($27),%1" + [(set_attr "type" "call") + (set_attr "length" "*,8")]) + +;; Builtins to replace 1.0f/sqrtf(x) with instructions using RSQRTE and the +;; appropriate fixup. +;; Currently, does not work with the double precision floating-point.(0x5fe6eb000000000a) +(define_expand "rsqrtsf2" + [(match_operand:SF 0 "register_operand" "") + (match_operand:SF 1 "register_operand" "")] + "TARGET_FP && flag_reciprocal_math == 1 && flag_sw_rsqrt == 1" + { + sw_64_emit_rsqrt (operands[0], operands[1], 1); + DONE; + }) + +(define_insn "*movsf2" + [(set (match_operand:SF 0 "nonimmediate_operand" "=r") + (unspec:SF [(match_operand:SF 1 "input_operand" "f")] + UNSPEC_FIMOVS))] + "TARGET_FP && flag_reciprocal_math == 1 && flag_sw_rsqrt == 1" + "fimovs %1,%0" + [(set_attr "type" "ldsym")]) + +(define_insn "speculation_barrier" + [(unspec_volatile [(const_int 0)] UNSPECV_SPECULATION_BARRIER)] + "" + "imemb" + [(set_attr "type" "misc")]) + +(define_insn "stack_tie" + [(set (mem:BLK (scratch)) + (unspec:BLK [(match_operand:DI 0 "register_operand" "r") + (match_operand:DI 1 "register_operand" "r")] + UNSPEC_TIE))] + "" + "" + [(set_attr "length" "0")] +) + +(include "m32.md") diff --git a/gcc/config/sw_64/sw_64.opt b/gcc/config/sw_64/sw_64.opt new file mode 100644 index 0000000000000000000000000000000000000000..fdb6304a283e7226f471dfe94db6ddf15e6f8246 --- /dev/null +++ b/gcc/config/sw_64/sw_64.opt @@ -0,0 +1,318 @@ +; Options for the Sw_64 port of the compiler +; +; Copyright (C) 2005-2020 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT +; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +; License for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . +fsw-sf-cmpsel +Target Var(flag_sw_sf_cmpsel) Init(0) +use or not use SF cmp/br/selcet instructions. + +msw-use-32align +C C++ Fortran LTO Driver Target Mask(SW_32ALIGN) Save +Use or not use 32align. + +fsw-hardware-prefetch +Target Var(flag_sw_hardware_prefetch) Init(0) +set hardware_prefetch registers:PFH_CTL,PFH_CNT. + +fsw-hardware-prefetch-clt= +Common Report Joined RejectNegative UInteger Var(flag_hardware_prefetch_clt) Init(5) Optimization + +fsw-hardware-prefetch-cnt-l1= +Common Report Joined RejectNegative UInteger Var(flag_hardware_prefetch_cnt_l1) Init(0) Optimization + +fsw-hardware-prefetch-cnt-l2= +Common Report Joined RejectNegative UInteger Var(flag_hardware_prefetch_cnt_l2) Init(0) Optimization + +fsw-hardware-prefetch-cnt-l3= +Common Report Joined RejectNegative UInteger Var(flag_hardware_prefetch_cnt_l3) Init(5) Optimization + +fsw-fselect +Target Var(flag_sw_fselect) Init(0) +Use or not use less instructions for sel/fsel. + +fsw-branch-fusion +Target Var(flag_sw_branch_fusion) Init(1) +fuse the cbranch instructions. + +fsw-branch-combination +Target Var(flag_sw_branch_combination) Init(0) +combine the cbranch instructions. + +fsw-unalign-byte +Target Var(flag_sw_unalign_byte) Init(0) +Not use or use ldl_u/stl_u instructions. 
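+; Note (illustrative, not part of the original option list): a record of the
+; form "Target Var(flag_sw_foo) Init(0)" is how GCC's .opt machinery creates
+; both the -fsw-foo/-fno-sw-foo command-line switches and the backing C
+; variable flag_sw_foo, so backend code can simply test it, e.g.
+;   if (flag_sw_unalign_byte)
+;     ... emit ldl_u/stl_u sequences ...
+; "-fsw-foo"/"flag_sw_foo" above are placeholder names.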
+
+fsw-rev
+Target Report Var(flag_sw_rev) Init(1)
+Use or not use the rev instruction.
+
+fsw-cmov
+Target Report Var(flag_sw_cmov) Init(1)
+Use the added floating-point integer conversion instruction.
+
+fsw-bitop
+Target Report Var(flag_sw_bitop) Init(0)
+Use the ISA bit-operation instructions.
+
+fsw-shift-word
+Target Report Var(flag_sw_shift_word) Init(1)
+Use or not use the sw8a shift instructions.
+
+fsw-int-divmod
+Target Report Var(flag_sw_int_divmod) Init(1)
+Use or not use the int div/mod instructions.
+
+fsw-fprnd
+Target Report Var(flag_sw_fprnd) Init(0)
+Use floating-point rounding instructions.
+
+fsw-auto-inc-dec
+Target Var(flag_sw_auto_inc_dec) Init(0)
+Use or not use int auto-inc-dec load/store instructions.
+
+fsw-use-cas
+Target Var(flag_sw_use_cas) Init(1)
+Use or not use the compare-and-swap instruction.
+
+fsw-fma
+Target Report Var(flag_sw_fma) Init(1)
+Enable the fma option.
+
+fsw-sdsame
+Target Report Var(flag_sw_sdsame) Init(0)
+For destination and source being the same.
+;;;;;;;;;;;;;;;;;;;;;;;;;
+
+fsw-rsqrt
+Target Report Var(flag_sw_rsqrt) Init(0)
+Fast calculation of 1.0f/sqrtf (x). Does not work with double-precision floating-point.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+fsw-fast-math
+Target Report Var(flag_sw_fast_math) Init(0)
+Avoid the SPEC CPU 2017 628 fast-math error. The corresponding code is in gcc/gimple-match-head.c.
+;;;;;;;;;;;;;;;;;;;;;;;;;
+
+fsw-nofcpys
+Target Var(flag_sw_nofcpys) Init(1)
+Delete fcpys after the fcvtsd instruction.
+
+fsw-rtid
+Target Var(flag_sw_rtid) Init(1)
+Use rtid instead of syscall 0x9e.
+
+fsw-rtx-cost
+Target Var(flag_sw_rtx_cost) Init(0)
+Adjust the rtx cost.
+
+fsw-sxaddl
+Target Var(flag_sw_sxaddl) Init(1)
+Combine the sXaddl instructions.
+
+fsw-delnop
+Target Var(flag_sw_delnop) Init(1)
+Delete the nop instruction.
+
+fsw-int-div-opt
+Target Report Var(flag_sw_int_div_opt) Init(0)
+Enable the SW integer division optimization.
+
+fsw-prefetch-l1
+Target Var(flag_sw_prefetch_l1) Init(1)
+Use L1 load prefetch instead of L2.
+
+fsw-prefetch-add
+Target Var(flag_sw_prefetch_add) Init(1)
+Generate prefetches for cases like stream add.
+
+fsw-prefetch-unroll
+Target Var(flag_sw_prefetch_unroll) Init(0)
+Optimize loop unrolling in the prefetch pass.
+
+msoft-float
+Target Report Mask(SOFT_FP)
+Do not use hardware fp.
+
+fsw-recip
+Target Report Var(flag_sw_recip) Init(0)
+Use the ISA floating-point reciprocal instructions.
+
+fsw-recip-precision
+Target Report Var(flag_sw_recip_precision) Init(0)
+Assume that the reciprocal estimate instructions provide more accuracy.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+
+mfp-regs
+Target Report Mask(FPREGS)
+Use fp registers.
+
+mgas
+Target Ignore
+Does nothing. Preserved for backward compatibility.
+
+mieee-conformant
+Target RejectNegative Mask(IEEE_CONFORMANT)
+Request IEEE-conformant math library routines (SYSV).
+
+mieee
+Target Report RejectNegative Mask(IEEE)
+Emit IEEE-conformant code, without inexact exceptions.
+
+mieee-main
+Target Report RejectNegative Mask(IEEE_MAIN)
+Emit IEEE-conformant code, without inexact exceptions.
+
+mieee-with-inexact
+Target Report RejectNegative Mask(IEEE_WITH_INEXACT)
+Emit IEEE-conformant code, with inexact exceptions.
+
+mbuild-constants
+Target Report Mask(BUILD_CONSTANTS)
+Do not emit complex integer constants to read-only memory.
+
+mfloat-vax
+Target Report RejectNegative Mask(FLOAT_VAX)
+Use VAX fp.
+
+mfloat-ieee
+Target Report RejectNegative InverseMask(FLOAT_VAX)
+Do not use VAX fp.
+
+mbwx
+Target Report Mask(BWX)
+Emit code for the byte/word ISA extension.
+
+mmax
+Target Report Mask(MAX)
+Emit code for the motion video ISA extension.
+ +mfix +Target Report Mask(FIX) +Emit code for the fp move and sqrt ISA extension. + +mcix +Target Report Mask(CIX) +Emit code for the counting ISA extension. + +msw6a +Target Report Mask(SW6A) +Emit code for the SW6A ISA extension. + +msw6b +Target Report Mask(SW6B) +Emit code for the SW6B ISA extension. + +msw8a +Target Report Mask(SW8A) +Emit code for the SW8A ISA extension. + +mexplicit-relocs +Target Report Mask(EXPLICIT_RELOCS) +Emit code using explicit relocation directives. + +msmall-data +Target Report RejectNegative Mask(SMALL_DATA) +Emit 16-bit relocations to the small data areas. + +mlarge-data +Target Report RejectNegative InverseMask(SMALL_DATA) +Emit 32-bit relocations to the small data areas. + +msmall-text +Target Report RejectNegative Mask(SMALL_TEXT) +Emit direct branches to local functions. + +mlarge-text +Target Report RejectNegative InverseMask(SMALL_TEXT) +Emit indirect branches to local functions. + +mtls-kernel +Target Report Mask(TLS_KERNEL) +Emit rdval for thread pointer. + +mlong-double-128 +Target Report RejectNegative Mask(LONG_DOUBLE_128) +Use 128-bit long double. + +mlong-double-64 +Target Report RejectNegative InverseMask(LONG_DOUBLE_128) +Use 64-bit long double. + +mcpu= +Target RejectNegative Joined Var(sw_64_cpu_string) +Use features of and schedule given CPU. + +mtune= +Target RejectNegative Joined Var(sw_64_tune_string) +Schedule given CPU. + +mfp-rounding-mode= +Target RejectNegative Joined Var(sw_64_fprm_string) +Control the generated fp rounding mode. + +mfp-trap-mode= +Target RejectNegative Joined Var(sw_64_fptm_string) +Control the IEEE trap mode. + +mtrap-precision= +Target RejectNegative Joined Var(sw_64_tp_string) +Control the precision given to fp exceptions. + +mmemory-latency= +Target RejectNegative Joined Var(sw_64_mlat_string) +Tune expected memory latency. + +mtls-size= +Target RejectNegative Joined UInteger Var(sw_64_tls_size) Init(32) +Specify bit size of immediate TLS offsets. + +msimd +C C++ Fortran Driver Target Mask(SW_SIMD) Save +Support SW SIMD built-in functions and code generation. + +mgprel-size= +Target RejectNegative Joined UInteger Var(sw_64_gprel_size) Init(16) +Specify bit size of gprel relocation offsets. + +mtls-tlsgd= +Target RejectNegative Joined UInteger Var(sw_64_tls_gd) Init(16) +Specify the bitsize of tlsgd relocation offset relative GP. + +mtls-tlsldm= +Target RejectNegative Joined UInteger Var(sw_64_tls_ldm) Init(16) +Specify the bitsize of tlsldm relocation offset relative GP. + +mtls-gotdtprel= +Target RejectNegative Joined UInteger Var(sw_64_tls_gotdtprel) Init(16) +Specify the bitsize of gotdtprel relocation offset relative GP. + +mtls-gottprel= +Target RejectNegative Joined UInteger Var(sw_64_tls_gottprel) Init(16) +Specify the bitsize of gottprel relocation offset relative GP. + +mlra +Target Report Var(sh_lra_flag) Init(0) Save +Use reload instead of LRA (transitional). + +mtrunc +C Fortran Driver Target Mask(SW_TRUNC) Save +Support fix_trunc code generation. + +m32 +C ObjC C++ ObjC++ LTO Fortran Driver Target Report Mask(SW_M32) Init(0) +M32 optimization. diff --git a/gcc/config/sw_64/sync.md b/gcc/config/sw_64/sync.md new file mode 100644 index 0000000000000000000000000000000000000000..71fd0478e21ca55779f61b364323ec869c4ba240 --- /dev/null +++ b/gcc/config/sw_64/sync.md @@ -0,0 +1,499 @@ +;; GCC machine description for Sw_64 synchronization instructions. +;; Copyright (C) 2005-2020 Free Software Foundation, Inc. +;; +;; This file is part of GCC. 
+;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_code_iterator FETCHOP [plus minus ior xor and]) +(define_code_attr fetchop_name + [(plus "add") (minus "sub") (ior "or") (xor "xor") (and "and")]) +(define_code_attr fetchop_pred + [(plus "add_operand") (minus "reg_or_8bit_operand") + (ior "or_operand") (xor "or_operand") (and "and_operand")]) +(define_code_attr fetchop_constr + [(plus "rKL") (minus "rI") (ior "rIN") (xor "rIN") (and "rINM")]) + + +(define_expand "memory_barrier" + [(set (match_dup 0) + (unspec:BLK [(match_dup 0)] UNSPEC_MB))] + "" +{ + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; +}) + +;; mb-> memb +(define_insn "*memory_barrier" + [(set (match_operand:BLK 0) + (unspec:BLK [(match_dup 0)] UNSPEC_MB))] + "" + "memb" + [(set_attr "type" "mb")]) + +(define_insn "write_memory_barrier" + [(unspec:BLK [(const_int 0)] UNSPEC_MB)] + "TARGET_SW8A" + "wmemb" + [(set_attr "type" "mb")]) + +;; "ld_l %0,%1" +(define_insn "@load_locked_" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (unspec_volatile:I48MODE + [(match_operand:I48MODE 1 "memory_operand" "m")] + UNSPECV_LL))] + "" + { + switch ('') + { + case 'w': + return "ldi %0,%1\;lldw %0,0(%0)"; + case 'l': + return "ldi %0,%1\;lldl %0,0(%0)"; + default: + return "ld_l %0,%1"; + } + } + [(set_attr "type" "ld_l")]) + +;; "st_c %0,%1" +(define_insn "@store_conditional_" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(const_int 0)] UNSPECV_SC)) + (set (match_operand:I48MODE 1 "memory_operand" "=m") + (match_operand:I48MODE 2 "reg_or_0_operand" "0")) + (clobber (reg:DI 28))] + "" + { + switch ('') + { + case 'w': + return "ldi $28,%1\;lstw %0,0($28)"; + case 'l': + return "ldi $28,%1\;lstl %0,0($28)"; + default: + return "st_c %0,%1"; + } + } + [(set_attr "type" "st_c")]) + + (define_insn "builtin_rd_f" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(const_int 0)] UNSPECV_SC))] + "" + "rd_f %0" + [(set_attr "type" "st_c")]) + + (define_insn "builtin_wr_f" + [(match_operand:DI 0 "register_operand" "r") + (unspec_volatile:DI [(const_int 0)] UNSPECV_LL)] + "" + "wr_f %0" + [(set_attr "type" "ld_l")]) + +;; The Sw_64 Architecture Handbook says that it is UNPREDICTABLE whether +;; the lock is cleared by a normal load or store. This means we cannot +;; expand a ll/sc sequence before reload, lest a register spill is +;; inserted inside the sequence. It is also UNPREDICTABLE whether the +;; lock is cleared by a TAKEN branch. This means that we can not expand +;; a ll/sc sequence containing a branch (i.e. compare-and-swap) until after +;; the final basic-block reordering pass. 
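+;; For orientation only, a rough pseudo-assembly sketch of the kind of retry
+;; loop the post-reload splitters (sw_64_split_compare_and_swap and friends)
+;; are expected to emit.  Only lldl/lstl/rd_f/wr_f are taken from the
+;; patterns in this file; the remaining mnemonics, registers and labels are
+;; illustrative and may not match the real generated code:
+;;
+;;   retry:
+;;     lldl   $1,0($16)        ; load-locked the current value
+;;     cmpeq  $1,$17,$2        ; compare with the expected value
+;;     wr_f   $2               ; arm the lock flag only on a match
+;;     mov    $18,$3
+;;     lstl   $3,0($16)        ; store-conditional the desired value
+;;     rd_f   $3               ; read back whether the store succeeded
+;;     beq    $2,done          ; compare failed: exit
+;;     beq    $3,retry         ; store-conditional failed: retry
+;;   done:
+;;
+;; Because the conditional branches sit inside the lldl/lstl window, no
+;; spill, reload or block reordering may be scheduled into that window,
+;; which is exactly why the expansion is postponed as described above.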
+ +(define_expand "atomic_compare_and_swap" + [(parallel + [(set (match_operand:DI 0 "register_operand") ;; bool out + (unspec_volatile:DI [(const_int 0)] UNSPECV_CMPXCHG)) + (set (match_operand:I48MODE 1 "register_operand") ;; val out + (unspec_volatile:I48MODE [(const_int 0)] UNSPECV_CMPXCHG)) + (set (match_operand:I48MODE 2 "memory_operand") ;; memory + (unspec_volatile:I48MODE + [(match_dup 2) + (match_operand:I48MODE 3 "reg_or_8bit_operand") ;; expected + (match_operand:I48MODE 4 "add_operand") ;; desired + (match_operand:SI 5 "const_int_operand") ;; is_weak + (match_operand:SI 6 "const_int_operand") ;; succ model + (match_operand:SI 7 "const_int_operand") ;; fail model + (match_operand:DI 8 "register_operand")] + UNSPECV_CMPXCHG)) + (clobber (reg:DI 28))])] + "" +{ + if (mode == SImode) + { + operands[3] = convert_modes (DImode, SImode, operands[3], 0); + operands[4] = convert_modes (DImode, SImode, operands[4], 0); + } + if (TARGET_SW8A) + { + if (flag_sw_use_cas) + { + if (CONST_INT_P (operands[3])) + operands[3] = force_reg (DImode, operands[3]); + + if (CONST_INT_P (operands[4])) + operands[4] = force_reg (DImode, operands[4]); + emit_insn (gen_atomic_compare_and_swap_target_sw8a (operands[0], + operands[1], + operands[2], + operands[3], + operands[4], + operands[5], + operands[6], + operands[7])); + DONE; + } + } +}) + +(define_insn_and_split "*atomic_compare_and_swap" + [(set (match_operand:DI 0 "register_operand" "=&r") ;; bool out + (unspec_volatile:DI [(const_int 0)] UNSPECV_CMPXCHG)) + (set (match_operand:I48MODE 1 "register_operand" "=&r") ;; val out + (unspec_volatile:I48MODE [(const_int 0)] UNSPECV_CMPXCHG)) + (set (match_operand:I48MODE 2 "memory_operand" "+m") ;; memory + (unspec_volatile:I48MODE + [(match_dup 2) + (match_operand:DI 3 "reg_or_8bit_operand" "rI") ;; expected + (match_operand:DI 4 "add_operand" "rKL") ;; desired + (match_operand:SI 5 "const_int_operand") ;; is_weak + (match_operand:SI 6 "const_int_operand") ;; succ model + (match_operand:SI 7 "const_int_operand") ;; fail model + (match_operand:DI 8 "register_operand" "r")] + UNSPECV_CMPXCHG)) + (clobber (reg:DI 28))] + + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + sw_64_split_compare_and_swap (operands); + DONE; +} + [(set_attr "type" "multi")]) + +(define_expand "atomic_compare_and_swap" + [(match_operand:DI 0 "register_operand") ;; bool out + (match_operand:I12MODE 1 "register_operand") ;; val out + (match_operand:I12MODE 2 "mem_noofs_operand") ;; memory + (match_operand:I12MODE 3 "register_operand") ;; expected + (match_operand:I12MODE 4 "add_operand") ;; desired + (match_operand:SI 5 "const_int_operand") ;; is_weak + (match_operand:SI 6 "const_int_operand") ;; succ model + (match_operand:SI 7 "const_int_operand") ;; fail model + (match_operand:DI 8 "register_operand")] + "" +{ + if (flag_sw_use_cas) + { + if (CONST_INT_P (operands[3])) + operands[3] = force_reg (mode, operands[3]); + + if (CONST_INT_P (operands[4])) + operands[4] = force_reg (mode, operands[4]); + } + sw_64_expand_compare_and_swap_12 (operands); + DONE; +}) + +(define_insn_and_split "@atomic_compare_and_swap_1" + [(set (match_operand:DI 0 "register_operand" "=&r") ;; bool out + (unspec_volatile:DI [(const_int 0)] UNSPECV_CMPXCHG)) + (set (match_operand:DI 1 "register_operand" "=&r") ;; val out + (zero_extend:DI + (unspec_volatile:I12MODE [(const_int 0)] UNSPECV_CMPXCHG))) + (set (match_operand:I12MODE 2 "mem_noofs_operand" "+w") ;; memory + (unspec_volatile:I12MODE + [(match_dup 2) + (match_operand:DI 3 
"reg_or_8bit_operand" "rI") ;; expected + (match_operand:DI 4 "reg_or_0_operand" "rJ") ;; desired + (match_operand:DI 5 "register_operand" "r") ;; align + (match_operand:SI 6 "const_int_operand") ;; is_weak + (match_operand:SI 7 "const_int_operand") ;; succ model + (match_operand:SI 8 "const_int_operand") ;; fail model + (match_operand:DI 9 "register_operand" "r")] + UNSPECV_CMPXCHG)) + (clobber (match_scratch:DI 10 "=&r")) + (clobber (reg:DI 28))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + sw_64_split_compare_and_swap_12 (operands); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "atomic_compare_and_swap_target_sw8a" + [(set (match_operand:DI 0 "register_operand" "=&r") ;; bool out + (unspec_volatile:DI [(const_int 0)] UNSPECV_CMPXCHG)) + (set (match_operand:I48MODE 1 "register_operand" "=&r") ;; val out + (unspec_volatile:I48MODE [(const_int 0)] UNSPECV_CMPXCHG)) + (set (match_operand:I48MODE 2 "memory_operand" "+m") ;; memory + (unspec_volatile:I48MODE + [(match_dup 2) + (match_operand:DI 3 "reg_or_8bit_operand" "r") ;; expected + (match_operand:DI 4 "add_operand" "r") ;; desired + (match_operand:SI 5 "const_int_operand") ;; is_weak + (match_operand:SI 6 "const_int_operand") ;; succ model + (match_operand:SI 7 "const_int_operand")] ;; fail model + UNSPECV_CMPXCHG)) + (clobber (reg:DI 28))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + if (flag_sw_use_cas) + sw_64_split_atomic_cas (operands); + else + sw_64_split_compare_and_swap (operands); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "@atomic_compare_and_swap_1_target_sw8a" + [(set (match_operand:DI 0 "register_operand" "=&r") ;; bool out + (unspec_volatile:DI [(const_int 0)] UNSPECV_CMPXCHG)) + (set (match_operand:DI 1 "register_operand" "=&r") ;; val out + (zero_extend:DI + (unspec_volatile:I12MODE [(const_int 0)] UNSPECV_CMPXCHG))) + (set (match_operand:I12MODE 2 "mem_noofs_operand" "+w") ;; memory + (unspec_volatile:I12MODE + [(match_dup 2) + (match_operand:DI 3 "reg_or_8bit_operand" "rI") ;; expected + (match_operand:DI 4 "register_operand" "r") ;; desired + (match_operand:DI 5 "register_operand" "r") ;; align + (match_operand:SI 6 "const_int_operand") ;; is_weak + (match_operand:SI 7 "const_int_operand") ;; succ model + (match_operand:SI 8 "const_int_operand")] ;; fail model + UNSPECV_CMPXCHG)) + (clobber (match_scratch:DI 9 "=&r")) + (clobber (match_scratch:DI 10 "=&r")) + (clobber (reg:DI 28))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + sw_64_split_compare_and_swap_12 (operands); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn "sw_64_atomic_cas" + [(set (match_operand:I48MODE 0 "register_operand" "") ;; out + (match_operand:I48MODE 1 "memory_operand" "")) ;; memory. + (set (match_dup 1) + (unspec_volatile:I48MODE + [(match_dup 0) + (match_operand:I48MODE 2 "register_operand" "")] ;; value. 
+ UNSPECV_CMPXCHG)) + (clobber (reg:DI 28))] + "TARGET_SW8A && flag_sw_use_cas" + "ldi $28,%1\;cas %0,$28,%2") +;; endif + +(define_insn_and_split "atomic_exchange" + [(set (match_operand:I48MODE 0 "register_operand" "=&r") ;; output + (match_operand:I48MODE 1 "memory_operand" "+m")) ;; memory + (set (match_dup 1) + (unspec:I48MODE + [(match_operand:I48MODE 2 "add_operand" "rKL") ;; input + (match_operand:SI 3 "const_int_operand")] ;; model + UNSPEC_XCHG)) + (clobber (match_scratch:I48MODE 4 "=&r"))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + sw_64_split_atomic_exchange (operands); + DONE; +} + [(set_attr "type" "multi")]) + +(define_expand "atomic_exchange" + [(match_operand:I12MODE 0 "register_operand") ;; output + (match_operand:I12MODE 1 "mem_noofs_operand") ;; memory + (match_operand:I12MODE 2 "reg_or_0_operand") ;; input + (match_operand:SI 3 "const_int_operand")] ;; model + "" +{ + sw_64_expand_atomic_exchange_12 (operands); + DONE; +}) + +(define_insn_and_split "@atomic_exchange_1" + [(set (match_operand:DI 0 "register_operand" "=&r") ;; output + (zero_extend:DI + (match_operand:I12MODE 1 "mem_noofs_operand" "+w"))) ;; memory + (set (match_dup 1) + (unspec:I12MODE + [(match_operand:DI 2 "reg_or_8bit_operand" "rI") ;; input + (match_operand:DI 3 "register_operand" "r") ;; align + (match_operand:SI 4 "const_int_operand")] ;; model + UNSPEC_XCHG)) + (clobber (match_scratch:DI 5 "=&r"))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + sw_64_split_atomic_exchange_12 (operands); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "atomic_" + [(set (match_operand:I48MODE 0 "memory_operand" "+m") + (unspec:I48MODE + [(FETCHOP:I48MODE (match_dup 0) + (match_operand:I48MODE 1 "" "")) + (match_operand:SI 2 "const_int_operand")] + UNSPEC_ATOMIC)) + (clobber (match_scratch:I48MODE 3 "=&r")) + (clobber (reg:DI 28))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + sw_64_split_atomic_op (, operands[0], operands[1], + NULL, NULL, operands[3], + (enum memmodel) INTVAL (operands[2])); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "atomic_nand" + [(set (match_operand:I48MODE 0 "memory_operand" "+m") + (unspec:I48MODE + [(not:I48MODE + (and:I48MODE (match_dup 0) + (match_operand:I48MODE 1 "register_operand" "r"))) + (match_operand:SI 2 "const_int_operand")] + UNSPEC_ATOMIC)) + (clobber (match_scratch:I48MODE 3 "=&r")) + (clobber (reg:DI 28))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + sw_64_split_atomic_op (NOT, operands[0], operands[1], + NULL, NULL, operands[3], + (enum memmodel) INTVAL (operands[2])); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "atomic_fetch_" + [(set (match_operand:I48MODE 0 "register_operand" "=&r") + (match_operand:I48MODE 1 "memory_operand" "+m")) + (set (match_dup 1) + (unspec:I48MODE + [(FETCHOP:I48MODE (match_dup 1) + (match_operand:I48MODE 2 "" "")) + (match_operand:SI 3 "const_int_operand")] + UNSPEC_ATOMIC)) + (clobber (match_scratch:I48MODE 4 "=&r")) + (clobber (reg:DI 28))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + sw_64_split_atomic_op (, operands[1], operands[2], + operands[0], NULL, operands[4], + (enum memmodel) INTVAL (operands[3])); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "atomic_fetch_nand" + [(set (match_operand:I48MODE 0 "register_operand" "=&r") + (match_operand:I48MODE 1 "memory_operand" "+m")) + (set (match_dup 1) + (unspec:I48MODE + [(not:I48MODE + (and:I48MODE (match_dup 1) + (match_operand:I48MODE 2 
"register_operand" "r"))) + (match_operand:SI 3 "const_int_operand")] + UNSPEC_ATOMIC)) + (clobber (match_scratch:I48MODE 4 "=&r")) + (clobber (reg:DI 28))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + sw_64_split_atomic_op (NOT, operands[1], operands[2], + operands[0], NULL, operands[4], + (enum memmodel) INTVAL (operands[3])); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "atomic__fetch" + [(set (match_operand:I48MODE 0 "register_operand" "=&r") + (FETCHOP:I48MODE + (match_operand:I48MODE 1 "memory_operand" "+m") + (match_operand:I48MODE 2 "" ""))) + (set (match_dup 1) + (unspec:I48MODE + [(FETCHOP:I48MODE (match_dup 1) (match_dup 2)) + (match_operand:SI 3 "const_int_operand")] + UNSPEC_ATOMIC)) + (clobber (match_scratch:I48MODE 4 "=&r")) + (clobber (reg:DI 28))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + sw_64_split_atomic_op (, operands[1], operands[2], + NULL, operands[0], operands[4], + (enum memmodel) INTVAL (operands[3])); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "atomic_nand_fetch" + [(set (match_operand:I48MODE 0 "register_operand" "=&r") + (not:I48MODE + (and:I48MODE (match_operand:I48MODE 1 "memory_operand" "+m") + (match_operand:I48MODE 2 "register_operand" "r")))) + (set (match_dup 1) + (unspec:I48MODE + [(not:I48MODE (and:I48MODE (match_dup 1) (match_dup 2))) + (match_operand:SI 3 "const_int_operand")] + UNSPEC_ATOMIC)) + (clobber (match_scratch:I48MODE 4 "=&r")) + (clobber (reg:DI 28))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + sw_64_split_atomic_op (NOT, operands[1], operands[2], + NULL, operands[0], operands[4], + (enum memmodel) INTVAL (operands[3])); + DONE; +} + [(set_attr "type" "multi")]) diff --git a/gcc/config/sw_64/t-linux b/gcc/config/sw_64/t-linux new file mode 100644 index 0000000000000000000000000000000000000000..d78ef47dfaf17157fd1fd542e56d70d0f4630526 --- /dev/null +++ b/gcc/config/sw_64/t-linux @@ -0,0 +1 @@ +MULTIARCH_DIRNAME = $(call if_multiarch,sw_64-linux-gnu) diff --git a/gcc/config/sw_64/t-sw_64 b/gcc/config/sw_64/t-sw_64 new file mode 100644 index 0000000000000000000000000000000000000000..d7b5e98a066546da1d91d3942bdf1c16986a29d9 --- /dev/null +++ b/gcc/config/sw_64/t-sw_64 @@ -0,0 +1,19 @@ +# Copyright (C) 2016-2020 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . 
+ +PASSES_EXTRA += $(srcdir)/config/sw_64/sw_64-passes.def diff --git a/gcc/config/sw_64/x-sw_64 b/gcc/config/sw_64/x-sw_64 new file mode 100644 index 0000000000000000000000000000000000000000..229866b30b3b70570979f15d1c57cd0099643a9e --- /dev/null +++ b/gcc/config/sw_64/x-sw_64 @@ -0,0 +1,3 @@ +driver-sw_64.o: $(srcdir)/config/sw_64/driver-sw_64.c + $(COMPILE) $< + $(POSTCOMPILE) diff --git a/gcc/configure b/gcc/configure index d4f97834fdc7f8dfbfc2d0a8f10e6beca767e926..707f3fdf5b9a7054f79170413d852954cb84db64 100755 --- a/gcc/configure +++ b/gcc/configure @@ -25109,6 +25109,29 @@ foo: .long 25 xor %l1, %tle_lox10(foo), %o5 ld [%g7 + %o5], %o1" ;; + sw_64*-*-*) + conftest_s=' + .section ".tdata","awT",@progbits +foo: .long 25 + .text + ldl $27,__tls_get_addr($29) !literal!1 + ldi $16,foo($29) !tlsgd!1 + call $26,($27),__tls_get_addr !lituse_tlsgd!1 + ldl $27,__tls_get_addr($29) !literal!2 + ldi $16,foo($29) !tlsldm!2 + call $26,($27),__tls_get_addr !lituse_tlsldm!2 + ldl $1,foo($29) !gotdtprel + ldih $2,foo($29) !dtprelhi + ldi $3,foo($2) !dtprello + ldi $4,foo($29) !dtprel + ldl $1,foo($29) !gottprel + ldih $2,foo($29) !tprelhi + ldi $3,foo($2) !tprello + ldi $4,foo($29) !tprel' + tls_first_major=2 + tls_first_minor=13 + tls_as_opt=--fatal-warnings + ;; tilepro*-*-*) conftest_s=' .section ".tdata","awT",@progbits @@ -28283,6 +28306,111 @@ fi ;; + + sw_64*-*-linux* | sw_64*-*-*bsd*) + { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for explicit relocation support" >&5 +$as_echo_n "checking assembler for explicit relocation support... " >&6; } +if ${gcc_cv_as_sw_64_explicit_relocs+:} false; then : + $as_echo_n "(cached) " >&6 +else + gcc_cv_as_sw_64_explicit_relocs=no + if test $in_tree_gas = yes; then + if test $gcc_cv_gas_vers -ge `expr \( \( 2 \* 1000 \) + 12 \) \* 1000 + 0` + then gcc_cv_as_sw_64_explicit_relocs=yes +fi + elif test x$gcc_cv_as != x; then + $as_echo ' .set nomacro + .text + ext0b $3, $2, $3 !lituse_bytoff!1 + ldl $2, a($29) !literal!1 + ldl $4, b($29) !literal!2 + ldl_u $3, 0($2) !lituse_base!1 + ldl $27, f($29) !literal!5 + call $26, ($27), f !lituse_jsr!5 + ldih $29, 0($26) !gpdisp!3 + ldi $0, c($29) !gprel + ldih $1, d($29) !gprelhigh + ldi $1, d($1) !gprellow + ldi $29, 0($29) !gpdisp!3' > conftest.s + if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; } + then + gcc_cv_as_sw_64_explicit_relocs=yes + else + echo "configure: failed program was" >&5 + cat conftest.s >&5 + fi + rm -f conftest.o conftest.s + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_sw_64_explicit_relocs" >&5 +$as_echo "$gcc_cv_as_sw_64_explicit_relocs" >&6; } +if test $gcc_cv_as_sw_64_explicit_relocs = yes; then + +$as_echo "#define HAVE_AS_EXPLICIT_RELOCS 1" >>confdefs.h + +fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for jsrdirect relocation support" >&5 +$as_echo_n "checking assembler for jsrdirect relocation support... 
" >&6; } +if ${gcc_cv_as_sw_64_jsrdirect_relocs+:} false; then : + $as_echo_n "(cached) " >&6 +else + gcc_cv_as_sw_64_jsrdirect_relocs=no + if test $in_tree_gas = yes; then + if test $gcc_cv_gas_vers -ge `expr \( \( 2 \* 1000 \) + 16 \) \* 1000 + 90` + then gcc_cv_as_sw_64_jsrdirect_relocs=yes +fi +#trouble# + elif test x$gcc_cv_as != x; then + $as_echo ' .set nomacro + .text + ldl $27, a($29) !literal!1 + call $26, ($27), a !lituse_jsrdirect!1' > conftest.s + if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; } + then + gcc_cv_as_sw_64_jsrdirect_relocs=yes + else + echo "configure: failed program was" >&5 + cat conftest.s >&5 + fi + rm -f conftest.o conftest.s + fi +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_sw_64_jsrdirect_relocs" >&5 +$as_echo "$gcc_cv_as_sw_64_jsrdirect_relocs" >&6; } +if test $gcc_cv_as_sw_64_jsrdirect_relocs = yes; then + +$as_echo "#define HAVE_AS_JSRDIRECT_RELOCS 1" >>confdefs.h + +fi +cat >> confdefs.h <<_ACEOF +#define FLAG_SW64_ATOMIC 1 +#define FLAG_SW64_90139 1 +#define FLAG_SW64_PREFETCH 1 +#define FLAG_SW64_PROTECT 1 +#define FLAG_SW64_SIMD 1 +#define FLAG_SW64_AUTOSIMD 1 +#define FLAG_SW64_M32 1 +#define FLAG_SW64_INC_DEC 1 +#define FLAG_SW64_DELNOP 1 +#define FLAG_SW64_FM 1 +#define FLAG_SW64_WMEMB 1 +_ACEOF + + ;; + esac # Mips and HP-UX need the GNU assembler. @@ -28311,7 +28439,7 @@ esac case "$cpu_type" in aarch64 | alpha | arc | arm | avr | bfin | cris | csky | i386 | m32c | m68k \ | microblaze | mips | nds32 | nios2 | pa | riscv | rs6000 | score | sparc \ - | tilegx | tilepro | visium | xstormy16 | xtensa) + | sw_64 | tilegx | tilepro | visium | xstormy16 | xtensa) insn="nop" ;; ia64 | s390) @@ -29629,6 +29757,17 @@ $as_echo "$as_me: WARNING: --build-id is not supported by your linker; --enable- fi fi +# sw_64 add --enable-linker-no-relax to support linker -Wl,-no-relax +# Check whether --enable-linker-no-relax was given. +if test "${enable_linker_no_relax+set}" = set; then : + enableval=$enable_linker_no_relax; +else + enable_linker_no_relax=no +fi + +if test x"$enable_linker_no_relax" = xyes; then + $as_echo "#define ENABLE_LD_NORELAX 1" >>confdefs.h +fi # In binutils 2.21, GNU ld gained support for new emulations fully # supporting the Solaris 2 ABI. Detect their presence in the linker used. 
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking linker *_sol2 emulation support" >&5 diff --git a/gcc/configure.ac b/gcc/configure.ac index 44154f69f0a2279fb17846fb23c729653622bdcf..91b59d3f7fb78f19cdf1025336240f7e006da197 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -3870,6 +3870,29 @@ foo: .long 25 xor %l1, %tle_lox10(foo), %o5 ld [%g7 + %o5], %o1" ;; + sw_64*-*-*) + conftest_s=' + .section ".tdata","awT",@progbits +foo: .long 25 + .text + ldl $27,__tls_get_addr($29) !literal!1 + ldi $16,foo($29) !tlsgd!1 + call $26,($27),__tls_get_addr !lituse_tlsgd!1 + ldl $27,__tls_get_addr($29) !literal!2 + ldi $16,foo($29) !tlsldm!2 + call $26,($27),__tls_get_addr !lituse_tlsldm!2 + ldl $1,foo($29) !gotdtprel + ldih $2,foo($29) !dtprelhi + ldi $3,foo($2) !dtprello + ldi $4,foo($29) !dtprel + ldl $1,foo($29) !gottprel + ldih $2,foo($29) !tprelhi + ldi $3,foo($2) !tprello + ldi $4,foo($29) !tprel' + tls_first_major=2 + tls_first_minor=13 + tls_as_opt=--fatal-warnings + ;; tilepro*-*-*) conftest_s=' .section ".tdata","awT",@progbits @@ -4345,6 +4368,34 @@ bar: [AC_DEFINE(HAVE_AS_SPARC_GOTDATA_OP, 1, [Define if your assembler and linker support GOTDATA_OP relocs.])]) + sw_64*-*-linux* | sw_64*-*-*bsd*) + gcc_GAS_CHECK_FEATURE([explicit relocation support], + gcc_cv_as_sw_64_explicit_relocs, [2,12,0],, +[ .set nomacro + .text + ext0b $3, $2, $3 !lituse_bytoff!1 + ldl $2, a($29) !literal!1 + ldl $4, b($29) !literal!2 + ldl_u $3, 0($2) !lituse_base!1 + ldl $27, f($29) !literal!5 + call $26, ($27), f !lituse_jsr!5 + ldih $29, 0($26) !gpdisp!3 + ldi $0, c($29) !gprel + ldih $1, d($29) !gprelhigh + ldi $1, d($1) !gprellow + ldi $29, 0($29) !gpdisp!3],, + [AC_DEFINE(HAVE_AS_EXPLICIT_RELOCS, 1, + [Define if your assembler supports explicit relocations.])]) + gcc_GAS_CHECK_FEATURE([jsrdirect relocation support], + gcc_cv_as_sw_64_jsrdirect_relocs, [2,16,90],, +[ .set nomacro + .text + ldl $27, a($29) !literal!1 + call $26, ($27), a !lituse_jsrdirect!1],, + [AC_DEFINE(HAVE_AS_JSRDIRECT_RELOCS, 1, + [Define if your assembler supports the lituse_jsrdirect relocation.])]) + ;; + gcc_GAS_CHECK_FEATURE([unaligned pcrel relocs], gcc_cv_as_sparc_ua_pcrel,, [-K PIC], @@ -5145,7 +5196,7 @@ esac # ??? Once 2.11 is released, probably need to add first known working # version to the per-target configury. 
case "$cpu_type" in - aarch64 | alpha | arc | arm | avr | bfin | cris | csky | i386 | m32c | m68k \ + aarch64 | alpha | sw_64 | arc | arm | avr | bfin | cris | csky | i386 | m32c | m68k \ | microblaze | mips | nds32 | nios2 | pa | riscv | rs6000 | score | sparc \ | tilegx | tilepro | visium | xstormy16 | xtensa) insn="nop" @@ -6052,6 +6103,31 @@ if test x"$enable_linker_build_id" = xyes; then fi fi +# --no-relax +AC_ARG_ENABLE(linker-no-relax, +[AS_HELP_STRING([--enable-linker-no-relax], + [compiler will always pass --no-relax to linker])], +[], +enable_linker_no_relax=no) + +if test x"$enable_linker_build_id" = xyes; then + if test x"$gcc_cv_ld_buildid" = xyes; then + AC_DEFINE(ENABLE_LD_BUILDID, 1, + [Define if gcc should always pass --build-id to linker.]) + else + AC_MSG_WARN(--build-id is not supported by your linker; --enable-linker-build-id ignored) + fi +fi + +# --no-relax +if test x"$enable_linker_no_relax" = xyes; then + AC_DEFINE(ENABLE_LD_NORELAX, 1, + [Define if gcc should always pass --no-relax to linker.]) + else + AC_MSG_WARN(--no-relax is not supported by your linker; --enable-linker-no-relax ignored) + fi +fi + # In binutils 2.21, GNU ld gained support for new emulations fully # supporting the Solaris 2 ABI. Detect their presence in the linker used. AC_CACHE_CHECK(linker *_sol2 emulation support, @@ -6224,7 +6300,8 @@ case "$target" in powerpc*-*-linux* | \ sparc*-*-linux* | \ s390*-*-linux* | \ - alpha*-*-linux*) + alpha*-*-linux* | \ + sw_64*-*-linux*) AC_ARG_WITH(long-double-128, [AS_HELP_STRING([--with-long-double-128], [use 128-bit long double by default])], diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi index 7d98ec4190aec574fcd77a924cc8f29af11de065..e61ba05abb4368ed16a9457cc419c266a70861ee 100644 --- a/gcc/doc/install.texi +++ b/gcc/doc/install.texi @@ -3518,6 +3518,8 @@ information have to. @item @uref{#sparcv9-x-solaris2,,sparcv9-*-solaris2*} @item +@uref{#sw_64-x-x,,sw_64*-*-*} +@item @uref{#c6x-x-x,,c6x-*-*} @item @uref{#tilegx-x-linux,,tilegx-*-linux*} @@ -4643,6 +4645,12 @@ zSeries system (64-bit) running GNU/Linux for zSeries@. zSeries system (64-bit) running TPF@. This platform is supported as cross-compilation target only. +@html +
+<hr />
+@end html
+@anchor{sw_64-x-x}
+@heading sw_64*-*-*
+
 @html
 <hr />
@end html diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c index 972512e8115331c4d7fefe137355bd0ad6b042ef..04c1c0ab6b11c4bdc0e8dae2cf9d45fc4bdc10d1 100644 --- a/gcc/emit-rtl.c +++ b/gcc/emit-rtl.c @@ -2399,6 +2399,43 @@ adjust_address_1 (rtx memref, machine_mode mode, poly_int64 offset, addr = gen_rtx_ZERO_EXTEND (address_mode, plus_constant (pointer_mode, XEXP (addr, 0), offset)); +#endif +#ifdef FLAG_SW64_INC_DEC + else if (GET_CODE (addr) == POST_INC) + ; + else if (GET_CODE (addr) == POST_DEC) + { + rtx term; + rtx reg = XEXP (addr, 0); + if (known_eq (offset, 0)) + term = GEN_INT (8); + else + term = GEN_INT (-24); + addr = gen_rtx_POST_MODIFY (mode, reg, + gen_rtx_PLUS (mode, reg, term)); + } + else if (GET_CODE (addr) == POST_MODIFY) + { + if (GET_CODE (XEXP (addr,1)) == PLUS) + { + if (CONSTANT_P (XEXP (XEXP (addr, 1), 1))) + { + rtx term; + rtx reg = XEXP (XEXP (addr, 1), 0); + if (known_eq (offset, 0)) + term = GEN_INT (8); + else + term = plus_constant (mode, + XEXP (XEXP (addr, 1), 1), -8); + if (term == const0_rtx) + XEXP (addr, 1) = XEXP (XEXP (addr, 1), 0); + else + addr = gen_rtx_POST_MODIFY (mode, reg, + gen_rtx_PLUS (mode, + reg, term)); + } + } + } #endif else addr = plus_constant (address_mode, addr, offset); diff --git a/gcc/explow.c b/gcc/explow.c index b838f03587083c8fca23d47dd710ed84f7a98115..ff74b7f4865f070b63a16eb9b77e7628adb84ccf 100644 --- a/gcc/explow.c +++ b/gcc/explow.c @@ -1250,7 +1250,11 @@ get_dynamic_stack_size (rtx *psize, unsigned size_align, in SIZE for the hole that might result from the alignment operation. */ unsigned known_align = REGNO_POINTER_ALIGN (VIRTUAL_STACK_DYNAMIC_REGNUM); +#ifndef FLAG_SW64_90139 + // it change from 710 extra = (required_align - BITS_PER_UNIT) / BITS_PER_UNIT; + // see the test pr20210303 if (known_align == 0) +#endif known_align = BITS_PER_UNIT; if (required_align > known_align) { diff --git a/gcc/expr.c b/gcc/expr.c index c468b5eb9f8a2bfa637a27f5e72182571c28ce9f..a8e8debf53ae97577f3ac585588e64aa4f0b6a1d 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -3811,6 +3811,9 @@ emit_move_insn (rtx x, rtx y) rtx_insn *last_insn; rtx set; +#ifdef FLAG_SW64_M32 + if (!TARGET_SW_M32) +#endif gcc_assert (mode != BLKmode && (GET_MODE (y) == mode || GET_MODE (y) == VOIDmode)); diff --git a/gcc/final.c b/gcc/final.c index 807384514dbe89f8dbfd31b47a399587b68283fe..5ca16c80175f91c925b5b662ef4ef7db1a6898e3 100644 --- a/gcc/final.c +++ b/gcc/final.c @@ -1847,7 +1847,12 @@ profile_function (FILE *file ATTRIBUTE_UNUSED) { int align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE); switch_to_section (data_section); +#ifdef FLAG_SW64_DELNOP + if (flag_sw_delnop == 0) + ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT)); +#else ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT)); +#endif targetm.asm_out.internal_label (file, "LP", current_function_funcdef_no); assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1); } @@ -2466,9 +2471,14 @@ final_scan_insn_1 (rtx_insn *insn, FILE *file, int optimize_p ATTRIBUTE_UNUSED, #else #ifdef ASM_OUTPUT_ALIGN_WITH_NOP ASM_OUTPUT_ALIGN_WITH_NOP (file, alignment.levels[0].log); +#else +#ifdef FLAG_SW64_DELNOP + if (flag_sw_delnop == 0) + ASM_OUTPUT_ALIGN (file, alignment.levels[0].log); #else ASM_OUTPUT_ALIGN (file, alignment.levels[0].log); #endif +#endif #endif } } @@ -2502,7 +2512,12 @@ final_scan_insn_1 (rtx_insn *insn, FILE *file, int optimize_p ATTRIBUTE_UNUSED, #else log_align = exact_log2 (BIGGEST_ALIGNMENT / BITS_PER_UNIT); #endif +#ifdef FLAG_SW64_DELNOP + if (flag_sw_delnop == 0) 
+ ASM_OUTPUT_ALIGN (file, log_align); +#else ASM_OUTPUT_ALIGN (file, log_align); +#endif } else switch_to_section (current_function_section ()); diff --git a/gcc/flags.h b/gcc/flags.h index 921f4390581fdc837e400a1d069eb0e77c0529ff..31d5b08806b87c86734948c5677ace53e9ca58bb 100644 --- a/gcc/flags.h +++ b/gcc/flags.h @@ -39,6 +39,10 @@ extern bool fast_math_flags_struct_set_p (struct cl_optimization *); extern bool final_insns_dump_p; +#ifdef SW64_TARGET_SUPPORT_FPCR +extern int flag_fpcr_set; +extern int stfp3_flag; +#endif /* Other basic status info about current function. */ diff --git a/gcc/fortran/interface.c b/gcc/fortran/interface.c index 70219a537b983fea3b6bcbcb26c8affe3f21dbaa..fe1e13d16d2643b9263feb8897ff64b08d5a114c 100644 --- a/gcc/fortran/interface.c +++ b/gcc/fortran/interface.c @@ -3261,10 +3261,18 @@ gfc_compare_actual_formal (gfc_actual_arglist **ap, gfc_formal_arglist *formal, "at %L", f->sym->name, actual_size, formal_size, &a->expr->where); else +#ifdef FLAG_SW64_90139 //close this for it will cause speccpu 416 build err + gfc_warning (OPT_Wargument_mismatch, + "Actual argument contains too few " + "elements for dummy argument %qs (%lu/%lu) " + "at %L.Please add -std=legacy options", f->sym->name, actual_size, + formal_size, &a->expr->where); +#else gfc_error_now ("Actual argument contains too few " "elements for dummy argument %qs (%lu/%lu) " "at %L", f->sym->name, actual_size, formal_size, &a->expr->where); +#endif } return false; } diff --git a/gcc/gcc.c b/gcc/gcc.c index efa0b53ce97736783c05d8aa8bc239156aca5049..8f42186dd5a8a801a21c834b312f7f5ff3aa20ea 100644 --- a/gcc/gcc.c +++ b/gcc/gcc.c @@ -1844,6 +1844,12 @@ init_spec (void) } #endif +/* --no-relax for sw_64 */ +#ifdef ENABLE_LD_NORELAX +#define LINK_NORELAX_SPEC "%{!r:--no-relax} " + obstack_grow (&obstack, LINK_NORELAX_SPEC, sizeof (LINK_NORELAX_SPEC) - 1); +#endif + #if defined LINK_EH_SPEC || defined LINK_BUILDID_SPEC || \ defined LINKER_HASH_STYLE # ifdef LINK_BUILDID_SPEC diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c index 061aef39c2d5e984bb699c3ef3791f0edffb1cda..3c2540edf856d0fbad6ccd223cdf6ba285eaaab7 100644 --- a/gcc/gimple-match-head.c +++ b/gcc/gimple-match-head.c @@ -1233,6 +1233,11 @@ optimize_pow_to_exp (tree arg0, tree arg1) case PLUS_EXPR: case MINUS_EXPR: break; +#ifdef FLAG_SW64_FM + case PAREN_EXPR: + if (flag_sw_fast_math == 1) +#endif + return false; default: return true; } diff --git a/gcc/optabs.c b/gcc/optabs.c index 64a1a17686b7dfe11a40291703ac781458c53749..943c61ae289177ed63b4aa0951405b5201797595 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -6309,7 +6309,12 @@ expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval, enum memmodel fail_model) { machine_mode mode = GET_MODE (mem); +#ifdef FLAG_SW64_ATOMIC + class expand_operand ops[9]; + rtx imust=gen_reg_rtx(DImode); +#else class expand_operand ops[8]; +#endif enum insn_code icode; rtx target_oval, target_bool = NULL_RTX; rtx libfunc; @@ -6358,7 +6363,12 @@ expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval, create_integer_operand (&ops[5], is_weak); create_integer_operand (&ops[6], succ_model); create_integer_operand (&ops[7], fail_model); +#ifdef FLAG_SW64_ATOMIC + create_fixed_operand (&ops[8], imust); + if (maybe_expand_insn (icode, 9, ops)) +#else if (maybe_expand_insn (icode, 8, ops)) +#endif { /* Return success/failure. 
*/ target_bool = ops[0].value; diff --git a/gcc/sync-builtins.def b/gcc/sync-builtins.def index 156a13ce0f8bfb238da4ca208ef90510ffac4c8a..ee9d8207487fa8b9d593a9e9b19489a92fbbcd9e 100644 --- a/gcc/sync-builtins.def +++ b/gcc/sync-builtins.def @@ -256,6 +256,8 @@ DEF_SYNC_BUILTIN (BUILT_IN_SYNC_LOCK_RELEASE_16, "__sync_lock_release_16", DEF_SYNC_BUILTIN (BUILT_IN_SYNC_SYNCHRONIZE, "__sync_synchronize", BT_FN_VOID, ATTR_NOTHROWCALL_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_SYNC_SYNCHRONIZE_WRITE, "__sync_synchronize_write", + BT_FN_VOID, ATTR_NOTHROWCALL_LEAF_LIST) /* __sync* builtins for the C++ memory model. */ diff --git a/gcc/target-insns.def b/gcc/target-insns.def index e80361f0a48aaae70a702db8e6abaa5a852055b7..099121b9d4ba14b3a149492e3276921c2627b34b 100644 --- a/gcc/target-insns.def +++ b/gcc/target-insns.def @@ -60,6 +60,7 @@ DEF_TARGET_INSN (jump, (rtx x0)) DEF_TARGET_INSN (load_multiple, (rtx x0, rtx x1, rtx x2)) DEF_TARGET_INSN (mem_thread_fence, (rtx x0)) DEF_TARGET_INSN (memory_barrier, (void)) +DEF_TARGET_INSN (write_memory_barrier, (void)) DEF_TARGET_INSN (memory_blockage, (void)) DEF_TARGET_INSN (movstr, (rtx x0, rtx x1, rtx x2)) DEF_TARGET_INSN (nonlocal_goto, (rtx x0, rtx x1, rtx x2, rtx x3)) diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C b/gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C index 2e0ef685f36fa0482b800a0078200d015fe35d1c..60b8f15a97840427b80a9a2208321966b023c50f 100644 --- a/gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C +++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C @@ -1,6 +1,6 @@ // PR c++/49673: check that test_data goes into .rodata // { dg-do compile { target c++11 } } -// { dg-additional-options -G0 { target { { alpha*-*-* frv*-*-* ia64-*-* lm32*-*-* m32r*-*-* microblaze*-*-* mips*-*-* nios2-*-* powerpc*-*-* rs6000*-*-* } && { ! { *-*-darwin* *-*-aix* alpha*-*-*vms* } } } } } +// { dg-additional-options -G0 { target { { alpha*-*-* sw_64*-*-* frv*-*-* ia64-*-* lm32*-*-* m32r*-*-* microblaze*-*-* mips*-*-* nios2-*-* powerpc*-*-* rs6000*-*-* } && { ! { *-*-darwin* *-*-aix* alpha*-*-*vms* } } } } } // { dg-final { scan-assembler "\\.rdata" { target mips*-*-* } } } // { dg-final { scan-assembler "rodata" { target { { *-*-linux-gnu *-*-gnu* *-*-elf } && { ! { mips*-*-* riscv*-*-* } } } } } } diff --git a/gcc/testsuite/g++.dg/opt/devirt2.C b/gcc/testsuite/g++.dg/opt/devirt2.C index cf4842bd4df346d241ca8d9d0e7bf39403f5e23d..3417372868987939e3da8b087605cec7b9314648 100644 --- a/gcc/testsuite/g++.dg/opt/devirt2.C +++ b/gcc/testsuite/g++.dg/opt/devirt2.C @@ -5,7 +5,7 @@ // { dg-additional-options "-mshort-calls" {target epiphany-*-*} } // Using -mno-abicalls avoids a R_MIPS_JALR .reloc. // { dg-additional-options "-mno-abicalls" { target mips*-*-* } } -// { dg-final { scan-assembler-times "xyzzy" 2 { target { ! { alpha*-*-* hppa*-*-* ia64*-*-hpux* sparc*-*-* *-*-mingw* } } } } } +// { dg-final { scan-assembler-times "xyzzy" 2 { target { ! 
{ alpha*-*-* sw_64*-*-* hppa*-*-* ia64*-*-hpux* sparc*-*-* *-*-mingw* } } } } } // For *-*-mingw* there is additionally one .def match // { dg-final { scan-assembler-times "xyzzy" 3 { target *-*-mingw* } } } // The IA64 and HPPA compilers generate external declarations in addition diff --git a/gcc/testsuite/g++.dg/pr49718.C b/gcc/testsuite/g++.dg/pr49718.C index b1cc5deb7ac60c2c5e4d19583bf65fb2de68c7f3..13c661642de43c23afe21f3e9f24aac89ae7dbf7 100644 --- a/gcc/testsuite/g++.dg/pr49718.C +++ b/gcc/testsuite/g++.dg/pr49718.C @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O2 -finstrument-functions" } */ -/* { dg-additional-options "-mno-explicit-relocs" { target alpha*-*-* } } */ +/* { dg-additional-options "-mno-explicit-relocs" { target alpha*-*-* sw_64*-*-* } } */ /* { dg-additional-options "-mno-relax-pic-calls" { target mips*-*-* } } */ /* { dg-final { scan-assembler-times "__cyg_profile_func_enter" 1 { target { ! { hppa*-*-hpux* } } } } } */ /* { dg-final { scan-assembler-times "__cyg_profile_func_enter,%r" 1 { target hppa*-*-hpux* } } } */ diff --git a/gcc/testsuite/gcc.c-torture/execute/20101011-1.c b/gcc/testsuite/gcc.c-torture/execute/20101011-1.c index 649e168e0b1d8e8097aa4752a659d2ec77c23f59..255054b493fc5e4687e3cfee456560d2997779d4 100644 --- a/gcc/testsuite/gcc.c-torture/execute/20101011-1.c +++ b/gcc/testsuite/gcc.c-torture/execute/20101011-1.c @@ -28,6 +28,9 @@ #elif defined (__aarch64__) /* On AArch64 integer division by zero does not trap. */ # define DO_TEST 0 +#elif defined (__sw_64__) + /* On Sw_64 integer division by zero does not trap. */ +# define DO_TEST 0 #elif defined (__TMS320C6X__) /* On TI C6X division by zero does not trap. */ # define DO_TEST 0 diff --git a/gcc/testsuite/gcc.dg/20020312-2.c b/gcc/testsuite/gcc.dg/20020312-2.c index 52c33d09b90a94e52c498fa78a96cbd37952366e..51e2c939af716b486c06b3084c3aab63828c5fd2 100644 --- a/gcc/testsuite/gcc.dg/20020312-2.c +++ b/gcc/testsuite/gcc.dg/20020312-2.c @@ -15,6 +15,8 @@ extern void abort (void); #if defined(__alpha__) /* PIC register is $29, but is used even without -fpic. */ +#elif defined(__sw_64__) +/* PIC register is $29, but is used even without -fpic. */ #elif defined(__arc__) # define PIC_REG "26" #elif defined(__arm__) diff --git a/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c b/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c index 692c64ad2073781060df4748fcb996f7f2fbb935..2f545764565bf55f52894529d9435cf0d139ade9 100644 --- a/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c +++ b/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c @@ -24,7 +24,7 @@ | FE_OVERFLOW \ | FE_UNDERFLOW) -#if defined __alpha__ || defined __aarch64__ +#if defined __alpha__ || defined __aarch64__ || defined __sw_64__ #define ITER_COUNT 100 #else #define ITER_COUNT 10000 diff --git a/gcc/testsuite/gcc.dg/attr-alloc_size-11.c b/gcc/testsuite/gcc.dg/attr-alloc_size-11.c index a3d95c4e587d9a8786afb7aec5ee691ab82277ca..3a89d29a0c153504851dee0ed91928ae10d5fa33 100644 --- a/gcc/testsuite/gcc.dg/attr-alloc_size-11.c +++ b/gcc/testsuite/gcc.dg/attr-alloc_size-11.c @@ -47,8 +47,8 @@ typedef __SIZE_TYPE__ size_t; /* The following tests fail because of missing range information. The xfail exclusions are PR79356. */ -TEST (signed char, SCHAR_MIN + 2, ALLOC_MAX); /* { dg-warning "argument 1 range \\\[13, \[0-9\]+\\\] exceeds maximum object size 12" "missing range info for signed char" { xfail { ! 
{ aarch64*-*-* arm*-*-* avr-*-* alpha*-*-* ia64-*-* mips*-*-* or1k*-*-* pdp11*-*-* powerpc*-*-* sparc*-*-* s390*-*-* visium-*-* msp430-*-* } } } } */ -TEST (short, SHRT_MIN + 2, ALLOC_MAX); /* { dg-warning "argument 1 range \\\[13, \[0-9\]+\\\] exceeds maximum object size 12" "missing range info for short" { xfail { ! { aarch64*-*-* arm*-*-* alpha*-*-* avr-*-* ia64-*-* mips*-*-* or1k*-*-* pdp11*-*-* powerpc*-*-* sparc*-*-* s390x-*-* visium-*-* msp430-*-* } } } } */ +TEST (signed char, SCHAR_MIN + 2, ALLOC_MAX); /* { dg-warning "argument 1 range \\\[13, \[0-9\]+\\\] exceeds maximum object size 12" "missing range info for signed char" { xfail { ! { aarch64*-*-* arm*-*-* avr-*-* alpha*-*-* sw_64*-*-* ia64-*-* mips*-*-* or1k*-*-* pdp11*-*-* powerpc*-*-* sparc*-*-* s390*-*-* visium-*-* msp430-*-* } } } } */ +TEST (short, SHRT_MIN + 2, ALLOC_MAX); /* { dg-warning "argument 1 range \\\[13, \[0-9\]+\\\] exceeds maximum object size 12" "missing range info for short" { xfail { ! { aarch64*-*-* arm*-*-* alpha*-*-* sw_64*-*-* avr-*-* ia64-*-* mips*-*-* or1k*-*-* pdp11*-*-* powerpc*-*-* sparc*-*-* s390x-*-* visium-*-* msp430-*-* } } } } */ TEST (int, INT_MIN + 2, ALLOC_MAX); /* { dg-warning "argument 1 range \\\[13, \[0-9\]+\\\] exceeds maximum object size 12" } */ TEST (int, -3, ALLOC_MAX); /* { dg-warning "argument 1 range \\\[13, \[0-9\]+\\\] exceeds maximum object size 12" } */ TEST (int, -2, ALLOC_MAX); /* { dg-warning "argument 1 range \\\[13, \[0-9\]+\\\] exceeds maximum object size 12" } */ diff --git a/gcc/testsuite/gcc.dg/cpp/assert4.c b/gcc/testsuite/gcc.dg/cpp/assert4.c index 92e3dba5ce6ca58105aee20e7f220d32cdb90bcc..1b40ddeb6c367529749146a81ce24ccbf969998f 100644 --- a/gcc/testsuite/gcc.dg/cpp/assert4.c +++ b/gcc/testsuite/gcc.dg/cpp/assert4.c @@ -151,8 +151,8 @@ || (!defined __alpha_ev4__ && #cpu(ev4)) # error # endif -#elif #cpu(alpha) || #machine(alpha) || #cpu(cix) || #cpu(fix) || #cpu(bwx) \ - || #cpu(max) || #cpu(ev6) || #cpu(ev5) || #cpu(ev4) +#elif (#cpu(alpha) || #machine(alpha) || #cpu(cix) || #cpu(fix) || #cpu(bwx) \ + || #cpu(max) || #cpu(ev6) || #cpu(ev5) || #cpu(ev4)) && !#cpu(sw_64) # error #endif diff --git a/gcc/testsuite/gcc.dg/pr44194-1.c b/gcc/testsuite/gcc.dg/pr44194-1.c index 20b74a5aa122e0ad57ed714812d9e91c71484260..7efd3b6abd2069a8f5e4f3c20782165ef14e0f02 100644 --- a/gcc/testsuite/gcc.dg/pr44194-1.c +++ b/gcc/testsuite/gcc.dg/pr44194-1.c @@ -1,4 +1,4 @@ -/* { dg-do compile { target { { { { { { { { { { i?86-*-* x86_64-*-* } && x32 } || lp64 } && { ! s390*-*-* } } && { ! hppa*64*-*-* } } && { ! alpha*-*-* } } && { { ! powerpc*-*-linux* } || powerpc_elfv2 } } && { ! nvptx-*-* } } } } } } */ +/* { dg-do compile { target { { { { { { { { { { { i?86-*-* x86_64-*-* } && x32 } || lp64 } && { ! s390*-*-* } } && { ! hppa*64*-*-* } } && { ! alpha*-*-* } } && { ! sw_64*-*-* } } && { { ! powerpc*-*-linux* } || powerpc_elfv2 } } && { ! 
nvptx-*-* } } } } } } */ /* { dg-options "-O2 -fdump-rtl-dse1 -fdump-rtl-final" } */ /* Restrict to 64-bit targets since 32-bit targets usually return small diff --git a/gcc/testsuite/gcc.dg/stack-usage-1.c b/gcc/testsuite/gcc.dg/stack-usage-1.c index be1254a7348d8b9fedec182c186653746e70c4fa..70d0948db44ba932d9a6d2d016c9d489a11ee206 100644 --- a/gcc/testsuite/gcc.dg/stack-usage-1.c +++ b/gcc/testsuite/gcc.dg/stack-usage-1.c @@ -31,6 +31,8 @@ # define SIZE 192 #elif defined (__alpha__) # define SIZE 240 +#elif defined (__sw_64__) +# define SIZE 240 #elif defined (__ia64__) # define SIZE 272 #elif defined(__mips__) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/20040204-1.c b/gcc/testsuite/gcc.dg/tree-ssa/20040204-1.c index 3e07a359b5560e7bb93eb0320fab84dccd642756..ce3a9d080d74e01a937b4b7d87fffa40f0d2589e 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/20040204-1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/20040204-1.c @@ -33,4 +33,4 @@ void test55 (int x, int y) that the && should be emitted (based on BRANCH_COST). Fix this by teaching dom to look through && and register all components as true. */ -/* { dg-final { scan-tree-dump-times "link_error" 0 "optimized" { xfail { ! "alpha*-*-* arm*-*-* aarch64*-*-* powerpc*-*-* cris-*-* crisv32-*-* hppa*-*-* i?86-*-* mmix-*-* mips*-*-* m68k*-*-* moxie-*-* nds32*-*-* s390*-*-* sh*-*-* sparc*-*-* visium-*-* x86_64-*-* riscv*-*-* or1k*-*-* msp430-*-* pru*-*-*" } } } } */ +/* { dg-final { scan-tree-dump-times "link_error" 0 "optimized" { xfail { ! "alpha*-*-* sw_64*-*-* arm*-*-* aarch64*-*-* powerpc*-*-* cris-*-* crisv32-*-* hppa*-*-* i?86-*-* mmix-*-* mips*-*-* m68k*-*-* moxie-*-* nds32*-*-* s390*-*-* sh*-*-* sparc*-*-* visium-*-* x86_64-*-* riscv*-*-* or1k*-*-* msp430-*-* pru*-*-*" } } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c index e4daa9d4ff38304ef9cf8db02a246bbb9bfa1486..d5342cf3a6e2934163d690eee92b967e82ecf54a 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c @@ -27,4 +27,4 @@ foo () but the loop reads only one element at a time, and DOM cannot resolve these. The same happens on powerpc depending on the SIMD support available. */ -/* { dg-final { scan-tree-dump "return 28;" "optimized" { xfail { { alpha*-*-* hppa*64*-*-* nvptx*-*-* } || { { { lp64 && { powerpc*-*-* sparc*-*-* riscv*-*-* } } || aarch64_sve } || { arm*-*-* && { ! arm_neon } } } } } } } */ +/* { dg-final { scan-tree-dump "return 28;" "optimized" { xfail { { alpha*-*-* sw_64*-*-* hppa*64*-*-* nvptx*-*-* } || { { { lp64 && { powerpc*-*-* sparc*-*-* riscv*-*-* } } || aarch64_sve } || { arm*-*-* && { ! arm_neon } } } } } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c index 0224997f18af44a416b4f7c85f1e6545d831ca92..81884e7b1ca1efb92ae69f50622bccc3ff0df0dd 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c @@ -23,7 +23,7 @@ f1 (int i, ...) } /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 || llp64 } } } } } } */ /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -44,7 +44,7 @@ f2 (int i, ...) architecture or bytes on 64-bit architecture. */ /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save \[148\] GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save \[148\] GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 8 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 8 GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 1 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 8 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -63,7 +63,7 @@ f3 (int i, ...) /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 0 GPR units and 1 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 0 GPR units and 16 FPR units" "stdarg" { target aarch64*-*-* } } } */ -/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 2" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[1-9\]\[0-9\]* GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[1-9\]\[0-9\]* GPR units" "stdarg" { target ia64-*-* } } } */ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[1-9\]\[0-9\]* GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */ @@ -79,7 +79,7 @@ f4 (int i, ...) } /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 || llp64 } } } } } } */ /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -98,7 +98,7 @@ f5 (int i, ...) } /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -119,7 +119,7 @@ f6 (int i, ...) } /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save (3|12|24) GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save (3|12|24) GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 24 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 24 GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 3 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 24 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save (3|12|24) GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -137,7 +137,7 @@ f7 (int i, ...) } /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 || llp64 } } } } } } */ /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -157,7 +157,7 @@ f8 (int i, ...) } /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -175,7 +175,7 @@ f9 (int i, ...) } /* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ /* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -195,7 +195,7 @@ f10 (int i, ...) } /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 || llp64 } } } } } } */ /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -216,7 +216,7 @@ f11 (int i, ...) } /* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save (3|12|24) GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ /* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save (3|12|24) GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save 24 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save 24 GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save 3 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save 24 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save (3|12|24) GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -237,7 +237,7 @@ f12 (int i, ...) } /* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ /* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */ -/* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 24 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 24 GPR units and 2" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 0 GPR units and 3 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 0 GPR units and 48 FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -258,7 +258,7 @@ f13 (int i, ...) } /* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 || llp64 } } } } } } */ /* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */ -/* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 24 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 24 GPR units and 2" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 0 GPR units and 3 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 0 GPR units and 48 FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -279,7 +279,7 @@ f14 (int i, ...) } /* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save \[148\] GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ /* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save \[148\] GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */ -/* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save 24 GPR units and 3" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save 24 GPR units and 3" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save 1 GPR units and 2 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save 8 GPR units and 32 FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -308,7 +308,7 @@ f15 (int i, ...) /* { dg-final { scan-tree-dump "f15: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* We may be able to improve upon this after fixing PR66010/PR66013. */ -/* { dg-final { scan-tree-dump "f15: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f15: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump-not "f15: va_list escapes 0, needs to save 0 GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ /* { dg-final { scan-tree-dump-not "f15: va_list escapes 0, needs to save 0 GPR units" "stdarg" { target ia64-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-3.c b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-3.c index d044654e0416c2f7abe67c1d9f4a9b3221d57e4a..d92290bb02da32d9d36fa3ffdf40deef3222c5a5 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-3.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-3.c @@ -22,7 +22,7 @@ f1 (int i, ...) } /* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 } } } } } } */ /* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -38,7 +38,7 @@ f2 (int i, ...) } /* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */ /* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -57,7 +57,7 @@ f3 (int i, ...) } /* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */ /* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -74,7 +74,7 @@ f4 (int i, ...) } /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 } } } } } } */ /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -91,7 +91,7 @@ f5 (int i, ...) } /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */ /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -110,7 +110,7 @@ f6 (int i, ...) } /* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */ /* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -127,7 +127,7 @@ f7 (int i, ...) } /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 } } } } } } */ /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -144,7 +144,7 @@ f8 (int i, ...) } /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */ /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -161,7 +161,7 @@ f10 (int i, ...) } /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */ /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -178,7 +178,7 @@ f11 (int i, ...) } /* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 } } } } } } */ /* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -195,7 +195,7 @@ f12 (int i, ...) } /* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */ /* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-4.c b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-4.c index 1a637d6efe4cb46852f0c75b509853d8b41f8238..8b2f38929a785d24d654bcd90385601616b91ed5 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-4.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-4.c @@ -25,7 +25,7 @@ f1 (int i, ...) } /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { x32 || { ! { ia32 || llp64 } } } } } } } */ /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f1: va_list escapes \[01\], needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -43,7 +43,7 @@ f2 (int i, ...) 
} /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 0 GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { x32 || { ! { ia32 || llp64 } } } } } } } */ /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 0 GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */ -/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 2" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 0 GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 0 GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f2: va_list escapes \[01\], needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -67,7 +67,7 @@ f3 (int i, ...) } /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[148\] GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[148\] GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 1 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -89,7 +89,7 @@ f4 (int i, ...) } /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 || llp64 } } } } } } */ /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */ -/* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 8 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 8 GPR units and 2" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 0 GPR units and 1 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 0 GPR units and 16 FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-5.c b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-5.c index c8ad4fe320db9f8139aba22b9fb92f1d2a5d4b87..c3eba1e21d7742060fc0b70632ddd355051fb07b 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-5.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-5.c @@ -23,7 +23,7 @@ f1 (int i, ...) va_end (ap); } /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ -/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ @@ -37,7 +37,7 @@ f2 (int i, ...) va_end (ap); } /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ -/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ @@ -56,7 +56,7 @@ f3 (int i, ...) } } /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 || llp64 } } } } } } */ -/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 32 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 32 GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 1 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ @@ -75,7 +75,7 @@ f4 (int i, ...) } } /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 16 GPR units and 16 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ -/* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 24 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 24 GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 2 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 24 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ @@ -90,7 +90,7 @@ f5 (int i, ...) bar (__real__ ci + __imag__ ci); } /* { dg-final { scan-tree-dump "f5: va_list escapes 0, needs to save 16 GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ -/* { dg-final { scan-tree-dump "f5: va_list escapes 0, needs to save 32 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f5: va_list escapes 0, needs to save 32 GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f5: va_list escapes 0, needs to save (4|2) GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f5: va_list escapes 0, needs to save 16 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ @@ -105,7 +105,7 @@ f6 (int i, ...) bar (__real__ ci + __imag__ cd); } /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 8 GPR units and 32 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ -/* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 32 GPR units and 3" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 32 GPR units and 3" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save (3|2) GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 8 GPR units and 32 FPR units" "stdarg" { target aarch64*-*-* } } } */ @@ -120,6 +120,6 @@ f7 (int i, ...) bar (__real__ cd + __imag__ cd); } /* { dg-final { scan-tree-dump "f7: va_list escapes 0, needs to save 0 GPR units and 64 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 || llp64 } } } } } } */ -/* { dg-final { scan-tree-dump "f7: va_list escapes 0, needs to save 32 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f7: va_list escapes 0, needs to save 32 GPR units and 2" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f7: va_list escapes 0, needs to save 2 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f7: va_list escapes 0, needs to save 0 GPR units and 64 FPR units" "stdarg" { target aarch64*-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-6.c b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-6.c index be7bc0d12b3c5b293d9a841e98f015f338356e64..c2db580cb9bb4f7eb5e2c10bfd488f781b49d1da 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-6.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-6.c @@ -28,7 +28,7 @@ bar (int x, char const *y, ...) /* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */ /* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ diff --git a/gcc/testsuite/go.test/go-test.exp b/gcc/testsuite/go.test/go-test.exp index 51f9b381d677039d69da38ef1e9a3df9b8fb1517..18e866ad32eb11f6140d374566822b9dd9cec869 100644 --- a/gcc/testsuite/go.test/go-test.exp +++ b/gcc/testsuite/go.test/go-test.exp @@ -193,6 +193,9 @@ proc go-set-goarch { } { "alpha*-*-*" { set goarch "alpha" } + "sw_64*-*-*" { + set goarch "sw_64" + } "arm*-*-*" - "ep9312*-*-*" - "strongarm*-*-*" - diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index bd62a0d9e799fc703935d24c565ec1beb823a1d5..b618c2eed8eca4a565779a85fd77a147d311cedc 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -3306,6 +3306,7 @@ proc check_effective_target_vect_cmdline_needed { } { return [check_cached_effective_target vect_cmdline_needed { if { [istarget alpha*-*-*] || [istarget ia64-*-*] + || [istarget sw_64-*-*] || (([istarget i?86-*-*] || [istarget x86_64-*-*]) && ![is-effective-target ia32]) || ([istarget powerpc*-*-*] @@ -3334,6 +3335,7 @@ proc check_effective_target_vect_int { } { || [istarget amdgcn-*-*] || [istarget sparc*-*-*] || [istarget alpha*-*-*] + || [istarget sw_64*-*-*] || [istarget ia64-*-*] || [istarget aarch64*-*-*] || [is-effective-target arm_neon] @@ -6451,6 +6453,7 @@ proc check_effective_target_vect_no_int_min_max { } { return [check_cached_effective_target_indexed vect_no_int_min_max { expr { [istarget sparc*-*-*] || [istarget alpha*-*-*] + || [istarget sw_64*-*-*] || ([istarget mips*-*-*] && 
[et-is-effective-target mips_loongson_mmi]) }}] } @@ -6463,7 +6466,7 @@ proc check_effective_target_vect_no_int_min_max { } { proc check_effective_target_vect_no_int_add { } { # Alpha only supports vector add on V8QI and V4HI. return [check_cached_effective_target_indexed vect_no_int_add { - expr { [istarget alpha*-*-*] }}] + expr { [istarget alpha*-*-*] || [istarget sw_64*-*-*] }}] } # Return 1 if the target plus current options does not support vector @@ -7545,6 +7548,7 @@ proc check_effective_target_sync_long_long { } { || [istarget aarch64*-*-*] || [istarget arm*-*-*] || [istarget alpha*-*-*] + || [istarget sw_64*-*-*] || ([istarget sparc*-*-*] && [check_effective_target_lp64]) || [istarget s390*-*-*] } { return 1 @@ -7626,6 +7630,7 @@ proc check_effective_target_sync_long_long_runtime { } { } } "" ]) || [istarget alpha*-*-*] + || [istarget sw_64*-*-*] || ([istarget sparc*-*-*] && [check_effective_target_lp64] && [check_effective_target_ultrasparc_hw]) @@ -7642,6 +7647,7 @@ proc check_effective_target_bswap { } { return [check_cached_effective_target bswap { expr { [istarget aarch64*-*-*] || [istarget alpha*-*-*] + || [istarget sw_64*-*-*] || [istarget i?86-*-*] || [istarget x86_64-*-*] || [istarget m68k-*-*] || [istarget powerpc*-*-*] @@ -7666,6 +7672,7 @@ proc check_effective_target_sync_int_long { } { || [istarget i?86-*-*] || [istarget x86_64-*-*] || [istarget aarch64*-*-*] || [istarget alpha*-*-*] + || [istarget sw_64*-*-*] || [istarget arm*-*-linux-*] || [istarget arm*-*-uclinuxfdpiceabi] || ([istarget arm*-*-*] @@ -7690,6 +7697,7 @@ proc check_effective_target_sync_char_short { } { || [istarget ia64-*-*] || [istarget i?86-*-*] || [istarget x86_64-*-*] || [istarget alpha*-*-*] + || [istarget sw_64*-*-*] || [istarget arm*-*-linux-*] || [istarget arm*-*-uclinuxfdpiceabi] || ([istarget arm*-*-*] @@ -8118,6 +8126,7 @@ proc check_effective_target_fd_truncate { } { proc add_options_for_ieee { flags } { if { [istarget alpha*-*-*] + || [istarget sw_64*-*-*] || [istarget sh*-*-*] } { return "$flags -mieee" } diff --git a/gcc/toplev.c b/gcc/toplev.c index 51e6bd400ea2e8ad09498e55e70518cc1aa545c0..5b90804d70143aa082f7778fe350c6a0985225ca 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -2214,6 +2214,18 @@ do_compile () { process_options (); +#ifdef FLAG_SW64_M32 + if (TARGET_SW_M32) + { + char cwd[200]; + getcwd (cwd, sizeof (cwd)); + if (strstr (cwd, "429") == NULL) + target_flags = target_flags & (~MASK_SW_M32); + else + flag_tree_parallelize_loops = 1; + } +#endif + /* Don't do any more if an error has already occurred. */ if (!seen_error ()) { diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index 781831c391822c517e3ff425d1eca17d5912daa2..933619dd2e36ba7ef58f3e4ac100e35c651c0d19 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -1307,7 +1307,11 @@ schedule_prefetches (struct mem_ref_group *groups, unsigned unroll_factor, /* At most param_simultaneous_prefetches should be running at the same time. */ +#ifdef FLAG_SW64_PREFETCH + remaining_prefetch_slots = param_simultaneous_prefetches * 5; +#else remaining_prefetch_slots = param_simultaneous_prefetches; +#endif /* The prefetch will run for AHEAD iterations of the original loop, i.e., AHEAD / UNROLL_FACTOR iterations of the unrolled loop. In each iteration, @@ -1331,8 +1335,10 @@ schedule_prefetches (struct mem_ref_group *groups, unsigned unroll_factor, /* The loop is far from being sufficiently unrolled for this prefetch. Do not generate prefetch to avoid many redudant prefetches. 
*/ +#ifndef FLAG_SW64_PREFETCH if (ref->prefetch_mod / unroll_factor > PREFETCH_MOD_TO_UNROLL_FACTOR_RATIO) continue; +#endif /* If we need to prefetch the reference each PREFETCH_MOD iterations, and we unroll the loop UNROLL_FACTOR times, we need to insert @@ -1403,6 +1409,19 @@ estimate_prefetch_count (struct mem_ref_group *groups, unsigned unroll_factor) return prefetch_count; } +#ifdef FLAG_SW64_PREFETCH +/* Due to the need for SW to dynamically adjust the value of PF during + prefetching, PF needs to handle negative values. However, since Common + Joined UInteger Var(PFX) is used, the function needs to convert the + unsigned range (0-200) to (-100,100). */ +int convert_default_to_sw (unsigned int pf_value) +{ + if (pf_value > 100) + return 100 - (int) pf_value; + return pf_value; +} +#endif + /* Issue prefetches for the reference REF into loop as decided before. HEAD is the number of iterations to prefetch ahead. UNROLL_FACTOR is the factor by which LOOP was unrolled. */ @@ -1437,8 +1456,14 @@ issue_prefetch_ref (struct mem_ref *ref, unsigned unroll_factor, unsigned ahead) if (cst_and_fits_in_hwi (ref->group->step)) { /* Determine the address to prefetch. */ +#ifdef FLAG_SW64_PREFETCH + delta = (ahead + ap * ref->prefetch_mod) * + int_cst_value (ref->group->step) * 2; +#else delta = (ahead + ap * ref->prefetch_mod) * int_cst_value (ref->group->step); +#endif + addr = fold_build_pointer_plus_hwi (addr_base, delta); addr = force_gimple_operand_gsi (&bsi, unshare_expr (addr), true, NULL, true, GSI_SAME_STMT); @@ -1628,8 +1653,21 @@ should_unroll_loop_p (class loop *loop, class tree_niter_desc *desc, as well; but the unrolling/prefetching is usually more profitable for loops consisting of a single basic block, and we want to limit the code growth. */ +#ifdef FLAG_SW64_PREFETCH + if (flag_sw_prefetch_unroll == 1) + { + if (loop->num_nodes > 7) + return false; + } + else + { + if (loop->num_nodes > 2) + return false; + } +#else if (loop->num_nodes > 2) return false; +#endif return true; } @@ -1675,6 +1713,12 @@ determine_unroll_factor (class loop *loop, struct mem_ref_group *refs, if (should_issue_prefetch_p (ref)) { mod_constraint = ref->prefetch_mod; +#ifdef FLAG_SW64_PREFETCH + /* TODO: mod_constraint is set to 4 empirically; + ideally it should be computed precisely. */ + if (mod_constraint > upper_bound) + mod_constraint = 4; +#endif nfactor = least_common_multiple (mod_constraint, factor); if (nfactor <= upper_bound) factor = nfactor; diff --git a/include/longlong.h b/include/longlong.h index 22bd54604a8d84b3a2b20f187fa3feb60faa221f..5c7b5a0a1d2eb47db91b6d8e42816ca62b635a47 100644 --- a/include/longlong.h +++ b/include/longlong.h @@ -1458,6 +1458,60 @@ extern UDItype __umulsidi3 (USItype, USItype); #define UDIV_TIME 230 #endif /* sparc64 */ +#if defined (__sw_64) && W_TYPE_SIZE == 64 +/* There is a bug in g++ before version 5 that + errors on __builtin_sw_64_umulh. 
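[Editor's note, not part of the patch] A minimal standalone sketch of the mapping that convert_default_to_sw above performs: option values 0-100 are kept as-is and 101-200 are folded onto -1..-100, so a single Common Joined UInteger option can encode a signed prefetch factor. The assert checks here are illustrative only.

#include <assert.h>

/* Same mapping as convert_default_to_sw in the patch: fold the unsigned
   option range 0..200 onto the signed range -100..100.  */
static int
convert_default_to_sw (unsigned int pf_value)
{
  if (pf_value > 100)
    return 100 - (int) pf_value;
  return (int) pf_value;
}

int
main (void)
{
  assert (convert_default_to_sw (0) == 0);      /* low end unchanged */
  assert (convert_default_to_sw (100) == 100);  /* top of the positive range */
  assert (convert_default_to_sw (101) == -1);   /* first encoded negative value */
  assert (convert_default_to_sw (200) == -100); /* most negative value */
  return 0;
}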
*/ +#if !defined(__cplusplus) || __GNUC__ >= 5 +#define umul_ppmm(ph, pl, m0, m1) \ + do { \ + UDItype __m0 = (m0), __m1 = (m1); \ + (ph) = __builtin_sw_64_umulh (__m0, __m1); \ + (pl) = __m0 * __m1; \ + } while (0) +#define UMUL_TIME 46 +#endif /* !c++ */ +#ifndef LONGLONG_STANDALONE +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { UDItype __r; \ + (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \ + (r) = __r; \ + } while (0) +extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype); +#define UDIV_TIME 220 +#endif /* LONGLONG_STANDALONE */ +#ifdef __sw_64_cix__ +#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzl (X)) +#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X)) +#define COUNT_LEADING_ZEROS_0 64 +#else +#define count_leading_zeros(COUNT,X) \ + do { \ + UDItype __xr = (X), __t, __a; \ + __t = __builtin_sw_64_cmpbge (0, __xr); \ + __a = __clz_tab[__t ^ 0xff] - 1; \ + __t = __builtin_sw_64_extbl (__xr, __a); \ + (COUNT) = 64 - (__clz_tab[__t] + __a*8); \ + } while (0) +#define count_trailing_zeros(COUNT,X) \ + do { \ + UDItype __xr = (X), __t, __a; \ + __t = __builtin_sw_64_cmpbge (0, __xr); \ + __t = ~__t & -~__t; \ + __a = ((__t & 0xCC) != 0) * 2; \ + __a += ((__t & 0xF0) != 0) * 4; \ + __a += ((__t & 0xAA) != 0); \ + __t = __builtin_sw_64_extbl (__xr, __a); \ + __a <<= 3; \ + __t &= -__t; \ + __a += ((__t & 0xCC) != 0) * 2; \ + __a += ((__t & 0xF0) != 0) * 4; \ + __a += ((__t & 0xAA) != 0); \ + (COUNT) = __a; \ + } while (0) +#endif /* __sw_64_cix__ */ +#endif /* __sw_64 */ +//__sw_64 + #if defined (__vax__) && W_TYPE_SIZE == 32 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("addl2 %5,%1\n\tadwc %3,%0" \ diff --git a/intl/dcigettext.c b/intl/dcigettext.c index a8d4a14d273b153b117b507ec76356635ccd876e..281f9340b310c014cff36a16fffba0ae9c975a70 100644 --- a/intl/dcigettext.c +++ b/intl/dcigettext.c @@ -73,7 +73,7 @@ extern int errno; /* Guess whether integer division by zero raises signal SIGFPE. Set to 1 only if you know for sure. In case of doubt, set to 0. */ # if defined __alpha__ || defined __arm__ || defined __i386__ \ - || defined __m68k__ || defined __s390__ + || defined __m68k__ || defined __s390__ || defined __sw_64__ # define INTDIV0_RAISES_SIGFPE 1 # else # define INTDIV0_RAISES_SIGFPE 0 diff --git a/libatomic/configure.tgt b/libatomic/configure.tgt index 5dd0926d207f1a542a7a7ee4cc00084c3c74bdfd..423324de98dea865a38cdb163cbbe311c2ff70ce 100644 --- a/libatomic/configure.tgt +++ b/libatomic/configure.tgt @@ -81,6 +81,12 @@ case "${target_cpu}" in ARCH=sparc ;; + sw_64*) + # fenv.c needs this option to generate inexact exceptions. + XCFLAGS="${XCFLAGS} -mfp-trap-mode=sui" + ARCH=sw_64 + ;; + i[3456]86) case " ${CC} ${CFLAGS} " in *" -m64 "*|*" -mx32 "*) diff --git a/libcpp/lex.c b/libcpp/lex.c index 665297af776bb81d615122e2794d6300a2299385..df0329f6134dfb3fe7f2b22c3a52f479527b82d7 100644 --- a/libcpp/lex.c +++ b/libcpp/lex.c @@ -168,6 +168,8 @@ acc_char_cmp (word_type val, word_type c) /* We can get exact results using a compare-bytes instruction. Get (val == c) via (0 >= (val ^ c)). 
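[Editor's note, not part of the patch] The count_leading_zeros / count_trailing_zeros macros added to longlong.h above first locate a byte with the cmpbge builtin and then recover a bit index from a one-hot 8-bit mask using the 0xCC/0xF0/0xAA tests. A small portable sketch of that index-recovery step, for illustration only:

#include <assert.h>

/* Recover i from a one-hot value t == 1 << i (0 <= i < 8).  Each mask
   reads off one bit of the index, as in the longlong.h macros above.  */
static unsigned int
onehot8_index (unsigned int t)
{
  unsigned int a = ((t & 0xCC) != 0) * 2;  /* bit 1 of the index (bits 2,3,6,7) */
  a += ((t & 0xF0) != 0) * 4;              /* bit 2 of the index (bits 4..7) */
  a += ((t & 0xAA) != 0);                  /* bit 0 of the index (odd bits) */
  return a;
}

int
main (void)
{
  for (unsigned int i = 0; i < 8; i++)
    assert (onehot8_index (1u << i) == i);
  return 0;
}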
*/ return __builtin_alpha_cmpbge (0, val ^ c); +#elif defined(__GNUC__) && defined(__sw_64__) + return __builtin_sw_64_cmpbge (0, val ^ c); #else word_type magic = 0x7efefefeU; if (sizeof(word_type) == 8) @@ -186,7 +188,7 @@ static inline int acc_char_index (word_type cmp ATTRIBUTE_UNUSED, word_type val ATTRIBUTE_UNUSED) { -#if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN +#if defined(__GNUC__) && (defined(__alpha__) || defined(__sw_64__))&& !WORDS_BIGENDIAN /* The cmpbge instruction sets *bits* of the result corresponding to matches in the bytes with no false positives. */ return __builtin_ctzl (cmp); diff --git a/libffi/Makefile.in b/libffi/Makefile.in index 745bdd80777b4f6ca08c18ed698b6466200a4b08..779b0e02302d3e9009a32a2cb90addcbcd8c0b72 100644 --- a/libffi/Makefile.in +++ b/libffi/Makefile.in @@ -552,6 +552,7 @@ noinst_HEADERS = \ src/sh/ffitarget.h \ src/sh64/ffitarget.h \ src/sparc/ffitarget.h src/sparc/internal.h \ + src/sw_64/ffitarget.h src/sw_64/internal.h \ src/tile/ffitarget.h \ src/vax/ffitarget.h \ src/x86/ffitarget.h src/x86/internal.h src/x86/internal64.h \ @@ -588,6 +589,7 @@ EXTRA_libffi_la_SOURCES = \ src/sh/ffi.c src/sh/sysv.S \ src/sh64/ffi.c src/sh64/sysv.S \ src/sparc/ffi.c src/sparc/ffi64.c src/sparc/v8.S src/sparc/v9.S \ + src/sw_64/ffi.c src/sw_64/sysv.S \ src/tile/ffi.c src/tile/tile.S \ src/vax/ffi.c src/vax/elfbsd.S \ src/x86/ffi.c src/x86/sysv.S \ @@ -1012,6 +1014,16 @@ src/sparc/v8.lo: src/sparc/$(am__dirstamp) \ src/sparc/$(DEPDIR)/$(am__dirstamp) src/sparc/v9.lo: src/sparc/$(am__dirstamp) \ src/sparc/$(DEPDIR)/$(am__dirstamp) +src/sw_64/$(am__dirstamp): + @$(MKDIR_P) src/sw_64 + @: > src/sw_64/$(am__dirstamp) +src/sw_64/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) src/sw_64/$(DEPDIR) + @: > src/sw_64/$(DEPDIR)/$(am__dirstamp) +src/sw_64/ffi.lo: src/sw_64/$(am__dirstamp) \ + src/sw_64/$(DEPDIR)/$(am__dirstamp) +src/sw_64/sysv.lo: src/sw_64/$(am__dirstamp) \ + src/sw_64/$(DEPDIR)/$(am__dirstamp) src/tile/$(am__dirstamp): @$(MKDIR_P) src/tile @: > src/tile/$(am__dirstamp) @@ -1129,6 +1141,10 @@ mostlyclean-compile: -rm -f src/sh64/*.lo -rm -f src/sparc/*.$(OBJEXT) -rm -f src/sparc/*.lo + -rm -f src/sw_64/ffi.$(OBJEXT) + -rm -f src/sw_64/ffi.lo + -rm -f src/sw_64/sysv.$(OBJEXT) + -rm -f src/sw_64/sysv.lo -rm -f src/tile/*.$(OBJEXT) -rm -f src/tile/*.lo -rm -f src/vax/*.$(OBJEXT) @@ -1211,6 +1227,8 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@src/sparc/$(DEPDIR)/ffi64.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/sparc/$(DEPDIR)/v8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/sparc/$(DEPDIR)/v9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@src/sw_64/$(DEPDIR)/ffi.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@src/sw_64/$(DEPDIR)/sysv.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/tile/$(DEPDIR)/ffi.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/tile/$(DEPDIR)/tile.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/vax/$(DEPDIR)/elfbsd.Plo@am__quote@ @@ -1307,6 +1325,7 @@ clean-libtool: -rm -rf src/sh/.libs src/sh/_libs -rm -rf src/sh64/.libs src/sh64/_libs -rm -rf src/sparc/.libs src/sparc/_libs + -rm -rf src/sw_64/.libs src/sw_64/_libs -rm -rf src/tile/.libs src/tile/_libs -rm -rf src/vax/.libs src/vax/_libs -rm -rf src/x86/.libs src/x86/_libs @@ -1669,6 +1688,8 @@ distclean-generic: -rm -f src/sh64/$(am__dirstamp) -rm -f src/sparc/$(DEPDIR)/$(am__dirstamp) -rm -f src/sparc/$(am__dirstamp) + -rm -f src/sw_64/$(DEPDIR)/$(am__dirstamp) + -rm -f src/sw_64/$(am__dirstamp) -rm -f 
src/tile/$(DEPDIR)/$(am__dirstamp) -rm -f src/tile/$(am__dirstamp) -rm -f src/vax/$(DEPDIR)/$(am__dirstamp) @@ -1691,7 +1712,7 @@ clean-am: clean-aminfo clean-generic clean-libtool clean-local \ distclean: distclean-recursive -rm -f $(am__CONFIG_DISTCLEAN_FILES) - -rm -rf src/$(DEPDIR) src/aarch64/$(DEPDIR) src/alpha/$(DEPDIR) src/arc/$(DEPDIR) src/arm/$(DEPDIR) src/avr32/$(DEPDIR) src/bfin/$(DEPDIR) src/cris/$(DEPDIR) src/frv/$(DEPDIR) src/ia64/$(DEPDIR) src/m32r/$(DEPDIR) src/m68k/$(DEPDIR) src/m88k/$(DEPDIR) src/metag/$(DEPDIR) src/microblaze/$(DEPDIR) src/mips/$(DEPDIR) src/moxie/$(DEPDIR) src/nios2/$(DEPDIR) src/or1k/$(DEPDIR) src/pa/$(DEPDIR) src/powerpc/$(DEPDIR) src/riscv/$(DEPDIR) src/s390/$(DEPDIR) src/sh/$(DEPDIR) src/sh64/$(DEPDIR) src/sparc/$(DEPDIR) src/tile/$(DEPDIR) src/vax/$(DEPDIR) src/x86/$(DEPDIR) src/xtensa/$(DEPDIR) + -rm -rf src/$(DEPDIR) src/aarch64/$(DEPDIR) src/alpha/$(DEPDIR) src/arc/$(DEPDIR) src/arm/$(DEPDIR) src/avr32/$(DEPDIR) src/bfin/$(DEPDIR) src/cris/$(DEPDIR) src/frv/$(DEPDIR) src/ia64/$(DEPDIR) src/m32r/$(DEPDIR) src/m68k/$(DEPDIR) src/m88k/$(DEPDIR) src/metag/$(DEPDIR) src/microblaze/$(DEPDIR) src/mips/$(DEPDIR) src/moxie/$(DEPDIR) src/nios2/$(DEPDIR) src/or1k/$(DEPDIR) src/pa/$(DEPDIR) src/powerpc/$(DEPDIR) src/riscv/$(DEPDIR) src/s390/$(DEPDIR) src/sh/$(DEPDIR) src/sh64/$(DEPDIR) src/sparc/$(DEPDIR) src/sw_64/$(DEPDIR) src/tile/$(DEPDIR) src/vax/$(DEPDIR) src/x86/$(DEPDIR) src/xtensa/$(DEPDIR) -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-hdr distclean-libtool distclean-local distclean-tags @@ -1830,7 +1851,7 @@ installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f $(am__CONFIG_DISTCLEAN_FILES) -rm -rf $(top_srcdir)/autom4te.cache - -rm -rf src/$(DEPDIR) src/aarch64/$(DEPDIR) src/alpha/$(DEPDIR) src/arc/$(DEPDIR) src/arm/$(DEPDIR) src/avr32/$(DEPDIR) src/bfin/$(DEPDIR) src/cris/$(DEPDIR) src/frv/$(DEPDIR) src/ia64/$(DEPDIR) src/m32r/$(DEPDIR) src/m68k/$(DEPDIR) src/m88k/$(DEPDIR) src/metag/$(DEPDIR) src/microblaze/$(DEPDIR) src/mips/$(DEPDIR) src/moxie/$(DEPDIR) src/nios2/$(DEPDIR) src/or1k/$(DEPDIR) src/pa/$(DEPDIR) src/powerpc/$(DEPDIR) src/riscv/$(DEPDIR) src/s390/$(DEPDIR) src/sh/$(DEPDIR) src/sh64/$(DEPDIR) src/sparc/$(DEPDIR) src/tile/$(DEPDIR) src/vax/$(DEPDIR) src/x86/$(DEPDIR) src/xtensa/$(DEPDIR) + -rm -rf src/$(DEPDIR) src/aarch64/$(DEPDIR) src/alpha/$(DEPDIR) src/arm/$(DEPDIR) src/avr32/$(DEPDIR) src/bfin/$(DEPDIR) src/cris/$(DEPDIR) src/frv/$(DEPDIR) src/ia64/$(DEPDIR) src/m32r/$(DEPDIR) src/m68k/$(DEPDIR) src/m88k/$(DEPDIR) src/metag/$(DEPDIR) src/microblaze/$(DEPDIR) src/mips/$(DEPDIR) src/moxie/$(DEPDIR) src/nios2/$(DEPDIR) src/or1k/$(DEPDIR) src/pa/$(DEPDIR) src/powerpc/$(DEPDIR) src/riscv/$(DEPDIR) src/s390/$(DEPDIR) src/sh/$(DEPDIR) src/sh64/$(DEPDIR) src/sparc/$(DEPDIR) src/sw_64/$(DEPDIR) src/tile/$(DEPDIR) src/vax/$(DEPDIR) src/x86/$(DEPDIR) src/xtensa/$(DEPDIR) -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-aminfo \ maintainer-clean-generic maintainer-clean-local \ diff --git a/libffi/configure.host b/libffi/configure.host index 786b32c5bb00c3efb76d6daf15a024ea4c7e00d4..c9a3ecad632eced71e5a20d2ceb1e499cd71f1df 100644 --- a/libffi/configure.host +++ b/libffi/configure.host @@ -219,6 +219,13 @@ case "${host}" in SOURCES="ffi.c ffi64.c v8.S v9.S" ;; + sw_64*-*-*) + TARGET=SW_64; TARGETDIR=sw_64; + # Support 128-bit long double, changeable via command-line switch. 
+ HAVE_LONG_DOUBLE='defined(__LONG_DOUBLE_128__)' + SOURCES="ffi.c sysv.S" + ;; + tile*-*) TARGET=TILE; TARGETDIR=tile SOURCES="ffi.c tile.S" diff --git a/libffi/src/sw_64/ffi.c b/libffi/src/sw_64/ffi.c new file mode 100644 index 0000000000000000000000000000000000000000..c882641148a4ba20c8d475a9995abbcb211fd27e --- /dev/null +++ b/libffi/src/sw_64/ffi.c @@ -0,0 +1,516 @@ +/* ----------------------------------------------------------------------- + ffi.c - Copyright (c) 2012 Anthony Green + Copyright (c) 1998, 2001, 2007, 2008 Red Hat, Inc. + + Sunway Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#include +#include +#include +#include "internal.h" + +/* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE; + all further uses in this file will refer to the 128-bit type. */ +#if defined(__LONG_DOUBLE_128__) +#if FFI_TYPE_LONGDOUBLE != 4 +#error FFI_TYPE_LONGDOUBLE out of date +#endif +#else +#undef FFI_TYPE_LONGDOUBLE +#define FFI_TYPE_LONGDOUBLE 4 +#endif + +extern void +ffi_call_sysv (void *stack, void *frame, unsigned flags, void *raddr, + void (*fn) (void), void *closure) FFI_HIDDEN; +extern void +ffi_closure_sysv (void) FFI_HIDDEN; +extern void +ffi_go_closure_sysv (void) FFI_HIDDEN; + +/* Promote a float value to its in-register double representation. + Unlike actually casting to double, this does not trap on NaN. */ +static inline UINT64 +lds (void *ptr) +{ + UINT64 ret; + asm("flds %0,%1" : "=f"(ret) : "m"(*(UINT32 *) ptr)); + return ret; +} + +/* And the reverse. */ +static inline void +sts (void *ptr, UINT64 val) +{ + asm("fsts %1,%0" : "=m"(*(UINT32 *) ptr) : "f"(val)); +} + +ffi_status FFI_HIDDEN +ffi_prep_cif_machdep (ffi_cif *cif) +{ + size_t bytes = 0; + int flags, i, avn; + ffi_type *rtype, *itype; + + if (cif->abi != FFI_OSF) + return FFI_BAD_ABI; + + /* Compute the size of the argument area. */ + for (i = 0, avn = cif->nargs; i < avn; i++) + { + itype = cif->arg_types[i]; + switch (itype->type) + { + case FFI_TYPE_INT: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + case FFI_TYPE_POINTER: + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + case FFI_TYPE_LONGDOUBLE: + /* All take one 8 byte slot. */ + bytes += 8; + break; + + case FFI_TYPE_VOID: + case FFI_TYPE_STRUCT: + /* Passed by value in N slots. 
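[Editor's note] A rough C model of the lds/sts helpers defined above, under the assumption stated in their comments: a float argument travels in the floating-point registers in its widened double representation, so the caller stores the double's bit pattern and the callee narrows it back. The real helpers use flds/fsts precisely because the plain C cast shown here would raise the invalid-operation exception on a signaling NaN; the function names below are illustrative only.

#include <stdint.h>
#include <string.h>

static uint64_t
float_to_register_image (float f)
{
  double d = (double) f;        /* may trap on SNaN; flds does not */
  uint64_t image;
  memcpy (&image, &d, sizeof image);
  return image;
}

static float
register_image_to_float (uint64_t image)
{
  double d;
  memcpy (&d, &image, sizeof d);
  return (float) d;             /* fsts performs this narrowing */
}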
*/ + bytes += ALIGN (itype->size, FFI_SIZEOF_ARG); + break; + + case FFI_TYPE_COMPLEX: + /* _Complex long double passed by reference; others in 2 slots. */ + if (itype->elements[0]->type == FFI_TYPE_LONGDOUBLE) + bytes += 8; + else + bytes += 16; + break; + + default: + abort (); + } + } + + /* Set the return type flag */ + rtype = cif->rtype; + switch (rtype->type) + { + case FFI_TYPE_VOID: + flags = SW_64_FLAGS (SW_64_ST_VOID, SW_64_LD_VOID); + break; + case FFI_TYPE_INT: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: + flags = SW_64_FLAGS (SW_64_ST_INT, SW_64_LD_INT32); + break; + case FFI_TYPE_FLOAT: + flags = SW_64_FLAGS (SW_64_ST_FLOAT, SW_64_LD_FLOAT); + break; + case FFI_TYPE_DOUBLE: + flags = SW_64_FLAGS (SW_64_ST_DOUBLE, SW_64_LD_DOUBLE); + break; + case FFI_TYPE_UINT8: + flags = SW_64_FLAGS (SW_64_ST_INT, SW_64_LD_UINT8); + break; + case FFI_TYPE_SINT8: + flags = SW_64_FLAGS (SW_64_ST_INT, SW_64_LD_SINT8); + break; + case FFI_TYPE_UINT16: + flags = SW_64_FLAGS (SW_64_ST_INT, SW_64_LD_UINT16); + break; + case FFI_TYPE_SINT16: + flags = SW_64_FLAGS (SW_64_ST_INT, SW_64_LD_SINT16); + break; + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + case FFI_TYPE_POINTER: + flags = SW_64_FLAGS (SW_64_ST_INT, SW_64_LD_INT64); + break; + case FFI_TYPE_LONGDOUBLE: + case FFI_TYPE_STRUCT: + /* Passed in memory, with a hidden pointer. */ + flags = SW_64_RET_IN_MEM; + break; + case FFI_TYPE_COMPLEX: + itype = rtype->elements[0]; + switch (itype->type) + { + case FFI_TYPE_FLOAT: + flags = SW_64_FLAGS (SW_64_ST_CPLXF, SW_64_LD_CPLXF); + break; + case FFI_TYPE_DOUBLE: + flags = SW_64_FLAGS (SW_64_ST_CPLXD, SW_64_LD_CPLXD); + break; + default: + if (rtype->size <= 8) + flags = SW_64_FLAGS (SW_64_ST_INT, SW_64_LD_INT64); + else + flags = SW_64_RET_IN_MEM; + break; + } + break; + default: + abort (); + } + cif->flags = flags; + + /* Include the hidden structure pointer in args requirement. */ + if (flags == SW_64_RET_IN_MEM) + bytes += 8; + /* Minimum size is 6 slots, so that ffi_call_sysv can pop them. */ + if (bytes < 6 * 8) + bytes = 6 * 8; + cif->bytes = bytes; + + return FFI_OK; +} + +static unsigned long +extend_basic_type (void *valp, int type, int argn) +{ + switch (type) + { + case FFI_TYPE_SINT8: + return *(SINT8 *) valp; + case FFI_TYPE_UINT8: + return *(UINT8 *) valp; + case FFI_TYPE_SINT16: + return *(SINT16 *) valp; + case FFI_TYPE_UINT16: + return *(UINT16 *) valp; + + case FFI_TYPE_FLOAT: + if (argn < 6) + return lds (valp); + /* FALLTHRU */ + + case FFI_TYPE_INT: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + /* Note that unsigned 32-bit quantities are sign extended. */ + return *(SINT32 *) valp; + + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + case FFI_TYPE_POINTER: + case FFI_TYPE_DOUBLE: + return *(UINT64 *) valp; + + default: + abort (); + } +} + +static void +ffi_call_int (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue, + void *closure) +{ + unsigned long *argp; + long i, avn, argn, flags = cif->flags; + ffi_type **arg_types; + void *frame; + + /* If the return value is a struct and we don't have a return + value address then we need to make one. */ + if (rvalue == NULL && flags == SW_64_RET_IN_MEM) + rvalue = alloca (cif->rtype->size); + + /* Allocate the space for the arguments, plus 4 words of temp + space for ffi_call_sysv. 
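[Editor's note] Worked example of the sizing rules in ffi_prep_cif_machdep for a hypothetical signature int f(double, float, struct { char c[12]; }). The local ALIGN and FFI_SIZEOF_ARG definitions only keep the sketch self-contained; they mirror the macros used above.

#include <stdio.h>

#define FFI_SIZEOF_ARG 8
#define ALIGN(v, a) (((v) + (a) - 1) & ~((size_t) (a) - 1))

int
main (void)
{
  size_t bytes = 0;
  bytes += 8;                            /* double: one 8-byte slot        */
  bytes += 8;                            /* float: one 8-byte slot         */
  bytes += ALIGN (12, FFI_SIZEOF_ARG);   /* 12-byte struct: two slots      */
  if (bytes < 6 * 8)                     /* raise to the six register      */
    bytes = 6 * 8;                       /* slots ffi_call_sysv always pops */
  printf ("argument area = %zu bytes\n", bytes);   /* prints 48 */
  return 0;
}

Each scalar takes one 8-byte slot, the 12-byte struct rounds up to two, and the 32-byte total is then raised to the 48-byte minimum so ffi_call_sysv can unconditionally pop the six register-argument slots.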
*/ + argp = frame = alloca (cif->bytes + 4 * FFI_SIZEOF_ARG); + frame += cif->bytes; + + argn = 0; + if (flags == SW_64_RET_IN_MEM) + argp[argn++] = (unsigned long) rvalue; + + avn = cif->nargs; + arg_types = cif->arg_types; + + for (i = 0, avn = cif->nargs; i < avn; i++) + { + ffi_type *ty = arg_types[i]; + void *valp = avalue[i]; + int type = ty->type; + size_t size; + + switch (type) + { + case FFI_TYPE_INT: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + case FFI_TYPE_POINTER: + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + argp[argn] = extend_basic_type (valp, type, argn); + argn++; + break; + + case FFI_TYPE_LONGDOUBLE: + by_reference: + /* Note that 128-bit long double is passed by reference. */ + argp[argn++] = (unsigned long) valp; + break; + + case FFI_TYPE_VOID: + case FFI_TYPE_STRUCT: + size = ty->size; + memcpy (argp + argn, valp, size); + argn += ALIGN (size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG; + break; + + case FFI_TYPE_COMPLEX: + type = ty->elements[0]->type; + if (type == FFI_TYPE_LONGDOUBLE) + goto by_reference; + + /* Most complex types passed as two separate arguments. */ + size = ty->elements[0]->size; + argp[argn] = extend_basic_type (valp, type, argn); + argp[argn + 1] = extend_basic_type (valp + size, type, argn + 1); + argn += 2; + break; + + default: + abort (); + } + } + + flags = (flags >> SW_64_ST_SHIFT) & 0xff; + ffi_call_sysv (argp, frame, flags, rvalue, fn, closure); +} + +void +ffi_call (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue) +{ + ffi_call_int (cif, fn, rvalue, avalue, NULL); +} + +void +ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue, + void *closure) +{ + ffi_call_int (cif, fn, rvalue, avalue, closure); +} + +ffi_status +ffi_prep_closure_loc (ffi_closure *closure, ffi_cif *cif, + void (*fun) (ffi_cif *, void *, void **, void *), + void *user_data, void *codeloc) +{ + unsigned int *tramp; + + if (cif->abi != FFI_OSF) + return FFI_BAD_ABI; + + tramp = (unsigned int *) &closure->tramp[0]; + tramp[0] = 0x43fb0741; /* mov $27,$1 */ + tramp[1] = 0x8f7b0010; /* ldl $27,16($27) */ + tramp[2] = 0x0ffb0000; /* jmp $31,($27),0 */ + tramp[3] = 0x43ff075f; /* nop */ + *(void **) &tramp[4] = ffi_closure_sysv; + + closure->cif = cif; + closure->fun = fun; + closure->user_data = user_data; + + /* Flush the Icache. 0x86 is PAL_imb in Tru64 UNIX . */ + asm volatile("sys_call 0x86" : : : "memory"); + + return FFI_OK; +} + +ffi_status +ffi_prep_go_closure (ffi_go_closure *closure, ffi_cif *cif, + void (*fun) (ffi_cif *, void *, void **, void *)) +{ + if (cif->abi != FFI_OSF) + return FFI_BAD_ABI; + + closure->tramp = (void *) ffi_go_closure_sysv; + closure->cif = cif; + closure->fun = fun; + + return FFI_OK; +} + +long FFI_HIDDEN +ffi_closure_sysv_inner (ffi_cif *cif, + void (*fun) (ffi_cif *, void *, void **, void *), + void *user_data, void *rvalue, unsigned long *argp) +{ + void **avalue; + ffi_type **arg_types; + long i, avn, argn, flags; + + avalue = alloca (cif->nargs * sizeof (void *)); + flags = cif->flags; + argn = 0; + + /* Copy the caller's structure return address to that the closure + returns the data directly to the caller. */ + if (flags == SW_64_RET_IN_MEM) + { + rvalue = (void *) argp[0]; + argn = 1; + } + + arg_types = cif->arg_types; + + /* Grab the addresses of the arguments from the stack frame. 
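[Editor's note] The trampoline written by ffi_prep_closure_loc above can be pictured as a 24-byte record: four instruction words followed by the pointer that the ldl at offset 16 fetches. The struct below is only a sketch of that layout (the type name is invented); its size matches FFI_TRAMPOLINE_SIZE declared in the new ffitarget.h.

#include <stdint.h>

struct sw64_closure_trampoline
{
  uint32_t insn[4];   /* mov $27,$1 ; ldl $27,16($27) ; jmp $31,($27),0 ; nop */
  void *entry;        /* at offset 16: address of ffi_closure_sysv            */
};

_Static_assert (sizeof (struct sw64_closure_trampoline) == 24,
                "matches FFI_TRAMPOLINE_SIZE");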
*/ + for (i = 0, avn = cif->nargs; i < avn; i++) + { + ffi_type *ty = arg_types[i]; + int type = ty->type; + void *valp = &argp[argn]; + size_t size; + + switch (type) + { + case FFI_TYPE_INT: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + case FFI_TYPE_POINTER: + argn += 1; + break; + + case FFI_TYPE_VOID: + case FFI_TYPE_STRUCT: + size = ty->size; + argn += ALIGN (size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG; + break; + + case FFI_TYPE_FLOAT: + /* Floats coming from registers need conversion from double + back to float format. */ + if (argn < 6) + { + valp = &argp[argn - 6]; + sts (valp, argp[argn - 6]); + } + argn += 1; + break; + + case FFI_TYPE_DOUBLE: + if (argn < 6) + valp = &argp[argn - 6]; + argn += 1; + break; + + case FFI_TYPE_LONGDOUBLE: + by_reference: + /* 128-bit long double is passed by reference. */ + valp = (void *) argp[argn]; + argn += 1; + break; + + case FFI_TYPE_COMPLEX: + type = ty->elements[0]->type; + switch (type) + { + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + /* Passed as separate arguments, but they wind up sequential. */ + break; + + case FFI_TYPE_INT: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + /* Passed as separate arguments. Disjoint, but there's room + enough in one slot to hold the pair. */ + size = ty->elements[0]->size; + memcpy (valp + size, valp + 8, size); + break; + + case FFI_TYPE_FLOAT: + /* Passed as separate arguments. Disjoint, and each piece + may need conversion back to float. */ + if (argn < 6) + { + valp = &argp[argn - 6]; + sts (valp, argp[argn - 6]); + } + if (argn + 1 < 6) + sts (valp + 4, argp[argn + 1 - 6]); + else + *(UINT32 *) (valp + 4) = argp[argn + 1]; + break; + + case FFI_TYPE_DOUBLE: + /* Passed as separate arguments. Only disjoint if one part + is in fp regs and the other is on the stack. */ + if (argn < 5) + valp = &argp[argn - 6]; + else if (argn == 5) + { + valp = alloca (16); + ((UINT64 *) valp)[0] = argp[5 - 6]; + ((UINT64 *) valp)[1] = argp[6]; + } + break; + + case FFI_TYPE_LONGDOUBLE: + goto by_reference; + + default: + abort (); + } + argn += 2; + break; + + default: + abort (); + } + + avalue[i] = valp; + } + + /* Invoke the closure. */ + fun (cif, rvalue, avalue, user_data); + + /* Tell ffi_closure_sysv how to perform return type promotions. */ + return (flags >> SW_64_LD_SHIFT) & 0xff; +} diff --git a/libffi/src/sw_64/ffitarget.h b/libffi/src/sw_64/ffitarget.h new file mode 100644 index 0000000000000000000000000000000000000000..f5792e1dd68c284f23ca6441e38ec08379e16c19 --- /dev/null +++ b/libffi/src/sw_64/ffitarget.h @@ -0,0 +1,59 @@ +/* -----------------------------------------------------------------*-C-*- + ffitarget.h - Copyright (c) 2012 Anthony Green + Copyright (c) 1996-2003 Red Hat, Inc. + Target configuration macros for Sunway. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + + ----------------------------------------------------------------------- */ + +#ifndef LIBFFI_TARGET_H +#define LIBFFI_TARGET_H + +#ifndef LIBFFI_H +#error \ + "Please do not include ffitarget.h directly into your source. Use ffi.h instead." +#endif + +#ifndef LIBFFI_ASM +typedef unsigned long ffi_arg; +typedef signed long ffi_sarg; + +typedef enum ffi_abi +{ + FFI_FIRST_ABI = 0, + FFI_OSF, + FFI_LAST_ABI, + FFI_DEFAULT_ABI = FFI_OSF +} ffi_abi; +#endif + +#define FFI_TARGET_SPECIFIC_STACK_SPACE_ALLOCATION +#define FFI_TARGET_HAS_COMPLEX_TYPE + +/* ---- Definitions for closures ----------------------------------------- */ + +#define FFI_CLOSURES 1 +#define FFI_GO_CLOSURES 1 +#define FFI_TRAMPOLINE_SIZE 24 +#define FFI_NATIVE_RAW_API 0 + +#endif diff --git a/libffi/src/sw_64/internal.h b/libffi/src/sw_64/internal.h new file mode 100644 index 0000000000000000000000000000000000000000..92ad32179ff271fcb83b81242d9b9a61f8161abe --- /dev/null +++ b/libffi/src/sw_64/internal.h @@ -0,0 +1,23 @@ +#define SW_64_ST_VOID 0 +#define SW_64_ST_INT 1 +#define SW_64_ST_FLOAT 2 +#define SW_64_ST_DOUBLE 3 +#define SW_64_ST_CPLXF 4 +#define SW_64_ST_CPLXD 5 + +#define SW_64_LD_VOID 0 +#define SW_64_LD_INT64 1 +#define SW_64_LD_INT32 2 +#define SW_64_LD_UINT16 3 +#define SW_64_LD_SINT16 4 +#define SW_64_LD_UINT8 5 +#define SW_64_LD_SINT8 6 +#define SW_64_LD_FLOAT 7 +#define SW_64_LD_DOUBLE 8 +#define SW_64_LD_CPLXF 9 +#define SW_64_LD_CPLXD 10 + +#define SW_64_ST_SHIFT 0 +#define SW_64_LD_SHIFT 8 +#define SW_64_RET_IN_MEM 0x10000 +#define SW_64_FLAGS(S, L) (((L) << SW_64_LD_SHIFT) | (S)) diff --git a/libffi/src/sw_64/sysv.S b/libffi/src/sw_64/sysv.S new file mode 100644 index 0000000000000000000000000000000000000000..588cb6e76ccba75e6738aa23958aad1f57804540 --- /dev/null +++ b/libffi/src/sw_64/sysv.S @@ -0,0 +1,281 @@ +/* ----------------------------------------------------------------------- + sysv.S - Copyright (c) 1998, 2001, 2007, 2008, 2011, 2014 Red Hat + + Sunway/SYSV Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above 
copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ +#define LIBFFI_ASM +#include +#include +#include +#include "internal.h" + + .arch sw6a + .text + +/* Aid in building a direct addressed jump table, 4 insns per entry. */ +.macro E index + .align 4 + .org 99b + \index * 16 +.endm + +/* ffi_call_sysv (void *stack, void *frame, unsigned flags, + void *raddr, void (*fnaddr)(void), void *closure) + + Bit o trickiness here -- FRAME is the base of the stack frame + for this function. This has been allocated by ffi_call. We also + deallocate some of the stack that has been alloca'd. */ + + .align 4 + .globl ffi_call_sysv + .ent ffi_call_sysv + FFI_HIDDEN(ffi_call_sysv) + +ffi_call_sysv: + cfi_startproc + cfi_def_cfa($17, 32) + mov $16, $30 + stl $26, 0($17) + stl $15, 8($17) + mov $17, $15 + .prologue 0 + cfi_def_cfa_register($15) + cfi_rel_offset($26, 0) + cfi_rel_offset($15, 8) + + stl $18, 16($17) # save flags into frame + stl $19, 24($17) # save rvalue into frame + mov $20, $27 # fn into place for call + mov $21, $1 # closure into static chain + + # Load up all of the (potential) argument registers. + ldl $16, 0($30) + fldd $f16, 0($30) + fldd $f17, 8($30) + ldl $17, 8($30) + fldd $f18, 16($30) + ldl $18, 16($30) + fldd $f19, 24($30) + ldl $19, 24($30) + fldd $f20, 32($30) + ldl $20, 32($30) + fldd $f21, 40($30) + ldl $21, 40($30) + + # Deallocate the register argument area. + ldi $30, 48($30) + + call $26, ($27), 0 +0: + ldih $29, 0($26) !gpdisp!1 + ldl $2, 24($15) # reload rvalue + ldi $29, 0($29) !gpdisp!1 + ldl $3, 16($15) # reload flags + ldi $1, 99f-0b($26) + ldl $26, 0($15) + ldl $15, 8($15) + cfi_restore($26) + cfi_restore($15) + cfi_def_cfa($sp, 0) + seleq $2, 0, $3 # mash null rvalue to void + addl $3, $3, $3 + s8addl $3, $1, $1 # 99f + stcode * 16 + jmp $31, ($1), $st_int + + .align 4 +99: +E 0 + ret +E 1 +$st_int: + stl $0, 0($2) + ret +E 2 + fsts $f0, 0($2) + ret +E 4 + fstd $f0, 0($2) + ret +E 6 + fsts $f0, 0($2) + fsts $f1, 4($2) + ret +E 10 + fstd $f0, 0($2) + fstd $f1, 8($2) + ret + + cfi_endproc + .end ffi_call_sysv + +/* ffi_closure_sysv(...) + + Receives the closure argument in $1. 
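[Editor's note] For the minimal case where cif->bytes is exactly the six register slots, the block that ffi_call_int allocates and ffi_call_sysv consumes looks roughly like the sketch below. Field names are invented; the offsets follow the stl stores and the ldl/fldd pairs in the assembly above.

#include <stdint.h>

struct sw64_call_block
{
  uint64_t arg_slot[6];     /* loaded into $16-$21 and $f16-$f21, then
                               deallocated with "ldi $30, 48($30)"       */
  /* any further slots would remain here as the callee's stack arguments */
  uint64_t saved_ra;        /* frame+0:  $26                             */
  uint64_t saved_fp;        /* frame+8:  $15                             */
  uint64_t return_flags;    /* frame+16: reloaded after the call         */
  uint64_t return_address;  /* frame+24: rvalue pointer                  */
};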
*/ + +#define CLOSURE_FS (16*8) + + .align 4 + .globl ffi_go_closure_sysv + .ent ffi_go_closure_sysv + FFI_HIDDEN(ffi_go_closure_sysv) + +ffi_go_closure_sysv: + cfi_startproc + ldgp $29, 0($27) + subl $30, CLOSURE_FS, $30 + cfi_adjust_cfa_offset(CLOSURE_FS) + stl $26, 0($30) + .prologue 1 + cfi_rel_offset($26, 0) + + stl $16, 10*8($30) + stl $17, 11*8($30) + stl $18, 12*8($30) + + ldl $16, 8($1) # load cif + ldl $17, 16($1) # load fun + mov $1, $18 # closure is user_data + br $do_closure + + cfi_endproc + .end ffi_go_closure_sysv + + .align 4 + .globl ffi_closure_sysv + .ent ffi_closure_sysv + FFI_HIDDEN(ffi_closure_sysv) + +ffi_closure_sysv: + cfi_startproc + ldgp $29, 0($27) + subl $30, CLOSURE_FS, $30 + cfi_adjust_cfa_offset(CLOSURE_FS) + stl $26, 0($30) + .prologue 1 + cfi_rel_offset($26, 0) + + # Store all of the potential argument registers in va_list format. + stl $16, 10*8($30) + stl $17, 11*8($30) + stl $18, 12*8($30) + + ldl $16, 24($1) # load cif + ldl $17, 32($1) # load fun + ldl $18, 40($1) # load user_data + +$do_closure: + stl $19, 13*8($30) + stl $20, 14*8($30) + stl $21, 15*8($30) + fstd $f16, 4*8($30) + fstd $f17, 5*8($30) + fstd $f18, 6*8($30) + fstd $f19, 7*8($30) + fstd $f20, 8*8($30) + fstd $f21, 9*8($30) + + # Call ffi_closure_sysv_inner to do the bulk of the work. + ldi $19, 2*8($30) + ldi $20, 10*8($30) + call $26, ffi_closure_sysv_inner +0: + ldih $29, 0($26) !gpdisp!2 + ldi $2, 99f-0b($26) + s4addl $0, 0, $1 # ldcode * 4 + ldl $0, 16($30) # preload return value + s4addl $1, $2, $1 # 99f + ldcode * 16 + ldi $29, 0($29) !gpdisp!2 + ldl $26, 0($30) + cfi_restore($26) + jmp $31, ($1), $load_32 + +.macro epilogue + addl $30, CLOSURE_FS, $30 + cfi_adjust_cfa_offset(-CLOSURE_FS) + ret + .align 4 + cfi_adjust_cfa_offset(CLOSURE_FS) +.endm + + .align 4 +99: +E 0 + epilogue + +E 1 + epilogue + +E 2 +$load_32: + sextl $0, $0 + epilogue + +E 3 + zapnot $0, 3, $0 + epilogue + +E 4 +#ifdef __sw_64_bwx__ + sexth $0, $0 +#else + sll $0, 48, $0 + sra $0, 48, $0 +#endif + epilogue + +E 5 + and $0, 0xff, $0 + epilogue + +E 6 +#ifdef __sw_64_bwx__ + sextb $0, $0 +#else + sll $0, 56, $0 + sra $0, 56, $0 +#endif + epilogue + +E 7 + flds $f0, 16($sp) + epilogue + +E 8 + fldd $f0, 16($sp) + epilogue + +E 9 + flds $f0, 16($sp) + flds $f1, 20($sp) + epilogue + +E 10 + fldd $f0, 16($sp) + fldd $f1, 24($sp) + epilogue + + cfi_endproc + .end ffi_closure_sysv + +#if defined __ELF__ && defined __linux__ + .section .note.GNU-stack,"",@progbits +#endif diff --git a/libffi/src/types.c b/libffi/src/types.c index 7e80aec6eb4b66da2a583cbb159da450dff6cd50..9ff182e35e247fbea6bb0e32e631917136876c5e 100644 --- a/libffi/src/types.c +++ b/libffi/src/types.c @@ -78,13 +78,13 @@ FFI_TYPEDEF(pointer, void*, FFI_TYPE_POINTER, const); FFI_TYPEDEF(float, float, FFI_TYPE_FLOAT, const); FFI_TYPEDEF(double, double, FFI_TYPE_DOUBLE, const); -#if !defined HAVE_LONG_DOUBLE_VARIANT || defined __alpha__ +#if !defined HAVE_LONG_DOUBLE_VARIANT || defined __alpha_ || defined __sw_64___ #define FFI_LDBL_CONST const #else #define FFI_LDBL_CONST #endif -#ifdef __alpha__ +#if defined __alpha__ || defined __sw_64__ /* Even if we're not configured to default to 128-bit long double, maintain binary compatibility, as -mlong-double-128 can be used at any time. 
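[Editor's note] The jump table above promotes the raw integer return register according to the SW_64_LD_* code handed back by ffi_closure_sysv_inner. A C restatement of those cases, assuming the constant values from the new internal.h; the floating-point codes (7 to 10) are reloaded through $f0/$f1 instead and are omitted here.

#include <stdint.h>

static int64_t
promote_integer_return (int ld_code, uint64_t raw)
{
  /* ld_code is (flags >> SW_64_LD_SHIFT) & 0xff. */
  switch (ld_code)
    {
    case 2:  return (int32_t) raw;    /* SW_64_LD_INT32:  sextl        */
    case 3:  return (uint16_t) raw;   /* SW_64_LD_UINT16: zapnot $0,3  */
    case 4:  return (int16_t) raw;    /* SW_64_LD_SINT16: sexth        */
    case 5:  return (uint8_t) raw;    /* SW_64_LD_UINT8:  and $0,0xff  */
    case 6:  return (int8_t) raw;     /* SW_64_LD_SINT8:  sextb        */
    default: return (int64_t) raw;    /* void and 64-bit: used as-is   */
    }
}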
*/ diff --git a/libgcc/config.host b/libgcc/config.host index c529cc40f0c8d536524e2539483e6b148ded4413..ba196609ee296d753b44f44e1408602efbb10a20 100644 --- a/libgcc/config.host +++ b/libgcc/config.host @@ -199,6 +199,9 @@ s390*-*-*) sh[123456789lbe]*-*-*) cpu_type=sh ;; +sw_64*-*-*) + cpu_type=sw_64 + ;; tilegx*-*-*) cpu_type=tilegx ;; @@ -1424,6 +1427,21 @@ sparc64-*-linux*) # 64-bit SPARC's running GNU/Linux ;; sparc64-*-netbsd*) ;; +sw_64*-*-linux*) + tmake_file="${tmake_file} sw_64/t-sw_64 sw_64/t-ieee t-crtfm sw_64/t-linux" + extra_parts="$extra_parts crtfastmath.o" + md_unwind_header=sw_64/linux-unwind.h + ;; +sw_64*-*-freebsd*) + tmake_file="${tmake_file} sw_64/t-sw_64 sw_64/t-ieee t-crtfm" + extra_parts="$extra_parts crtbeginT.o crtfastmath.o" + ;; +sw_64*-*-netbsd*) + tmake_file="${tmake_file} sw_64/t-sw_64 sw_64/t-ieee" + ;; +sw_64*-*-openbsd*) + tmake_file="${tmake_file} sw_64/t-sw_64 sw_64/t-ieee" + ;; tic6x-*-uclinux) tmake_file="${tmake_file} t-softfp-sfdf t-softfp-excl t-softfp \ c6x/t-elf c6x/t-uclinux t-crtstuff-pic t-libgcc-pic \ diff --git a/libgcc/config/sw_64/crtfastmath.c b/libgcc/config/sw_64/crtfastmath.c new file mode 100644 index 0000000000000000000000000000000000000000..1cd890458228ea4ebf943326fb2242b0652d83dc --- /dev/null +++ b/libgcc/config/sw_64/crtfastmath.c @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2001-2020 Free Software Foundation, Inc. + * Contributed by Richard Henderson (rth@redhat.com) + * + * This file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) any + * later version. + * + * This file is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * . + */ + +/* Assume SYSV/1 compatible interfaces. */ + +extern void +__ieee_set_fp_control (unsigned long int); + +#define IEEE_MAP_DMZ (1UL << 12) /* Map denorm inputs to zero */ +#define IEEE_MAP_UMZ (1UL << 13) /* Map underflowed outputs to zero */ + +static void __attribute__ ((constructor)) set_fast_math (void) +{ + __ieee_set_fp_control (IEEE_MAP_DMZ | IEEE_MAP_UMZ); +} diff --git a/libgcc/config/sw_64/libgcc-sw_64-ldbl.ver b/libgcc/config/sw_64/libgcc-sw_64-ldbl.ver new file mode 100644 index 0000000000000000000000000000000000000000..21f259687860d495acb12b5afca5f24a5dd5ba2a --- /dev/null +++ b/libgcc/config/sw_64/libgcc-sw_64-ldbl.ver @@ -0,0 +1,50 @@ +# Copyright (C) 2006-2020 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +%ifdef __LONG_DOUBLE_128__ + +# long double 128 bit support in libgcc_s.so.1 is only available +# when configured with --with-long-double-128. Make sure all the +# symbols are available at @@GCC_LDBL_* versions to make it clear +# there is a configurable symbol set. + +%exclude { + __fixtfdi + __fixunstfdi + __floatditf + + __divtc3 + __multc3 + __powitf2 +} + +%inherit GCC_LDBL_3.0 GCC_3.0 +GCC_LDBL_3.0 { + __fixtfdi + __fixunstfdi + __floatditf +} + +%inherit GCC_LDBL_4.0.0 GCC_4.0.0 +GCC_LDBL_4.0.0 { + __divtc3 + __multc3 + __powitf2 +} + +%endif diff --git a/libgcc/config/sw_64/linux-unwind.h b/libgcc/config/sw_64/linux-unwind.h new file mode 100644 index 0000000000000000000000000000000000000000..79da6a16a94e501c749cd1f27544492a60d445d3 --- /dev/null +++ b/libgcc/config/sw_64/linux-unwind.h @@ -0,0 +1,103 @@ +/* DWARF2 EH unwinding support for Sw_64 Linux. + Copyright (C) 2004-2020 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#ifndef inhibit_libc +/* Do code reading to identify a signal frame, and set the frame + state data appropriately. See unwind-dw2.c for the structs. */ + +#include +#include + +#define MD_FALLBACK_FRAME_STATE_FOR sw_64_fallback_frame_state + +static _Unwind_Reason_Code +sw_64_fallback_frame_state (struct _Unwind_Context *context, + _Unwind_FrameState *fs) +{ + unsigned int *pc = context->ra; + struct sigcontext *sc; + long new_cfa; + int i; + + if (pc[0] != 0x47fe0410 /* mov $30,$16 */ + || pc[2] != 0x00000083) /* callsys */ + return _URC_END_OF_STACK; + if (context->cfa == 0) + return _URC_END_OF_STACK; + if (pc[1] == 0x201f0067) /* lda $0,NR_sigreturn */ + sc = context->cfa; + else if (pc[1] == 0x201f015f) /* lda $0,NR_rt_sigreturn */ + { + struct rt_sigframe + { + siginfo_t info; + ucontext_t uc; + } *rt_ = context->cfa; + /* The void * cast is necessary to avoid an aliasing warning. + The aliasing warning is correct, but should not be a problem + because it does not alias anything. 
*/ + sc = (struct sigcontext *) (void *) &rt_->uc.uc_mcontext; + } + else + return _URC_END_OF_STACK; + + new_cfa = sc->sc_regs[30]; + fs->regs.cfa_how = CFA_REG_OFFSET; + fs->regs.cfa_reg = 30; + fs->regs.cfa_offset = new_cfa - (long) context->cfa; + for (i = 0; i < 30; ++i) + { + fs->regs.reg[i].how = REG_SAVED_OFFSET; + fs->regs.reg[i].loc.offset = (long) &sc->sc_regs[i] - new_cfa; + } + for (i = 0; i < 31; ++i) + { + fs->regs.reg[i + 32].how = REG_SAVED_OFFSET; + fs->regs.reg[i + 32].loc.offset = (long) &sc->sc_fpregs[i] - new_cfa; + } + fs->regs.reg[64].how = REG_SAVED_OFFSET; + fs->regs.reg[64].loc.offset = (long) &sc->sc_pc - new_cfa; + fs->retaddr_column = 64; + fs->signal_frame = 1; + + return _URC_NO_REASON; +} + +#define MD_FROB_UPDATE_CONTEXT sw_64_frob_update_context + +/* Fix up for signal handlers that don't have S flag set. */ + +static void +sw_64_frob_update_context (struct _Unwind_Context *context, + _Unwind_FrameState *fs ATTRIBUTE_UNUSED) +{ + unsigned int *pc = context->ra; + + if (pc[0] == 0x47fe0410 /* mov $30,$16 */ + && pc[2] == 0x00000083 /* callsys */ + && (pc[1] == 0x201f0067 /* lda $0,NR_sigreturn */ + || pc[1] == 0x201f015f)) /* lda $0,NR_rt_sigreturn */ + _Unwind_SetSignalFrame (context, 1); +} +#endif diff --git a/libgcc/config/sw_64/qrnnd.S b/libgcc/config/sw_64/qrnnd.S new file mode 100644 index 0000000000000000000000000000000000000000..d22b31b4ebb0c3bcc6740c350cfa255912130a88 --- /dev/null +++ b/libgcc/config/sw_64/qrnnd.S @@ -0,0 +1,181 @@ + # Sw_64 __udiv_qrnnd + # Copyright (C) 1992-2020 Free Software Foundation, Inc. + + # This file is part of GCC. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU General Public License as published by + # the Free Software Foundation; either version 3 of the License, or (at your + # option) any later version. + + # This file is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # Under Section 7 of GPL version 3, you are granted additional + # permissions described in the GCC Runtime Library Exception, version + # 3.1, as published by the Free Software Foundation. + + # You should have received a copy of the GNU General Public License and + # a copy of the GCC Runtime Library Exception along with this program; + # see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + # . 
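[Editor's note] Both unwinder hooks in linux-unwind.h above key on the same three-instruction signal-return sequence. Isolated as a predicate (the function name is illustrative), the check is:

#include <stdbool.h>
#include <stdint.h>

static bool
is_sw64_sigreturn_sequence (const uint32_t *pc)
{
  return pc[0] == 0x47fe0410                /* mov $30,$16             */
         && (pc[1] == 0x201f0067            /* lda $0,NR_sigreturn     */
             || pc[1] == 0x201f015f)        /* lda $0,NR_rt_sigreturn  */
         && pc[2] == 0x00000083;            /* callsys                 */
}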
+ +#ifdef __ELF__ +.section .note.GNU-stack,"" +#endif + + .set noreorder + .set noat + + .text + + .globl __udiv_qrnnd + .ent __udiv_qrnnd +#ifdef __VMS__ +__udiv_qrnnd..en: + .frame $29,0,$26,0 + .prologue +#else +__udiv_qrnnd: + .frame $30,0,$26,0 + .prologue 0 +#endif +/* + ldiq -> ldi + addq->addl + subq->subl + cmovne qb,tmp,n1->selne qb,tmp,n1,n1 + stq ->stl + cmoveq tmp,AT,n1(n0)->seleq tmp,AT,n1,n1(n0,n0) */ +#define cnt $2 +#define tmp $3 +#define rem_ptr $16 +#define n1 $17 +#define n0 $18 +#define d $19 +#define qb $20 +#define AT $at + + ldi cnt,16 + blt d,$largedivisor + +$loop1: cmplt n0,0,tmp + addl n1,n1,n1 + bis n1,tmp,n1 + addl n0,n0,n0 + cmpule d,n1,qb + subl n1,d,tmp + selne qb,tmp,n1,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addl n1,n1,n1 + bis n1,tmp,n1 + addl n0,n0,n0 + cmpule d,n1,qb + subl n1,d,tmp + selne qb,tmp,n1,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addl n1,n1,n1 + bis n1,tmp,n1 + addl n0,n0,n0 + cmpule d,n1,qb + subl n1,d,tmp + selne qb,tmp,n1,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addl n1,n1,n1 + bis n1,tmp,n1 + addl n0,n0,n0 + cmpule d,n1,qb + subl n1,d,tmp + selne qb,tmp,n1,n1 + bis n0,qb,n0 + subl cnt,1,cnt + bgt cnt,$loop1 + stl n1,0(rem_ptr) + bis $31,n0,$0 + ret $31,($26),1 + +$largedivisor: + and n0,1,$4 + + srl n0,1,n0 + sll n1,63,tmp + or tmp,n0,n0 + srl n1,1,n1 + + and d,1,$6 + srl d,1,$5 + addl $5,$6,$5 + +$loop2: cmplt n0,0,tmp + addl n1,n1,n1 + bis n1,tmp,n1 + addl n0,n0,n0 + cmpule $5,n1,qb + subl n1,$5,tmp + selne qb,tmp,n1,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addl n1,n1,n1 + bis n1,tmp,n1 + addl n0,n0,n0 + cmpule $5,n1,qb + subl n1,$5,tmp + selne qb,tmp,n1,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addl n1,n1,n1 + bis n1,tmp,n1 + addl n0,n0,n0 + cmpule $5,n1,qb + subl n1,$5,tmp + selne qb,tmp,n1,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addl n1,n1,n1 + bis n1,tmp,n1 + addl n0,n0,n0 + cmpule $5,n1,qb + subl n1,$5,tmp + selne qb,tmp,n1,n1 + bis n0,qb,n0 + subl cnt,1,cnt + bgt cnt,$loop2 + + addl n1,n1,n1 + addl $4,n1,n1 + bne $6,$Odd + stl n1,0(rem_ptr) + bis $31,n0,$0 + ret $31,($26),1 + +$Odd: + /* q' in n0. r' in n1 */ + addl n1,n0,n1 + + cmpult n1,n0,tmp # tmp := carry from addl + subl n1,d,AT + addl n0,tmp,n0 + selne tmp,AT,n1,n1 + + cmpult n1,d,tmp + addl n0,1,AT + seleq tmp,AT,n0,n0 + subl n1,d,AT + seleq tmp,AT,n1,n1 + + stl n1,0(rem_ptr) + bis $31,n0,$0 + ret $31,($26),1 + +#ifdef __VMS__ + .link + .align 3 +__udiv_qrnnd: + .pdesc __udiv_qrnnd..en,null +#endif + .end __udiv_qrnnd diff --git a/libgcc/config/sw_64/t-ieee b/libgcc/config/sw_64/t-ieee new file mode 100644 index 0000000000000000000000000000000000000000..9b66e50acc5db17c3fa2cdbd8040dff123acad0d --- /dev/null +++ b/libgcc/config/sw_64/t-ieee @@ -0,0 +1,2 @@ +# All sw_64s get an IEEE complaint set of libraries. +#HOST_LIBGCC2_CFLAGS += -mieee diff --git a/libgcc/config/sw_64/t-linux b/libgcc/config/sw_64/t-linux new file mode 100644 index 0000000000000000000000000000000000000000..0b7b7e6a1086ce5bc3a447620f20d4d9208630ae --- /dev/null +++ b/libgcc/config/sw_64/t-linux @@ -0,0 +1,2 @@ +SHLIB_MAPFILES += $(srcdir)/config/sw_64/libgcc-sw_64-ldbl.ver + diff --git a/libgcc/config/sw_64/t-sw_64 b/libgcc/config/sw_64/t-sw_64 new file mode 100644 index 0000000000000000000000000000000000000000..dffba8ee79134187635e5ca47873008a48ae784b --- /dev/null +++ b/libgcc/config/sw_64/t-sw_64 @@ -0,0 +1,6 @@ +# This is a support routine for longlong.h, used by libgcc2.c. 
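[Editor's note] A C model of the shift-and-subtract loop in qrnnd.S, for readers who prefer not to trace the assembly: it divides the two-word value n1:n0 by d one bit per iteration, where the assembly unrolls four steps per pass (cnt = 16). The sketch assumes the usual longlong.h precondition n1 < d and a divisor with its top bit clear; the separate $largedivisor path handles the other case.

#include <stdint.h>

static uint64_t
udiv_qrnnd_model (uint64_t *rem, uint64_t n1, uint64_t n0, uint64_t d)
{
  for (int i = 0; i < 64; i++)
    {
      uint64_t carry = n0 >> 63;     /* cmplt n0,0,tmp              */
      n1 = (n1 << 1) | carry;        /* addl n1,n1 ; bis n1,tmp,n1  */
      n0 <<= 1;                      /* addl n0,n0                  */
      if (n1 >= d)                   /* cmpule d,n1,qb              */
        {
          n1 -= d;                   /* subl n1,d ; selne           */
          n0 |= 1;                   /* bis n0,qb,n0                */
        }
    }
  *rem = n1;                         /* stl n1,0(rem_ptr)           */
  return n0;                         /* quotient returned in $0     */
}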
+LIB2ADD += $(srcdir)/config/sw_64/qrnnd.S + +# When GAS-generated unwind tables are created, they get created +# after the __FRAME_END__ terminator, which causes an ld error. +CRTSTUFF_T_CFLAGS = -fno-unwind-tables diff --git a/libgcc/libgcc2.c b/libgcc/libgcc2.c index e0a9fd712e70c816113ddb6d061ce979657878fc..50aa1bf06d3a93b3879f774e814e9f721eb87c59 100644 --- a/libgcc/libgcc2.c +++ b/libgcc/libgcc2.c @@ -2187,7 +2187,7 @@ int mprotect (char *,int, int); int getpagesize (void) { -#ifdef _ALPHA_ +#if defined _ALPHA_ || defined _SW_64_ return 8192; #else return 4096; diff --git a/libgfortran/config/fpu-glibc.h b/libgfortran/config/fpu-glibc.h index 2abb0da6b1e8506166d78ca22f72578d63d93726..f4153059797bbc2315b0882f18bc5095c00f819a 100644 --- a/libgfortran/config/fpu-glibc.h +++ b/libgfortran/config/fpu-glibc.h @@ -446,7 +446,7 @@ set_fpu_state (void *state) int support_fpu_underflow_control (int kind __attribute__((unused))) { -#if defined(__alpha__) && defined(FE_MAP_UMZ) +#if (defined(__alpha__)|| defined(__sw_64__)) && defined(FE_MAP_UMZ) return (kind == 4 || kind == 8) ? 1 : 0; #else return 0; @@ -457,7 +457,7 @@ support_fpu_underflow_control (int kind __attribute__((unused))) int get_fpu_underflow_mode (void) { -#if defined(__alpha__) && defined(FE_MAP_UMZ) +#if (defined(__alpha__)|| defined(__sw_64__)) && defined(FE_MAP_UMZ) fenv_t state = __ieee_get_fp_control (); @@ -475,7 +475,7 @@ get_fpu_underflow_mode (void) void set_fpu_underflow_mode (int gradual __attribute__((unused))) { -#if defined(__alpha__) && defined(FE_MAP_UMZ) +#if (defined(__alpha__)|| defined(__sw_64__)) && defined(FE_MAP_UMZ) fenv_t state = __ieee_get_fp_control (); diff --git a/libgfortran/configure.host b/libgfortran/configure.host index 5824f253e2f18ea7a019256d8ca1c54b950aa5a3..85407b61eb4b6e602f1c03821b61b8a4b02b4251 100644 --- a/libgfortran/configure.host +++ b/libgfortran/configure.host @@ -56,4 +56,6 @@ case "${host_cpu}" in ieee_flags="-mieee" ;; sh*) ieee_flags="-mieee" ;; + sw_64*) + ieee_flags="-mieee" ;; esac diff --git a/libgo/configure b/libgo/configure index 2f787392abd220946547acc40b58e657e348155f..51cff79ba79e64dc9e2424d62ae08f433f95eefe 100644 --- a/libgo/configure +++ b/libgo/configure @@ -14070,10 +14070,10 @@ esac # - libgo/go/syscall/endian_XX.go # - possibly others # - possibly update files in libgo/go/internal/syscall/unix -ALLGOARCH="386 alpha amd64 amd64p32 arm armbe arm64 arm64be ia64 m68k mips mipsle mips64 mips64le mips64p32 mips64p32le nios2 ppc ppc64 ppc64le riscv riscv64 s390 s390x sh shbe sparc sparc64 wasm" +ALLGOARCH="386 alpha amd64 amd64p32 arm armbe arm64 arm64be ia64 m68k mips mipsle mips64 mips64le mips64p32 mips64p32le nios2 ppc ppc64 ppc64le riscv riscv64 s390 s390x sh shbe sparc sparc64 sw_64 wasm" # All known GOARCH family values. 
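[Editor's note] The fpu-glibc.h hunks above gate underflow control on FE_MAP_UMZ. A minimal sketch of the mechanism they rely on, assuming the Alpha-style glibc extensions (__ieee_get_fp_control, __ieee_set_fp_control, FE_MAP_UMZ) are also available on sw_64, which is what the #if conditions already require; the function name is illustrative.

#include <fenv.h>

static void
set_flush_to_zero_underflow (int enable)
{
#if defined(FE_MAP_UMZ)
  fenv_t state = __ieee_get_fp_control ();
  if (enable)
    state |= FE_MAP_UMZ;       /* underflowed results become +0.0 */
  else
    state &= ~FE_MAP_UMZ;      /* IEEE gradual (denormal) underflow */
  __ieee_set_fp_control (state);
#else
  (void) enable;               /* no-op where the extension is absent */
#endif
}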
-ALLGOARCHFAMILY="I386 ALPHA AMD64 ARM ARM64 IA64 M68K MIPS MIPS64 NIOS2 PPC PPC64 RISCV RISCV64 S390 S390X SH SPARC SPARC64 WASM" +ALLGOARCHFAMILY="I386 ALPHA AMD64 ARM ARM64 IA64 M68K MIPS MIPS64 NIOS2 PPC PPC64 RISCV RISCV64 S390 S390X SH SPARC SPARC64 SW_64 WASM" GOARCH=unknown case ${host} in @@ -14256,6 +14256,9 @@ else fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext ;; + sw_64*-*-*) + GOARCH=sw_64 + ;; esac diff --git a/libgo/configure.ac b/libgo/configure.ac index f800d44a0e9cf194e1e3fff87dbdd9a093c4ba39..91cfe35134807e26012f8e0b2d93a2dc96618b46 100644 --- a/libgo/configure.ac +++ b/libgo/configure.ac @@ -236,10 +236,10 @@ AC_SUBST(USE_DEJAGNU) # - libgo/go/syscall/endian_XX.go # - possibly others # - possibly update files in libgo/go/internal/syscall/unix -ALLGOARCH="386 alpha amd64 amd64p32 arm armbe arm64 arm64be ia64 m68k mips mipsle mips64 mips64le mips64p32 mips64p32le nios2 ppc ppc64 ppc64le riscv riscv64 s390 s390x sh shbe sparc sparc64 wasm" +ALLGOARCH="386 alpha amd64 amd64p32 arm armbe arm64 arm64be ia64 m68k mips mipsle mips64 mips64le mips64p32 mips64p32le nios2 ppc ppc64 ppc64le riscv riscv64 s390 s390x sh shbe sparc sparc64 sw_64 wasm" # All known GOARCH family values. -ALLGOARCHFAMILY="I386 ALPHA AMD64 ARM ARM64 IA64 M68K MIPS MIPS64 NIOS2 PPC PPC64 RISCV RISCV64 S390 S390X SH SPARC SPARC64 WASM" +ALLGOARCHFAMILY="I386 ALPHA AMD64 ARM ARM64 IA64 M68K MIPS MIPS64 NIOS2 PPC PPC64 RISCV RISCV64 S390 S390X SH SPARC SPARC64 SW_64 WASM" GOARCH=unknown case ${host} in @@ -361,6 +361,9 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([ [GOARCH=sparc], [GOARCH=sparc64]) ;; + sw_64*-*-*) + GOARCH=sw_64 + ;; esac AC_SUBST(GOARCH) AC_SUBST(ALLGOARCH) diff --git a/libgo/go/cmd/cgo/main.go b/libgo/go/cmd/cgo/main.go index 80f35681d75f0dfcd91cf1781ff1d4f74c5167fd..366abd1061b4a075be320ecff6effdd5b291caa6 100644 --- a/libgo/go/cmd/cgo/main.go +++ b/libgo/go/cmd/cgo/main.go @@ -191,6 +191,7 @@ var ptrSizeMap = map[string]int64{ "shbe": 4, "sparc": 4, "sparc64": 8, + "sw_64": 8, } var intSizeMap = map[string]int64{ @@ -217,6 +218,7 @@ var intSizeMap = map[string]int64{ "shbe": 4, "sparc": 4, "sparc64": 8, + "sw_64": 8, } var cPrefix string diff --git a/libgo/go/cmd/internal/sys/arch.go b/libgo/go/cmd/internal/sys/arch.go index e8687363defc502c17cdb97660b7c4ac886d799d..604bbec612eb971bda07658d2160ba5ab2ca44b1 100644 --- a/libgo/go/cmd/internal/sys/arch.go +++ b/libgo/go/cmd/internal/sys/arch.go @@ -12,6 +12,7 @@ type ArchFamily byte const ( NoArch ArchFamily = iota + SW_64 AMD64 ARM ARM64 @@ -169,8 +170,17 @@ var ArchWasm = &Arch{ RegSize: 8, MinLC: 1, } - +/*TODO*/ +var ArchSW_64 = &Arch{ + Name: "sw_64", + Family: SW_64, + ByteOrder: binary.LittleEndian, + PtrSize: 8, + RegSize: 8, + MinLC: 1, +} var Archs = [...]*Arch{ + ArchSW_64, Arch386, ArchAMD64, ArchARM, diff --git a/libgo/go/debug/elf/elf.go b/libgo/go/debug/elf/elf.go index 96a67ce732728c38b7cb07adaa78fb25ff08dd03..c417537b9210186b55ab2c328731a66397bc27b1 100644 --- a/libgo/go/debug/elf/elf.go +++ b/libgo/go/debug/elf/elf.go @@ -6,6 +6,7 @@ * $FreeBSD: src/sys/sys/elf64.h,v 1.10.14.1 2005/12/30 22:13:58 marcel Exp $ * $FreeBSD: src/sys/sys/elf_common.h,v 1.15.8.1 2005/12/30 22:13:58 marcel Exp $ * $FreeBSD: src/sys/alpha/include/elf.h,v 1.14 2003/09/25 01:10:22 peter Exp $ + * $FreeBSD: src/sys/sw_64/include/elf.h,v 1.14 2003/09/25 01:10:22 peter Exp $ * $FreeBSD: src/sys/amd64/include/elf.h,v 1.18 2004/08/03 08:21:48 dfr Exp $ * $FreeBSD: src/sys/arm/include/elf.h,v 1.5.2.1 2006/06/30 21:42:52 cognet Exp $ * $FreeBSD: 
src/sys/i386/include/elf.h,v 1.16 2004/08/02 19:12:17 dfr Exp $ @@ -390,6 +391,8 @@ const ( EM_MIPS_RS4_BE Machine = 10 /* MIPS R4000 Big-Endian */ EM_ALPHA_STD Machine = 41 /* Digital Alpha (standard value). */ EM_ALPHA Machine = 0x9026 /* Alpha (written in the absence of an ABI) */ + EM_SW_64_STD Machine = 41 /* Digital Sw_64 (standard value). */ + EM_SW_64 Machine = 0x9916 /* mieee-opt Sw_64 (written in the absence of an ABI) */ ) var machineStrings = []intName{ @@ -581,6 +584,8 @@ var machineStrings = []intName{ {10, "EM_MIPS_RS4_BE"}, {41, "EM_ALPHA_STD"}, {0x9026, "EM_ALPHA"}, + {41, "EM_SW_64_STD"}, + {0x9916, "EM_SW_64"}, } func (i Machine) String() string { return stringName(uint32(i), machineStrings, false) } @@ -1463,6 +1468,73 @@ var ralphaStrings = []intName{ func (i R_ALPHA) String() string { return stringName(uint32(i), ralphaStrings, false) } func (i R_ALPHA) GoString() string { return stringName(uint32(i), ralphaStrings, true) } +// Relocation types for SW_64. +type R_SW_64 int + +const ( + R_SW_64_NONE R_SW_64 = 0 /* No reloc */ + R_SW_64_REFLONG R_SW_64 = 1 /* Direct 32 bit */ + R_SW_64_REFQUAD R_SW_64 = 2 /* Direct 64 bit */ + R_SW_64_GPREL32 R_SW_64 = 3 /* GP relative 32 bit */ + R_SW_64_LITERAL R_SW_64 = 4 /* GP relative 16 bit w/optimization */ + R_SW_64_LITUSE R_SW_64 = 5 /* Optimization hint for LITERAL */ + R_SW_64_GPDISP R_SW_64 = 6 /* Add displacement to GP */ + R_SW_64_BRADDR R_SW_64 = 7 /* PC+4 relative 23 bit shifted */ + R_SW_64_HINT R_SW_64 = 8 /* PC+4 relative 16 bit shifted */ + R_SW_64_SREL16 R_SW_64 = 9 /* PC relative 16 bit */ + R_SW_64_SREL32 R_SW_64 = 10 /* PC relative 32 bit */ + R_SW_64_SREL64 R_SW_64 = 11 /* PC relative 64 bit */ + R_SW_64_OP_PUSH R_SW_64 = 12 /* OP stack push */ + R_SW_64_OP_STORE R_SW_64 = 13 /* OP stack pop and store */ + R_SW_64_OP_PSUB R_SW_64 = 14 /* OP stack subtract */ + R_SW_64_OP_PRSHIFT R_SW_64 = 15 /* OP stack right shift */ + R_SW_64_GPVALUE R_SW_64 = 16 + R_SW_64_GPRELHIGH R_SW_64 = 17 + R_SW_64_GPRELLOW R_SW_64 = 18 + R_SW_64_IMMED_GP_16 R_SW_64 = 19 + R_SW_64_IMMED_GP_HI32 R_SW_64 = 20 + R_SW_64_IMMED_SCN_HI32 R_SW_64 = 21 + R_SW_64_IMMED_BR_HI32 R_SW_64 = 22 + R_SW_64_IMMED_LO32 R_SW_64 = 23 + R_SW_64_COPY R_SW_64 = 24 /* Copy sympol at runtime */ + R_SW_64_GLOB_DAT R_SW_64 = 25 /* Create GOT entry */ + R_SW_64_JMP_SLOT R_SW_64 = 26 /* Create PLT entry */ + R_SW_64_RELATIVE R_SW_64 = 27 /* Adjust by program base */ +) + +var rsw_64Strings = []intName{ + {0, "R_SW_64_NONE"}, + {1, "R_SW_64_REFLONG"}, + {2, "R_SW_64_REFQUAD"}, + {3, "R_SW_64_GPREL32"}, + {4, "R_SW_64_LITERAL"}, + {5, "R_SW_64_LITUSE"}, + {6, "R_SW_64_GPDISP"}, + {7, "R_SW_64_BRADDR"}, + {8, "R_SW_64_HINT"}, + {9, "R_SW_64_SREL16"}, + {10, "R_SW_64_SREL32"}, + {11, "R_SW_64_SREL64"}, + {12, "R_SW_64_OP_PUSH"}, + {13, "R_SW_64_OP_STORE"}, + {14, "R_SW_64_OP_PSUB"}, + {15, "R_SW_64_OP_PRSHIFT"}, + {16, "R_SW_64_GPVALUE"}, + {17, "R_SW_64_GPRELHIGH"}, + {18, "R_SW_64_GPRELLOW"}, + {19, "R_SW_64_IMMED_GP_16"}, + {20, "R_SW_64_IMMED_GP_HI32"}, + {21, "R_SW_64_IMMED_SCN_HI32"}, + {22, "R_SW_64_IMMED_BR_HI32"}, + {23, "R_SW_64_IMMED_LO32"}, + {24, "R_SW_64_COPY"}, + {25, "R_SW_64_GLOB_DAT"}, + {26, "R_SW_64_JMP_SLOT"}, + {27, "R_SW_64_RELATIVE"}, +} + +func (i R_SW_64) String() string { return stringName(uint32(i), rsw_64Strings, false) } +func (i R_SW_64) GoString() string { return stringName(uint32(i), rsw_64Strings, true) } // Relocation types for ARM. 
type R_ARM int diff --git a/libgo/go/debug/elf/elf_test.go b/libgo/go/debug/elf/elf_test.go index f8985a8992361fb1f4ff958515f705bae3575f65..b4dccf386cc6db548a553a348de95ecd7fbca0bc 100644 --- a/libgo/go/debug/elf/elf_test.go +++ b/libgo/go/debug/elf/elf_test.go @@ -31,6 +31,7 @@ var nameTests = []nameTest{ {STV_HIDDEN, "STV_HIDDEN"}, {R_X86_64_PC32, "R_X86_64_PC32"}, {R_ALPHA_OP_PUSH, "R_ALPHA_OP_PUSH"}, + {R_SW_64_OP_PUSH, "R_SW_64_OP_PUSH"}, {R_ARM_THM_ABS5, "R_ARM_THM_ABS5"}, {R_386_GOT32, "R_386_GOT32"}, {R_PPC_GOT16_HI, "R_PPC_GOT16_HI"}, diff --git a/libgo/go/debug/elf/file.go b/libgo/go/debug/elf/file.go index b9a8b1e0cbb545de13b882d0171326aeb2d2b2b8..eea0f9aa10848acb28a8e21f6bfb1f0557bc7e31 100644 --- a/libgo/go/debug/elf/file.go +++ b/libgo/go/debug/elf/file.go @@ -627,6 +627,8 @@ func (f *File) applyRelocations(dst []byte, rels []byte) error { return f.applyRelocationsSPARC64(dst, rels) case f.Class == ELFCLASS64 && f.Machine == EM_ALPHA: return f.applyRelocationsALPHA(dst, rels) + case f.Class == ELFCLASS64 && f.Machine == EM_SW_64: + return f.applyRelocationsSW_64(dst, rels) default: return errors.New("applyRelocations: not implemented") } @@ -1238,6 +1240,53 @@ func (f *File) applyRelocationsALPHA(dst []byte, rels []byte) error { return nil } +//SW_64 begin + +func (f *File) applyRelocationsSW_64(dst []byte, rels []byte) error { + // 24 is the size of Rela64. + if len(rels)%24 != 0 { + return errors.New("length of relocation section is not a multiple of 24") + } + + symbols, _, err := f.getSymbols(SHT_SYMTAB) + if err != nil { + return err + } + + b := bytes.NewReader(rels) + var rela Rela64 + for b.Len() > 0 { + binary.Read(b, f.ByteOrder, &rela) + symNo := rela.Info >> 32 + t := R_SW_64(rela.Info & 0xffff) + + if symNo == 0 || symNo > uint64(len(symbols)) { + continue + } + sym := &symbols[symNo-1] + if SymType(sym.Info&0xf) != STT_SECTION { + // We don't handle non-section relocations for now. + continue + } + + // There are relocations, so this must be a normal + // object file, and we only look at section symbols, + // so we assume that the symbol value is 0. + switch t { + case R_SW_64_REFQUAD: + if rela.Off+8 >= uint64(len(dst)) || rela.Addend < 0 { + continue + } + f.ByteOrder.PutUint64(dst[rela.Off:rela.Off+8], uint64(rela.Addend)) + case R_SW_64_REFLONG: + if rela.Off+4 >= uint64(len(dst)) || rela.Addend < 0 { + } + f.ByteOrder.PutUint32(dst[rela.Off:rela.Off+4], uint32(rela.Addend)) + } + } + return nil +} +//SW_64 end func (f *File) DWARF() (*dwarf.Data, error) { dwarfSuffix := func(s *Section) string { switch { diff --git a/libgo/go/encoding/xml/xml.go b/libgo/go/encoding/xml/xml.go index 5e73dcf731660c3da429ec5164d3b97c7e35b59f..1a6e2860e9b4e376484b34e6784fcc021c2ec26e 100644 --- a/libgo/go/encoding/xml/xml.go +++ b/libgo/go/encoding/xml/xml.go @@ -1719,6 +1719,7 @@ var htmlEntity = map[string]string{ "Psi": "\u03A8", "Omega": "\u03A9", "alpha": "\u03B1", + "sw_64": "\u03B1", "beta": "\u03B2", "gamma": "\u03B3", "delta": "\u03B4", diff --git a/libgo/go/go/build/syslist.go b/libgo/go/go/build/syslist.go index d72649b8b4129913c41aba8aa6dabe5610f2f446..c0975fa96ff444d3af7132727b3331ae05436872 100644 --- a/libgo/go/go/build/syslist.go +++ b/libgo/go/go/build/syslist.go @@ -8,4 +8,4 @@ package build // Do not remove from this list, as these are used for go/build filename matching. 
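[Editor's note] applyRelocationsSW_64 above patches only the two plain data relocations, and only for section symbols whose value is taken as zero. Expressed in C for comparison (function names invented, little-endian host and target assumed, non-negative-addend and bounds guards kept in the same spirit):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static void
apply_r_sw_64_refquad (uint8_t *dst, size_t len, uint64_t off, int64_t addend)
{
  uint64_t v = (uint64_t) addend;        /* section symbol value is 0 */
  if (addend >= 0 && off + 8 <= len)
    memcpy (dst + off, &v, 8);
}

static void
apply_r_sw_64_reflong (uint8_t *dst, size_t len, uint64_t off, int64_t addend)
{
  uint32_t v = (uint32_t) addend;
  if (addend >= 0 && off + 4 <= len)
    memcpy (dst + off, &v, 4);
}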
const goosList = "aix android darwin dragonfly freebsd hurd illumos js linux nacl netbsd openbsd plan9 solaris windows zos " -const goarchList = "386 amd64 amd64p32 arm armbe arm64 arm64be ppc64 ppc64le mips mipsle mips64 mips64le mips64p32 mips64p32le ppc riscv riscv64 s390 s390x sparc sparc64 wasm alpha m68k nios2 sh shbe " +const goarchList = "386 amd64 amd64p32 arm armbe arm64 arm64be ppc64 ppc64le mips mipsle mips64 mips64le mips64p32 mips64p32le ppc riscv riscv64 s390 s390x sparc sparc64 wasm alpha sw_64 m68k nios2 sh shbe " diff --git a/libgo/go/internal/syscall/unix/getrandom_linux_sw_64.go b/libgo/go/internal/syscall/unix/getrandom_linux_sw_64.go new file mode 100644 index 0000000000000000000000000000000000000000..9587b5aa4caf004945f33f5fd01884f72b893c5b --- /dev/null +++ b/libgo/go/internal/syscall/unix/getrandom_linux_sw_64.go @@ -0,0 +1,9 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package unix + +// Linux getrandom system call number. +// See GetRandom in getrandom_linux.go. +const randomTrap uintptr = 511 diff --git a/libgo/go/net/listen_test.go b/libgo/go/net/listen_test.go index d8c72096ed16514fc415f9ba15905fc2439a25de..ba7808774c9909b8ccf5b60ab872a55847ec4716 100644 --- a/libgo/go/net/listen_test.go +++ b/libgo/go/net/listen_test.go @@ -677,7 +677,7 @@ func multicastRIBContains(ip IP) (bool, error) { case "aix", "dragonfly", "netbsd", "openbsd", "plan9", "solaris", "illumos", "windows": return true, nil // not implemented yet case "linux": - if runtime.GOARCH == "arm" || runtime.GOARCH == "alpha" { + if runtime.GOARCH == "arm" || runtime.GOARCH == "alpha" || runtime.GOARCH == "sw_64" { return true, nil // not implemented yet } } diff --git a/libgo/go/regexp/testdata/basic.dat b/libgo/go/regexp/testdata/basic.dat index 7859290ba1dd0dc354b3149e16f364b2a1015794..061c403d6c9270dcf6e2b1195b84c9087843dde0 100644 --- a/libgo/go/regexp/testdata/basic.dat +++ b/libgo/go/regexp/testdata/basic.dat @@ -157,6 +157,7 @@ E a[bcd]*dcdcde adcdcde (0,7) E (ab|a)b*c abc (0,3)(0,2) E ((a)(b)c)(d) abcd (0,4)(0,3)(0,1)(1,2)(3,4) BE [A-Za-z_][A-Za-z0-9_]* alpha (0,5) +BE [A-Za-z_][A-Za-z0-9_]* sw_64 (0,5) E ^a(bc+|b[eh])g|.h$ abh (1,3) E (bc+d$|ef*g.|h?i(j|k)) effgz (0,5)(0,5) E (bc+d$|ef*g.|h?i(j|k)) ij (0,2)(0,2)(1,2) diff --git a/libgo/go/runtime/hash64.go b/libgo/go/runtime/hash64.go index 704bbe6f62bc9a6a79df8be630f7c3e2ff10a73d..d7b9e0b226d625e4d5a9bf9ed3c42493b9e737fb 100644 --- a/libgo/go/runtime/hash64.go +++ b/libgo/go/runtime/hash64.go @@ -6,7 +6,7 @@ // xxhash: https://code.google.com/p/xxhash/ // cityhash: https://code.google.com/p/cityhash/ -// +build amd64 arm64 mips64 mips64le ppc64 ppc64le riscv64 s390x wasm alpha amd64p32 arm64be ia64 mips64p32 mips64p32le sparc64 +// +build amd64 arm64 mips64 mips64le ppc64 ppc64le riscv64 s390x wasm alpha sw_64 amd64p32 arm64be ia64 mips64p32 mips64p32le sparc64 package runtime diff --git a/libgo/go/runtime/lfstack_64bit.go b/libgo/go/runtime/lfstack_64bit.go index af9e7d164b93eeec575edd6de54a3ea56388a931..d572e6656765757502c1e44800e223dd72e350fa 100644 --- a/libgo/go/runtime/lfstack_64bit.go +++ b/libgo/go/runtime/lfstack_64bit.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
-// +build amd64 arm64 mips64 mips64le ppc64 ppc64le riscv64 s390x wasm arm64be alpha sparc64 ia64 +// +build amd64 arm64 mips64 mips64le ppc64 ppc64le riscv64 s390x wasm arm64be alpha sw_64 sparc64 ia64 package runtime diff --git a/libgo/go/runtime/mpagealloc_64bit.go b/libgo/go/runtime/mpagealloc_64bit.go index 385b7b3e7a7a5819b8d90710e6f31f7781231b03..ceed0f4423e2381e5532146959e82f46d79ccf3e 100644 --- a/libgo/go/runtime/mpagealloc_64bit.go +++ b/libgo/go/runtime/mpagealloc_64bit.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build amd64 !darwin,arm64 mips64 mips64le ppc64 ppc64le riscv64 s390x arm64be alpha sparc64 ia64 +// +build amd64 !darwin,arm64 mips64 mips64le ppc64 ppc64le riscv64 s390x arm64be alpha sparc64 ia64 sw_64 // See mpagealloc_32bit.go for why darwin/arm64 is excluded here. diff --git a/libgo/go/syscall/endian_little.go b/libgo/go/syscall/endian_little.go index 0cd2d7524c646f6e61bbd7428a9b14b6761553dd..b67d4807978f583ea652b186f7742e8fc3b5e140 100644 --- a/libgo/go/syscall/endian_little.go +++ b/libgo/go/syscall/endian_little.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // -// +build 386 alpha amd64 amd64p32 arm arm64 ia64 mips64le mipsle mips64p32le nios2 ppc64le riscv64 sh wasm +// +build 386 alpha sw_64 amd64 amd64p32 arm arm64 ia64 mips64le mipsle mips64p32le nios2 ppc64le riscv64 sh wasm package syscall diff --git a/libgo/go/syscall/libcall_linux_sw_64.go b/libgo/go/syscall/libcall_linux_sw_64.go new file mode 100644 index 0000000000000000000000000000000000000000..f6bb7be296bebf92d78278428b3c565060f29838 --- /dev/null +++ b/libgo/go/syscall/libcall_linux_sw_64.go @@ -0,0 +1,13 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// GNU/Linux library calls Sw_64 specific. + +package syscall + +//sys Ioperm(from int, num int, on int) (err error) +//ioperm(from _C_long, num _C_long, on _C_int) _C_int + +//sys Iopl(level int) (err error) +//iopl(level _C_int) _C_int diff --git a/libgo/go/syscall/syscall_linux_sw_64.go b/libgo/go/syscall/syscall_linux_sw_64.go new file mode 100644 index 0000000000000000000000000000000000000000..5a87d687d1be72d8c36da399f26796152e7ff742 --- /dev/null +++ b/libgo/go/syscall/syscall_linux_sw_64.go @@ -0,0 +1,25 @@ +// syscall_linux_sw_64.go -- GNU/Linux SW_64 specific support + +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package syscall + +import "unsafe" + +func (r *PtraceRegs) PC() uint64 { + return r.Pc +} + +func (r *PtraceRegs) SetPC(pc uint64) { + r.Pc = pc +} + +func PtraceGetRegs(pid int, regsout *PtraceRegs) (err error) { + return ptrace(PTRACE_GETREGS, pid, 0, uintptr(unsafe.Pointer(regsout))) +} + +func PtraceSetRegs(pid int, regs *PtraceRegs) (err error) { + return ptrace(PTRACE_SETREGS, pid, 0, uintptr(unsafe.Pointer(regs))) +} diff --git a/libgo/goarch.sh b/libgo/goarch.sh index a5b6217c931193f4cda5bac1f3577682b99035fc..7013301f4f4bbc19568a59d6c43a0a8492cf5ade 100644 --- a/libgo/goarch.sh +++ b/libgo/goarch.sh @@ -52,6 +52,11 @@ case $goarch in defaultphyspagesize=8192 pcquantum=4 ;; + sw_64) + family=SW_64 + defaultphyspagesize=8192 + pcquantum=4 + ;; amd64) family=AMD64 ;; diff --git a/libgo/match.sh b/libgo/match.sh index cd35942f8bcca4d03de4fb110e8a46cea6947efe..028ea11a38643e03b89aed386562106418052a7e 100644 --- a/libgo/match.sh +++ b/libgo/match.sh @@ -116,7 +116,7 @@ for f in $gofiles; do aix | android | darwin | dragonfly | freebsd | illumos | hurd | js | linux | nacl | netbsd | openbsd | plan9 | solaris | windows) tag1=nonmatchingtag ;; - 386 | amd64 | amd64p32 | arm | armbe | arm64 | arm64be | alpha | ia64 | m68k | mips | mipsle | mips64 | mips64le | mips64p32 | mips64p32le | nios2 | ppc | ppc64 | ppc64le | riscv64 | s390 | s390x | sh | shbe | sparc | sparc64 | wasm) + 386 | amd64 | amd64p32 | arm | armbe | arm64 | arm64be | alpha | sw_64 | ia64 | m68k | mips | mipsle | mips64 | mips64le | mips64p32 | mips64p32le | nios2 | ppc | ppc64 | ppc64le | riscv64 | s390 | s390x | sh | shbe | sparc | sparc64 | wasm) tag1=nonmatchingtag ;; esac @@ -128,7 +128,7 @@ for f in $gofiles; do aix | android | darwin | dragonfly | freebsd | hurd | illumos | js | linux | nacl | netbsd | openbsd | plan9 | solaris | windows) tag2=nonmatchingtag ;; - 386 | amd64 | amd64p32 | arm | armbe | arm64 | arm64be | alpha | ia64 | m68k | mips | mipsle | mips64 | mips64le | mips64p32 | mips64p32le | nios2 | ppc | ppc64 | ppc64le | riscv64 | s390 | s390x | sh | shbe | sparc | sparc64 | wasm) + 386 | amd64 | amd64p32 | arm | armbe | arm64 | arm64be | alpha | sw_64 | ia64 | m68k | mips | mipsle | mips64 | mips64le | mips64p32 | mips64p32le | nios2 | ppc | ppc64 | ppc64le | riscv64 | s390 | s390x | sh | shbe | sparc | sparc64 | wasm) tag2=nonmatchingtag ;; esac diff --git a/libgo/mksysinfo.sh b/libgo/mksysinfo.sh index bd2ba32cba1b58c9fc8fb54d055eb2d3089ab94c..ce2d557100d9310eff71aa3763244e1898ab2d0e 100644 --- a/libgo/mksysinfo.sh +++ b/libgo/mksysinfo.sh @@ -353,7 +353,12 @@ if test "$regs" = ""; then # mips* regs=`grep '^type _pt_regs struct' gen-sysinfo.go || true` fi +if test "$regs" = ""; then + # sw_64* + regs=`grep '^type _user_pt_regs struct' gen-sysinfo.go || true` +fi if test "$regs" != ""; then + regs=`echo $regs | sed -e 's/type _user_pt_regs struct//'` regs=`echo $regs | sed -e 's/type _pt_regs struct//'` regs=`echo $regs | sed -e 's/type __*user_regs_struct struct //' -e 's/[{}]//g'` diff --git a/libgo/runtime/go-signal.c b/libgo/runtime/go-signal.c index b429fdb240349c96ef84df37dc86fa8dd5e48c93..9f7eb1b8f0451ebc13b912bbf2f7789d59179f79 100644 --- a/libgo/runtime/go-signal.c +++ b/libgo/runtime/go-signal.c @@ -223,6 +223,8 @@ getSiginfo(siginfo_t *info, void *context __attribute__((unused))) ret.sigpc = ((ucontext_t*)(context))->uc_mcontext.gregs[REG_EIP]; #elif defined(__alpha__) && defined(__linux__) ret.sigpc = ((ucontext_t*)(context))->uc_mcontext.sc_pc; +#elif defined(__sw_64__) && 
defined(__linux__) + ret.sigpc = ((ucontext_t*)(context))->uc_mcontext.sc_pc; #elif defined(__PPC__) && defined(__linux__) ret.sigpc = ((ucontext_t*)(context))->uc_mcontext.regs->nip; #elif defined(__PPC__) && defined(_AIX) @@ -296,7 +298,7 @@ dumpregs(siginfo_t *info __attribute__((unused)), void *context __attribute__((u runtime_printf("fs %x\n", m->gregs[REG_FS]); runtime_printf("gs %x\n", m->gregs[REG_GS]); } -#elif defined(__alpha__) && defined(__linux__) +#elif (defined(__alpha__)||defined(__sw_64__)) && defined(__linux__) { mcontext_t *m = &((ucontext_t*)(context))->uc_mcontext; diff --git a/libgomp/config/linux/sw_64/futex.h b/libgomp/config/linux/sw_64/futex.h new file mode 100644 index 0000000000000000000000000000000000000000..cd19a9bb4770df38d7ffa2c953f5dc41f56af5e8 --- /dev/null +++ b/libgomp/config/linux/sw_64/futex.h @@ -0,0 +1,102 @@ +/* Copyright (C) 2005-2020 Free Software Foundation, Inc. + Contributed by Richard Henderson . + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* Provide target-specific access to the futex system call. 
*/ + +#ifndef SYS_futex +#define SYS_futex 394 +#endif + +static inline void +futex_wait (int *addr, int val) +{ + register long sc_0 __asm__("$0"); + register long sc_16 __asm__("$16"); + register long sc_17 __asm__("$17"); + register long sc_18 __asm__("$18"); + register long sc_19 __asm__("$19"); + + sc_0 = SYS_futex; + sc_16 = (long) addr; + sc_17 = gomp_futex_wait; + sc_18 = val; + sc_19 = 0; + __asm volatile("callsys" + : "=r"(sc_0), "=r"(sc_19) + : "0"(sc_0), "r"(sc_16), "r"(sc_17), "r"(sc_18), "1"(sc_19) + : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$22", "$23", + "$24", "$25", "$27", "$28", "memory"); + if (__builtin_expect (sc_19, 0) && sc_0 == ENOSYS) + { + gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; + gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; + sc_0 = SYS_futex; + sc_17 &= ~FUTEX_PRIVATE_FLAG; + sc_19 = 0; + __asm volatile("callsys" + : "=r"(sc_0), "=r"(sc_19) + : "0"(sc_0), "r"(sc_16), "r"(sc_17), "r"(sc_18), "1"(sc_19) + : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$22", + "$23", "$24", "$25", "$27", "$28", "memory"); + } +} + +static inline void +futex_wake (int *addr, int count) +{ + register long sc_0 __asm__("$0"); + register long sc_16 __asm__("$16"); + register long sc_17 __asm__("$17"); + register long sc_18 __asm__("$18"); + register long sc_19 __asm__("$19"); + + sc_0 = SYS_futex; + sc_16 = (long) addr; + sc_17 = gomp_futex_wake; + sc_18 = count; + __asm volatile("callsys" + : "=r"(sc_0), "=r"(sc_19) + : "0"(sc_0), "r"(sc_16), "r"(sc_17), "r"(sc_18) + : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$22", "$23", + "$24", "$25", "$27", "$28", "memory"); + if (__builtin_expect (sc_19, 0) && sc_0 == ENOSYS) + { + gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; + gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; + sc_0 = SYS_futex; + sc_17 &= ~FUTEX_PRIVATE_FLAG; + __asm volatile("callsys" + : "=r"(sc_0), "=r"(sc_19) + : "0"(sc_0), "r"(sc_16), "r"(sc_17), "r"(sc_18) + : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$22", + "$23", "$24", "$25", "$27", "$28", "memory"); + } +} + +static inline void +cpu_relax (void) +{ + __asm volatile("" : : : "memory"); +} diff --git a/libgomp/configure b/libgomp/configure index b03036c2738cbee16e15f3198c3028a72633c9ae..7d8f769d0534d86d60e652b89e3c765d22fe64d7 100644 --- a/libgomp/configure +++ b/libgomp/configure @@ -11844,6 +11844,12 @@ case `echo $GFORTRAN` in FC=no fi ;; esac +case "${target}" in + sw_64-*-*) + FC="$GFORTRAN" + ;; +*) +esac ac_ext=${ac_fc_srcext-f} ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' diff --git a/libgomp/configure.tgt b/libgomp/configure.tgt index e5b558be0c1948cd50fb8ea67f8495f0ae7e1f31..8a1a8565e522636f163252f7a1ba02bf12e6058b 100644 --- a/libgomp/configure.tgt +++ b/libgomp/configure.tgt @@ -76,6 +76,10 @@ if test x$enable_linux_futex = xyes; then config_path="linux/s390 linux posix" ;; + sw_64*-*-linux*) + config_path="linux/sw_64 linux posix" + ;; + tile*-*-linux*) config_path="linux/tile linux posix" ;; diff --git a/libgomp/libgomp.spec.in b/libgomp/libgomp.spec.in index 5651603f48745617e9aa7de5f07d2a618a18caad..738895d592659028b08da32455d004cf5a709705 100644 --- a/libgomp/libgomp.spec.in +++ b/libgomp/libgomp.spec.in @@ -1,3 +1,4 @@ # This spec file is read by gcc when linking. It is used to specify the # standard libraries we need in order to link with libgomp. 
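The new libgomp futex header above talks to the kernel through hand-written callsys sequences, with SYS_futex defaulted to 394 and a one-shot fallback that clears FUTEX_PRIVATE_FLAG from gomp_futex_wait/gomp_futex_wake when the kernel answers ENOSYS. The sketch below expresses the same wait/wake-plus-fallback logic through glibc's generic syscall(2) wrapper purely for illustration; the demo_* names and the use of <linux/futex.h> constants are assumptions of this sketch, not code from the patch.

// Hedged sketch: the same futex wait/wake and ENOSYS fallback idea as
// libgomp/config/linux/sw_64/futex.h, but via syscall(2) instead of
// inline callsys assembly.
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <cerrno>

static int futex_op_wait = FUTEX_WAIT_PRIVATE;
static int futex_op_wake = FUTEX_WAKE_PRIVATE;

static void demo_futex_wait (int *addr, int val)
{
  long rc = syscall (SYS_futex, addr, futex_op_wait, val, nullptr, nullptr, 0);
  if (rc < 0 && errno == ENOSYS)
    {
      // Old kernels only know the non-private operations: drop the flag
      // once and retry, as the patch does for gomp_futex_wait/wake.
      futex_op_wait &= ~FUTEX_PRIVATE_FLAG;
      futex_op_wake &= ~FUTEX_PRIVATE_FLAG;
      syscall (SYS_futex, addr, futex_op_wait, val, nullptr, nullptr, 0);
    }
}

static void demo_futex_wake (int *addr, int count)
{
  long rc = syscall (SYS_futex, addr, futex_op_wake, count, nullptr, nullptr, 0);
  if (rc < 0 && errno == ENOSYS)
    {
      futex_op_wake &= ~FUTEX_PRIVATE_FLAG;
      syscall (SYS_futex, addr, futex_op_wake, count, nullptr, nullptr, 0);
    }
}

int main ()
{
  int flag = 1;
  demo_futex_wake (&flag, 1);   // no waiters, returns immediately
  demo_futex_wait (&flag, 0);   // *flag != 0, so the kernel returns EAGAIN at once
  return 0;
}

The libitm variant that follows (sys_futex0) wraps the same system call but hands the negated errno back to its caller instead of retrying.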
-*link_gomp: @link_gomp@ +#*link_gomp: @link_gomp@ +*link_gomp: @link_gomp@ --whole-archive -lpthread --no-whole-archive diff --git a/libitm/config/linux/sw_64/futex_bits.h b/libitm/config/linux/sw_64/futex_bits.h new file mode 100644 index 0000000000000000000000000000000000000000..5688fc17a14179f8609c4b6a0d6d43cc009bbaa0 --- /dev/null +++ b/libitm/config/linux/sw_64/futex_bits.h @@ -0,0 +1,56 @@ +/* Copyright (C) 2008-2020 Free Software Foundation, Inc. + Contributed by Richard Henderson . + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* Provide target-specific access to the futex system call. */ + +#ifndef SYS_futex +#define SYS_futex 394 +#endif + +static inline long +sys_futex0 (std::atomic *addr, int op, int val) +{ + register long sc_0 __asm__("$0"); + register long sc_16 __asm__("$16"); + register long sc_17 __asm__("$17"); + register long sc_18 __asm__("$18"); + register long sc_19 __asm__("$19"); + long res; + + sc_0 = SYS_futex; + sc_16 = (long) addr; + sc_17 = op; + sc_18 = val; + sc_19 = 0; + __asm volatile("callsys" + : "=r"(sc_0), "=r"(sc_19) + : "0"(sc_0), "r"(sc_16), "r"(sc_17), "r"(sc_18), "1"(sc_19) + : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$22", "$23", + "$24", "$25", "$27", "$28", "memory"); + + res = sc_0; + if (__builtin_expect (sc_19, 0)) + res = -res; + return res; +} diff --git a/libitm/config/sw_64/sjlj.S b/libitm/config/sw_64/sjlj.S new file mode 100644 index 0000000000000000000000000000000000000000..5c62e3d237179834e2d9df91c8cea83d7e975d58 --- /dev/null +++ b/libitm/config/sw_64/sjlj.S @@ -0,0 +1,112 @@ +/* Copyright (C) 2009-2020 Free Software Foundation, Inc. + Contributed by Richard Henderson . + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . 
*/ + + .text + .align 4 + .globl _ITM_beginTransaction + .ent _ITM_beginTransaction + +#define FRAME 144 + +_ITM_beginTransaction: + ldgp $29, 0($27) + subl $30, FRAME, $30 + .frame $30, FRAME, $26, 0 + .mask 0x04000000, 0 + stl $26, 0($30) + .prologue 1 + + stl $9, 8($30) + stl $10, 16($30) + addl $30, FRAME, $0 + stl $11, 24($30) + + stl $12, 32($30) + stl $13, 40($30) + stl $14, 48($30) + stl $15, 56($30) + + stl $0, 64($30) + fstd $f2, 72($30) + fstd $f3, 80($30) + fstd $f4, 88($30) + + fstd $f5, 96($30) + fstd $f6, 104($30) + fstd $f7, 112($30) + fstd $f8, 120($30) + + fstd $f9, 128($30) + mov $30, $17 +#ifdef __PIC__ + unop + bsr $26, GTM_begin_transaction !samegp +#else + call $26, GTM_begin_transaction + ldgp $29, 0($26) +#endif + + ldl $26, 0($30) + addl $30, FRAME, $30 + ret +.end _ITM_beginTransaction + + .align 4 + .globl GTM_longjmp +#ifdef __ELF__ + .hidden GTM_longjmp +#endif + .ent GTM_longjmp + +GTM_longjmp: + .prologue 0 + ldl $26, 0($17) + ldl $9, 8($17) + ldl $10, 16($17) + ldl $11, 24($17) + + ldl $12, 32($17) + ldl $13, 40($17) + ldl $14, 48($17) + ldl $15, 56($17) + + ldl $1, 64($17) + fldd $f2, 72($17) + fldd $f3, 80($17) + fldd $f4, 88($17) + + fldd $f5, 96($17) + fldd $f6, 104($17) + fldd $f7, 112($17) + fldd $f8, 120($17) + + fldd $f9, 128($17) + mov $16, $0 + mov $1, $30 + ret +.end GTM_longjmp + +#ifdef __linux__ +.section .note.GNU-stack, "", @progbits +#endif diff --git a/libitm/config/sw_64/target.h b/libitm/config/sw_64/target.h new file mode 100644 index 0000000000000000000000000000000000000000..4cf8d8d41d86c71cc95c99517317ac01a8f2d35a --- /dev/null +++ b/libitm/config/sw_64/target.h @@ -0,0 +1,44 @@ +/* Copyright (C) 2009-2020 Free Software Foundation, Inc. + Contributed by Richard Henderson . + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +namespace GTM HIDDEN { + +typedef struct gtm_jmpbuf +{ + unsigned long pc; + unsigned long s[7]; + void *cfa; + unsigned long f[8]; +} gtm_jmpbuf; + +/* The size of one line in hardware caches (in bytes). */ +#define HW_CACHELINE_SIZE 64 + +static inline void +cpu_relax (void) +{ + __asm volatile("" : : : "memory"); +} + +} // namespace GTMHIDDEN diff --git a/libitm/configure.tgt b/libitm/configure.tgt index d1beb5c9ec85a3621b0709394a726065cc89139a..30db505a7a91529fd3063cfd7c582674eb27ec54 100644 --- a/libitm/configure.tgt +++ b/libitm/configure.tgt @@ -121,6 +121,7 @@ case "${target_cpu}" in *) ARCH="${target_cpu}" ;; + sw_64*) ARCH=sw_64 ;; esac # For the benefit of top-level configure, determine if the cpu is supported. 
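_ITM_beginTransaction above checkpoints the transaction in a 144-byte frame: the return address at offset 0, the callee-saved integer registers $9..$15 at 8..56, the caller's stack pointer at 64, and $f2..$f9 at 72..128; GTM_longjmp reloads exactly the same slots. That is the gtm_jmpbuf declared in the new libitm/config/sw_64/target.h, so the struct and the assembly have to stay in lock step. A standalone compile-time cross-check, assuming the usual LP64 sizes (8-byte unsigned long and pointers), is sketched below; it is not part of the patch.

// Compile-time sketch tying the sjlj.S frame layout to gtm_jmpbuf
// (libitm/config/sw_64/target.h), LP64 assumed.
#include <cstddef>

struct gtm_jmpbuf
{
  unsigned long pc;    // $26, stored at 0($sp) by _ITM_beginTransaction
  unsigned long s[7];  // $9..$15, stored at 8..56
  void *cfa;           // caller's $sp (sp + FRAME), stored at 64
  unsigned long f[8];  // $f2..$f9, stored at 72..128
};

static_assert (offsetof (gtm_jmpbuf, s) == 8,    "matches the stores at 8..56");
static_assert (offsetof (gtm_jmpbuf, cfa) == 64, "matches the store at 64");
static_assert (offsetof (gtm_jmpbuf, f) == 72,   "matches the stores at 72..128");
static_assert (sizeof (gtm_jmpbuf) == 136,       "fits inside the 144-byte FRAME");

int main () { return 0; }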
diff --git a/libsanitizer/asan/asan_allocator.h b/libsanitizer/asan/asan_allocator.h index b37d8ef4e8d2926e5014fe7a05467cd139d1023f..78ab20f4d1c0ff0eb43e881f634f8169ce23e50d 100644 --- a/libsanitizer/asan/asan_allocator.h +++ b/libsanitizer/asan/asan_allocator.h @@ -146,6 +146,11 @@ typedef DefaultSizeClassMap SizeClassMap; const uptr kAllocatorSpace = ~(uptr)0; const uptr kAllocatorSize = 0x8000000000ULL; // 500G typedef DefaultSizeClassMap SizeClassMap; +# elif SANITIZER_SW64 +// If kSpaceBeg is ~0 then SpaceBeg is chosen dynamically my mmap. +const uptr kAllocatorSpace = ~(uptr)0; +const uptr kAllocatorSize = 0x40000000000ULL; // 4T. +typedef DefaultSizeClassMap SizeClassMap; # else const uptr kAllocatorSpace = 0x600000000000ULL; const uptr kAllocatorSize = 0x40000000000ULL; // 4T. diff --git a/libsanitizer/asan/asan_interceptors.cpp b/libsanitizer/asan/asan_interceptors.cpp index b19cf25c7cd00ddede212ee932b4f8d0065814c9..0f8cf179e974c16c426233d4bbf45a837c0ced53 100644 --- a/libsanitizer/asan/asan_interceptors.cpp +++ b/libsanitizer/asan/asan_interceptors.cpp @@ -41,6 +41,8 @@ #define ASAN_PTHREAD_CREATE_VERSION "GLIBC_2.1" #elif defined(__mips__) && SANITIZER_LINUX #define ASAN_PTHREAD_CREATE_VERSION "GLIBC_2.2" +#elif defined(__sw_64__) +#define ASAN_PTHREAD_CREATE_VERSION "GLIBC_2.1" #endif namespace __asan { diff --git a/libsanitizer/asan/asan_mapping.h b/libsanitizer/asan/asan_mapping.h index 09be904270cedbb1bed9736dd3a616463180c83b..44187e3754b9683b7c33655d692efe7bc940640b 100644 --- a/libsanitizer/asan/asan_mapping.h +++ b/libsanitizer/asan/asan_mapping.h @@ -163,6 +163,7 @@ static const u64 kDefaultShort64bitShadowOffset = static const u64 kAArch64_ShadowOffset64 = 1ULL << 36; static const u64 kMIPS32_ShadowOffset32 = 0x0aaa0000; static const u64 kMIPS64_ShadowOffset64 = 1ULL << 37; +static const u64 kSW64_ShadowOffset64 = 1ULL << 49; static const u64 kPPC64_ShadowOffset64 = 1ULL << 41; static const u64 kSystemZ_ShadowOffset64 = 1ULL << 52; static const u64 kSPARC64_ShadowOffset64 = 1ULL << 43; // 0x80000000000 @@ -210,6 +211,8 @@ static const u64 kMyriadCacheBitMask32 = 0x40000000ULL; # define SHADOW_OFFSET kAArch64_ShadowOffset64 # elif defined(__powerpc64__) # define SHADOW_OFFSET kPPC64_ShadowOffset64 +# elif defined(__sw_64__) +# define SHADOW_OFFSET kSW64_ShadowOffset64 # elif defined(__s390x__) # define SHADOW_OFFSET kSystemZ_ShadowOffset64 # elif SANITIZER_FREEBSD diff --git a/libsanitizer/configure.tgt b/libsanitizer/configure.tgt index fa30065b5954efb301b88dd38c35de9e8ad00541..9ebad0020628bee80e65015550b7e7e3ffa8e08c 100644 --- a/libsanitizer/configure.tgt +++ b/libsanitizer/configure.tgt @@ -47,6 +47,10 @@ case "${target}" in ;; arm*-*-linux*) ;; + sw_64*-*-linux*) + TSAN_SUPPORTED=yes + LSAN_SUPPORTED=yes + ;; mips*64*-*-linux*) # This clause is only here to not match the supported mips*-*-linux*. 
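The only target-specific number ASan really needs above is the shadow offset: with the default scale of 3, every eight application bytes are described by one shadow byte at (addr >> 3) + offset, and the patch picks 1ULL << 49 for sw_64. A throwaway sketch of that arithmetic follows; the constant and function names are local to the sketch, and the sample address is hypothetical.

// Illustration of ASan's address-to-shadow arithmetic with the sw_64
// offset chosen above (kSW64_ShadowOffset64 = 1ULL << 49).
#include <cstdint>
#include <cstdio>

constexpr uint64_t kShadowScale  = 3;           // ASan default: 8 app bytes per shadow byte
constexpr uint64_t kShadowOffset = 1ULL << 49;  // value used for __sw_64__

constexpr uint64_t mem_to_shadow (uint64_t addr)
{
  return (addr >> kShadowScale) + kShadowOffset;
}

int main ()
{
  const uint64_t sample = 0x120000000ULL;       // hypothetical heap address
  std::printf ("app %#llx -> shadow %#llx\n",
               (unsigned long long) sample,
               (unsigned long long) mem_to_shadow (sample));
  return 0;
}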
UNSUPPORTED=1 diff --git a/libsanitizer/lsan/lsan_allocator.cpp b/libsanitizer/lsan/lsan_allocator.cpp index d86c3921395cb2bcb8d263c4171b4fbba10b2ffd..b3ce8dc8166bd324c3580a745cd99a7196c1fbbb 100644 --- a/libsanitizer/lsan/lsan_allocator.cpp +++ b/libsanitizer/lsan/lsan_allocator.cpp @@ -28,7 +28,7 @@ extern "C" void *memset(void *ptr, int value, uptr num); namespace __lsan { #if defined(__i386__) || defined(__arm__) static const uptr kMaxAllowedMallocSize = 1UL << 30; -#elif defined(__mips64) || defined(__aarch64__) +#elif defined(__mips64) || defined(__aarch64__) || defined(__sw_64__) static const uptr kMaxAllowedMallocSize = 4UL << 30; #else static const uptr kMaxAllowedMallocSize = 8UL << 30; diff --git a/libsanitizer/lsan/lsan_allocator.h b/libsanitizer/lsan/lsan_allocator.h index e1397099767284f28376214ac9284a610cac46d2..a5363392eb1f8abac829cf1d6720958a9e32479d 100644 --- a/libsanitizer/lsan/lsan_allocator.h +++ b/libsanitizer/lsan/lsan_allocator.h @@ -50,7 +50,7 @@ struct ChunkMetadata { }; #if defined(__mips64) || defined(__aarch64__) || defined(__i386__) || \ - defined(__arm__) + defined(__arm__) || defined(__sw_64__) template struct AP32 { static const uptr kSpaceBeg = 0; diff --git a/libsanitizer/lsan/lsan_common.cpp b/libsanitizer/lsan/lsan_common.cpp index 9ff9f4c5d1c977d32b7ce668e7a810691bae3215..a86141326100669addcfc2e7559707777868f8d9 100644 --- a/libsanitizer/lsan/lsan_common.cpp +++ b/libsanitizer/lsan/lsan_common.cpp @@ -138,6 +138,8 @@ static inline bool CanBeAHeapPointer(uptr p) { return ((p >> 47) == 0); #elif defined(__mips64) return ((p >> 40) == 0); +#elif defined(__sw_64__) + return ((p >> 52) == 0); #elif defined(__aarch64__) unsigned runtimeVMA = (MostSignificantSetBitIndex(GET_CURRENT_FRAME()) + 1); diff --git a/libsanitizer/lsan/lsan_common.h b/libsanitizer/lsan/lsan_common.h index d24abe31b71b524e3f545410e772ce6744170ba8..ed09db21507b82667c26d2fc578f994354acad89 100644 --- a/libsanitizer/lsan/lsan_common.h +++ b/libsanitizer/lsan/lsan_common.h @@ -32,7 +32,7 @@ #if (SANITIZER_LINUX && !SANITIZER_ANDROID || SANITIZER_MAC) && \ (SANITIZER_WORDSIZE == 64) && \ (defined(__x86_64__) || defined(__mips64) || defined(__aarch64__) || \ - defined(__powerpc64__)) + defined(__powerpc64__) || defined(__sw_64__)) #define CAN_SANITIZE_LEAKS 1 #elif defined(__i386__) && \ (SANITIZER_LINUX && !SANITIZER_ANDROID || SANITIZER_MAC) diff --git a/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc b/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc index 50e3558b52e87275987a6ba4522a0ea66e02e382..283529f008524135b7d2990456de708817aae384 100644 --- a/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc +++ b/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc @@ -4516,7 +4516,11 @@ INTERCEPTOR(int, shmctl, int shmid, int cmd, void *buf) { } return res; } +#ifdef SANITIZER_SW64 +#define INIT_SHMCTL COMMON_INTERCEPT_FUNCTION_VER(shmctl, "GLIBC_2.2"); +#else #define INIT_SHMCTL COMMON_INTERCEPT_FUNCTION(shmctl); +#endif #else #define INIT_SHMCTL #endif diff --git a/libsanitizer/sanitizer_common/sanitizer_common_syscalls.inc b/libsanitizer/sanitizer_common/sanitizer_common_syscalls.inc index 31ff48cfd2cfccef973402f8613568e32afb205f..e83569b991dc3defdbc09f646eaa7185680d006e 100644 --- a/libsanitizer/sanitizer_common/sanitizer_common_syscalls.inc +++ b/libsanitizer/sanitizer_common/sanitizer_common_syscalls.inc @@ -2296,7 +2296,8 @@ POST_SYSCALL(ni_syscall)(long res) {} PRE_SYSCALL(ptrace)(long request, long pid, long addr, long data) { #if 
!SANITIZER_ANDROID && \ (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ - defined(__powerpc64__) || defined(__aarch64__) || defined(__s390__)) + defined(__powerpc64__) || defined(__aarch64__) || defined(__s390__) || \ + defined(__sw_64__)) if (data) { if (request == ptrace_setregs) { PRE_READ((void *)data, struct_user_regs_struct_sz); @@ -2317,7 +2318,8 @@ PRE_SYSCALL(ptrace)(long request, long pid, long addr, long data) { POST_SYSCALL(ptrace)(long res, long request, long pid, long addr, long data) { #if !SANITIZER_ANDROID && \ (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ - defined(__powerpc64__) || defined(__aarch64__) || defined(__s390__)) + defined(__powerpc64__) || defined(__aarch64__) || defined(__s390__) || \ + defined(__sw_64__)) if (res >= 0 && data) { // Note that this is different from the interceptor in // sanitizer_common_interceptors.inc. diff --git a/libsanitizer/sanitizer_common/sanitizer_linux.cpp b/libsanitizer/sanitizer_common/sanitizer_linux.cpp index 15ccd738d858bb7afea21b59fc453027ec49f0a1..4ce47654ddfdea4b37ee1601946361bb3b14a4f6 100644 --- a/libsanitizer/sanitizer_common/sanitizer_linux.cpp +++ b/libsanitizer/sanitizer_common/sanitizer_linux.cpp @@ -42,6 +42,16 @@ #undef stat #endif +#if defined(__sw_64__) +#define stat kernel_stat +#define stat64 kernel_stat64 +#include +#undef stat +#undef stat64 +#include +#include +#endif + #include #include #include @@ -250,7 +260,7 @@ static void stat64_to_stat(struct stat64 *in, struct stat *out) { } #endif -#if defined(__mips64) +#if defined(__mips64) || defined(__sw_64__) // Undefine compatibility macros from // so that they would not clash with the kernel_stat // st_[a|m|c]time fields @@ -278,6 +288,12 @@ static void kernel_stat_to_stat(struct kernel_stat *in, struct stat *out) { out->st_size = in->st_size; out->st_blksize = in->st_blksize; out->st_blocks = in->st_blocks; +#if defined(__sw_64__) + // There's no nsecs in sw_64's struct stat + out->st_atim.tv_sec = in->st_atime; + out->st_mtim.tv_sec = in->st_mtime; + out->st_ctim.tv_sec = in->st_ctime; +#else #if defined(__USE_MISC) || \ defined(__USE_XOPEN2K8) || \ defined(SANITIZER_ANDROID) @@ -295,6 +311,7 @@ static void kernel_stat_to_stat(struct kernel_stat *in, struct stat *out) { out->st_ctime = in->st_ctime; out->st_atimensec = in->st_ctime_nsec; #endif +#endif } #endif @@ -305,8 +322,8 @@ uptr internal_stat(const char *path, void *buf) { return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path, (uptr)buf, 0); #elif SANITIZER_LINUX_USES_64BIT_SYSCALLS -# if defined(__mips64) - // For mips64, stat syscall fills buffer in the format of kernel_stat +# if defined(__mips64) || defined(__sw_64__) + // For mips64 and sw_64, stat syscall fills buffer in the format of kernel_stat struct kernel_stat kbuf; int res = internal_syscall(SYSCALL(stat), path, &kbuf); kernel_stat_to_stat(&kbuf, (struct stat *)buf); @@ -330,8 +347,8 @@ uptr internal_lstat(const char *path, void *buf) { return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path, (uptr)buf, AT_SYMLINK_NOFOLLOW); #elif SANITIZER_LINUX_USES_64BIT_SYSCALLS -# if SANITIZER_MIPS64 - // For mips64, lstat syscall fills buffer in the format of kernel_stat +# if SANITIZER_MIPS64 || SANITIZER_SW64 + // For mips64 and sw_64, lstat syscall fills buffer in the format of kernel_stat struct kernel_stat kbuf; int res = internal_syscall(SYSCALL(lstat), path, &kbuf); kernel_stat_to_stat(&kbuf, (struct stat *)buf); @@ -350,8 +367,8 @@ uptr internal_lstat(const char *path, void *buf) { uptr 
internal_fstat(fd_t fd, void *buf) { #if SANITIZER_FREEBSD || SANITIZER_OPENBSD || \ SANITIZER_LINUX_USES_64BIT_SYSCALLS -#if SANITIZER_MIPS64 && !SANITIZER_OPENBSD - // For mips64, fstat syscall fills buffer in the format of kernel_stat +#if (SANITIZER_MIPS64 || SANITIZER_SW64) && !SANITIZER_OPENBSD + // For mips64 and sw_64, fstat syscall fills buffer in the format of kernel_stat struct kernel_stat kbuf; int res = internal_syscall(SYSCALL(fstat), fd, &kbuf); kernel_stat_to_stat(&kbuf, (struct stat *)buf); @@ -727,6 +744,19 @@ uptr internal_waitpid(int pid, int *status, int options) { 0 /* rusage */); } +#ifdef __sw_64__ +uptr internal_getpid() { + return internal_syscall(SYSCALL(getxpid)); +} + +uptr internal_getppid() { + uptr ppid; + internal_syscall(SYSCALL(getxpid)); + asm("mov $20, %0\n" + :"=r"(ppid)); + return ppid; +} +#else uptr internal_getpid() { return internal_syscall(SYSCALL(getpid)); } @@ -734,6 +764,7 @@ uptr internal_getpid() { uptr internal_getppid() { return internal_syscall(SYSCALL(getppid)); } +#endif uptr internal_getdents(fd_t fd, struct linux_dirent *dirp, unsigned int count) { #if SANITIZER_FREEBSD @@ -760,7 +791,7 @@ uptr internal_sigaltstack(const void *ss, void *oss) { } int internal_fork() { -#if SANITIZER_USES_CANONICAL_LINUX_SYSCALLS +#if SANITIZER_USES_CANONICAL_LINUX_SYSCALLS || SANITIZER_SW64 return internal_syscall(SYSCALL(clone), SIGCHLD, 0); #else return internal_syscall(SYSCALL(fork)); @@ -816,7 +847,7 @@ int internal_sigaction_norestorer(int signum, const void *act, void *oldact) { // rt_sigaction, so we need to do the same (we'll need to reimplement the // restorers; for x86_64 the restorer address can be obtained from // oldact->sa_restorer upon a call to sigaction(xxx, NULL, oldact). -#if !SANITIZER_ANDROID || !SANITIZER_MIPS32 +#if (!SANITIZER_ANDROID || !SANITIZER_MIPS32) && !SANITIZER_SW64 k_act.sa_restorer = u_act->sa_restorer; #endif } @@ -832,7 +863,7 @@ int internal_sigaction_norestorer(int signum, const void *act, void *oldact) { internal_memcpy(&u_oldact->sa_mask, &k_oldact.sa_mask, sizeof(__sanitizer_kernel_sigset_t)); u_oldact->sa_flags = k_oldact.sa_flags; -#if !SANITIZER_ANDROID || !SANITIZER_MIPS32 +#if (!SANITIZER_ANDROID || !SANITIZER_MIPS32) && !SANITIZER_SW64 u_oldact->sa_restorer = k_oldact.sa_restorer; #endif } @@ -1035,6 +1066,11 @@ uptr GetMaxVirtualAddress() { return (1ULL << (MostSignificantSetBitIndex(GET_CURRENT_FRAME()) + 1)) - 1; # elif defined(__mips64) return (1ULL << 40) - 1; // 0x000000ffffffffffUL; +# elif defined(__sw_64__) +// SW64 has a 42-bit user address space(4TiB) +// according to TASK_SIZE in kernel. +// In sw6b PGTABLE is SW_4LEVEL. 
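Two of the process primitives above lean on kernel conventions inherited from Alpha: there is no separate getppid system call (getxpid returns the caller's pid in $0 and the parent's pid in $20, which the added inline asm copies out), and internal_fork is routed through clone with only SIGCHLD set, the classic fork-without-a-new-stack form. The sketch below exercises that clone form through the portable syscall wrapper; it is an illustration of the idiom, not the sanitizer code, and it deliberately avoids the register-level getxpid trick.

// Hedged illustration of the internal_fork() fallback used above:
// clone(SIGCHLD, 0), i.e. no new stack pointer, behaves like fork(2).
#include <sys/syscall.h>
#include <sys/wait.h>
#include <csignal>
#include <cstdio>
#include <unistd.h>

int main ()
{
  long pid = syscall (SYS_clone, SIGCHLD, 0L);
  if (pid < 0)
    return 1;
  if (pid == 0)
    _exit (0);                          // child side of the pseudo-fork
  int status = 0;
  waitpid ((pid_t) pid, &status, 0);
  std::printf ("clone(SIGCHLD, 0) forked child %ld\n", pid);
  return 0;
}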
+ return (1ULL << 52) - 1; // 0x000fffffffffffffUL; # elif defined(__s390x__) return (1ULL << 53) - 1; // 0x001fffffffffffffUL; #elif defined(__sparc__) @@ -1326,6 +1362,72 @@ uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, : "memory", "$29" ); return res; } +#elif defined(__sw_64__) +uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, + int *parent_tidptr, void *newtls, int *child_tidptr) { + long long res; + if (!fn || !child_stack) + return -EINVAL; + child_stack = (char *)child_stack - 4 * sizeof(unsigned long long); + ((unsigned long long *)child_stack)[0] = (uptr)fn; + ((unsigned long long *)child_stack)[1] = (uptr)arg; + ((unsigned long long *)child_stack)[2] = (uptr)flags; + + register void *r20 __asm__("$20") = newtls; + register int *r22 __asm__("$22") = child_tidptr; + + __asm__ __volatile__( + /* $v0 = syscall($v0 = __NR_clone, + * $a0 = flags, + * $a1 = child_stack, + * $a2 = parent_tidptr, + * $a3 = child_tidptr, + * $a4 = new_tls) + */ + "mov %[flag],$16\n" + "mov %[usp],$17\n" + "mov %[ptid],$18\n" + "ldl $19,0($sp)\n" + "mov %5,$20\n" + /* Store the fifth argument on stack + * if we are using 32-bit abi. + */ + "ldi $0,%[NR_clone];\n" + "sys_call 0x83;\n" + + /* if ($v0 != 0) + * return; + */ + "bne $0,1f;\n" + "mov $31,$15;\n" + /* Call "fn(arg)". */ + "ldl $27,0($sp);\n" + "ldl $16,8($sp);\n" + "ldi $sp,32($sp);\n" + + "call $26,($27),0;\n" + "ldgp $29, 0($26);\n" + + /* Call _exit($v0). */ + "mov $0,$16;\n" + "ldi $0,%[NR_exit];\n" + "sys_call 0x83;\n" + + /* Return to parent. */ + "1:\n" + : "=r" (res) + : [flag]"r"(flags), + [usp]"r"(child_stack), + [ptid]"r"(parent_tidptr), + "r"(r20), + "r"(r22), + [NR_clone]"i"(__NR_clone), + [NR_exit]"i"(__NR_exit) + : "memory", "$30"); + + return res; +} + #elif defined(__aarch64__) uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, int *parent_tidptr, void *newtls, int *child_tidptr) { @@ -1879,6 +1981,11 @@ static void GetPcSpBp(void *context, uptr *pc, uptr *sp, uptr *bp) { *pc = ucontext->uc_mcontext.pc; *bp = ucontext->uc_mcontext.regs[29]; *sp = ucontext->uc_mcontext.sp; +#elif defined(__sw_64__) + ucontext_t *ucontext = (ucontext_t*)context; + *pc = ucontext->uc_mcontext.sc_pc; + *bp = ucontext->uc_mcontext.sc_regs[15]; + *sp = ucontext->uc_mcontext.sc_regs[30]; #elif defined(__hppa__) ucontext_t *ucontext = (ucontext_t*)context; *pc = ucontext->uc_mcontext.sc_iaoq[0]; @@ -1966,6 +2073,11 @@ static void GetPcSpBp(void *context, uptr *pc, uptr *sp, uptr *bp) { *pc = ucontext->uc_mcontext.pc; *bp = ucontext->uc_mcontext.gregs[30]; *sp = ucontext->uc_mcontext.gregs[29]; +#elif defined(__sw_64__) + ucontext_t *ucontext = (ucontext_t*)context; + *pc = ucontext->uc_mcontext.sc_pc; + *bp = ucontext->uc_mcontext.sc_regs[15]; + *sp = ucontext->uc_mcontext.sc_regs[30]; #elif defined(__s390__) ucontext_t *ucontext = (ucontext_t*)context; # if defined(__s390x__) diff --git a/libsanitizer/sanitizer_common/sanitizer_linux.h b/libsanitizer/sanitizer_common/sanitizer_linux.h index c28347ad963a7e1482e3aa06bd433670f76cd7d4..05976a700c34e4bcfc47609ac90fa5e17ae460e1 100644 --- a/libsanitizer/sanitizer_common/sanitizer_linux.h +++ b/libsanitizer/sanitizer_common/sanitizer_linux.h @@ -61,7 +61,7 @@ int internal_sigaction_norestorer(int signum, const void *act, void *oldact); void internal_sigdelset(__sanitizer_sigset_t *set, int signum); #if defined(__x86_64__) || defined(__mips__) || defined(__aarch64__) \ || defined(__powerpc64__) || defined(__s390__) || 
defined(__i386__) \ - || defined(__arm__) + || defined(__arm__) || defined(__sw_64__) uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, int *parent_tidptr, void *newtls, int *child_tidptr); #endif diff --git a/libsanitizer/sanitizer_common/sanitizer_linux_libcdep.cpp b/libsanitizer/sanitizer_common/sanitizer_linux_libcdep.cpp index e09d568d8024863f7d97861cb2d9e246740083a5..18e7555ba8d8bf84bf4a9a2c796a8713c34f695e 100644 --- a/libsanitizer/sanitizer_common/sanitizer_linux_libcdep.cpp +++ b/libsanitizer/sanitizer_common/sanitizer_linux_libcdep.cpp @@ -262,7 +262,7 @@ void InitTlsSize() { } #if (defined(__x86_64__) || defined(__i386__) || defined(__mips__) || \ defined(__aarch64__) || defined(__powerpc64__) || defined(__s390__) || \ - defined(__arm__)) && \ + defined(__arm__) || defined(__sw_64__)) && \ SANITIZER_LINUX && !SANITIZER_ANDROID // sizeof(struct pthread) from glibc. static atomic_uintptr_t thread_descriptor_size; @@ -309,6 +309,8 @@ uptr ThreadDescriptorSize() { val = 1776; // from glibc.ppc64le 2.20-8.fc21 #elif defined(__s390__) val = FIRST_32_SECOND_64(1152, 1776); // valid for glibc 2.22 +#elif defined(__sw_64__) + val = 1776; #endif if (val) atomic_store_relaxed(&thread_descriptor_size, val); @@ -356,7 +358,7 @@ uptr ThreadSelf() { rdhwr %0,$29;\ .set pop" : "=r" (thread_pointer)); descr_addr = thread_pointer - kTlsTcbOffset - TlsPreTcbSize(); -# elif defined(__aarch64__) || defined(__arm__) +# elif defined(__aarch64__) || defined(__arm__) || defined(__sw_64__) descr_addr = reinterpret_cast(__builtin_thread_pointer()) - ThreadDescriptorSize(); # elif defined(__s390__) @@ -435,7 +437,7 @@ static void GetTls(uptr *addr, uptr *size) { *addr -= *size; *addr += ThreadDescriptorSize(); # elif defined(__mips__) || defined(__aarch64__) || defined(__powerpc64__) \ - || defined(__arm__) + || defined(__arm__) || defined(__sw_64__) *addr = ThreadSelf(); *size = GetTlsSize(); # else diff --git a/libsanitizer/sanitizer_common/sanitizer_platform.h b/libsanitizer/sanitizer_common/sanitizer_platform.h index c68bfa25875585818721029b099bd4da4cddcb78..dea617abd40064616b475d827539b9f8d2578e41 100644 --- a/libsanitizer/sanitizer_common/sanitizer_platform.h +++ b/libsanitizer/sanitizer_common/sanitizer_platform.h @@ -147,6 +147,12 @@ # define SANITIZER_MIPS64 0 #endif +#if defined(__sw_64__) +# define SANITIZER_SW64 1 +#else +# define SANITIZER_SW64 0 +#endif + #if defined(__s390__) # define SANITIZER_S390 1 # if defined(__s390x__) @@ -242,6 +248,8 @@ # endif #elif defined(__sparc__) #define SANITIZER_MMAP_RANGE_SIZE FIRST_32_SECOND_64(1ULL << 32, 1ULL << 52) +#elif defined(__sw_64__) +# define SANITIZER_MMAP_RANGE_SIZE 1ULL << 52 #else # define SANITIZER_MMAP_RANGE_SIZE FIRST_32_SECOND_64(1ULL << 32, 1ULL << 47) #endif diff --git a/libsanitizer/sanitizer_common/sanitizer_platform_interceptors.h b/libsanitizer/sanitizer_common/sanitizer_platform_interceptors.h index 61a6b82ef8184b98833ca482b6040cc6a9d3f198..820d458be0f6cd867744adfbcc658adb6ee11eb0 100644 --- a/libsanitizer/sanitizer_common/sanitizer_platform_interceptors.h +++ b/libsanitizer/sanitizer_common/sanitizer_platform_interceptors.h @@ -225,7 +225,11 @@ #define SANITIZER_INTERCEPT_GETITIMER SI_POSIX #define SANITIZER_INTERCEPT_TIME SI_POSIX #define SANITIZER_INTERCEPT_GLOB SI_LINUX_NOT_ANDROID || SI_SOLARIS +#if SANITIZER_SW64 +#define SANITIZER_INTERCEPT_GLOB64 0 +#else #define SANITIZER_INTERCEPT_GLOB64 SI_LINUX_NOT_ANDROID +#endif #define SANITIZER_INTERCEPT_WAIT SI_POSIX #define SANITIZER_INTERCEPT_INET 
SI_POSIX #define SANITIZER_INTERCEPT_PTHREAD_GETSCHEDPARAM (SI_POSIX && !SI_OPENBSD) @@ -261,7 +265,7 @@ #if SI_LINUX_NOT_ANDROID && \ (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \ - defined(__s390__)) + defined(__s390__) || defined(__sw_64__)) #define SANITIZER_INTERCEPT_PTRACE 1 #else #define SANITIZER_INTERCEPT_PTRACE 0 diff --git a/libsanitizer/sanitizer_common/sanitizer_platform_limits_linux.cpp b/libsanitizer/sanitizer_common/sanitizer_platform_limits_linux.cpp index f22f50391286b484a17c19b2b7efe17995fd858b..7a3e3ab60e97da46c6724dced8dc777a3619267c 100644 --- a/libsanitizer/sanitizer_common/sanitizer_platform_limits_linux.cpp +++ b/libsanitizer/sanitizer_common/sanitizer_platform_limits_linux.cpp @@ -68,7 +68,7 @@ namespace __sanitizer { #if !defined(__powerpc64__) && !defined(__x86_64__) && !defined(__aarch64__)\ && !defined(__mips__) && !defined(__s390__)\ - && !defined(__sparc__) && !defined(__riscv) + && !defined(__sparc__) && !defined(__riscv) && !defined(__sw_64__) COMPILER_CHECK(struct___old_kernel_stat_sz == sizeof(struct __old_kernel_stat)); #endif diff --git a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cpp b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cpp index 8b4162bcd6a9131286a631b02db7384841ad99de..5585755f3f26ffbf3fb573907be2ade6a148ea4d 100644 --- a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cpp +++ b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cpp @@ -92,7 +92,7 @@ #if SANITIZER_LINUX # include # include -# if defined(__mips64) || defined(__aarch64__) || defined(__arm__) +# if defined(__mips64) || defined(__aarch64__) || defined(__arm__) || defined(__sw_64__) // for pt_regs # include # ifdef __arm__ typedef struct user_fpregs elf_fpregset_t; @@ -128,7 +128,7 @@ typedef struct user_fpregs elf_fpregset_t; #include #include #include -#if defined(__mips64) +#if defined(__mips64) || defined(__sw_64__) // for elf_gregset_t # include #endif #include @@ -232,7 +232,7 @@ namespace __sanitizer { // has been removed from glibc 2.28. 
#if defined(__aarch64__) || defined(__s390x__) || defined (__mips64) \ || defined(__powerpc64__) || defined(__arch64__) || defined(__sparcv9) \ - || defined(__x86_64__) || (defined(__riscv) && __riscv_xlen == 64) + || defined(__x86_64__) || (defined(__riscv) && __riscv_xlen == 64) || defined(__sw_64__) #define SIZEOF_STRUCT_USTAT 32 #elif defined(__arm__) || defined(__i386__) || defined(__mips__) \ || defined(__powerpc__) || defined(__s390__) || defined(__sparc__) @@ -307,11 +307,11 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr); #if SANITIZER_LINUX && !SANITIZER_ANDROID && \ (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \ - defined(__s390__)) + defined(__s390__) || defined(__sw_64__)) #if defined(__mips64) || defined(__powerpc64__) || defined(__arm__) unsigned struct_user_regs_struct_sz = sizeof(struct pt_regs); unsigned struct_user_fpregs_struct_sz = sizeof(elf_fpregset_t); -#elif defined(__aarch64__) +#elif defined(__aarch64__) || defined(__sw_64__) unsigned struct_user_regs_struct_sz = sizeof(struct user_pt_regs); unsigned struct_user_fpregs_struct_sz = sizeof(struct user_fpsimd_state); #elif defined(__s390__) @@ -322,12 +322,12 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr); unsigned struct_user_fpregs_struct_sz = sizeof(struct user_fpregs_struct); #endif // __mips64 || __powerpc64__ || __aarch64__ #if defined(__x86_64) || defined(__mips64) || defined(__powerpc64__) || \ - defined(__aarch64__) || defined(__arm__) || defined(__s390__) + defined(__aarch64__) || defined(__arm__) || defined(__s390__) || defined(__sw_64__) unsigned struct_user_fpxregs_struct_sz = 0; #else unsigned struct_user_fpxregs_struct_sz = sizeof(struct user_fpxregs_struct); #endif // __x86_64 || __mips64 || __powerpc64__ || __aarch64__ || __arm__ -// || __s390__ +// || __s390__ || __sw_64__ #ifdef __arm__ unsigned struct_user_vfpregs_struct_sz = ARM_VFPREGS_SIZE; #else @@ -1059,7 +1059,7 @@ CHECK_STRUCT_SIZE_AND_OFFSET(sigaction, sa_mask); // didn't exist. 
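The sizes recorded above (struct_user_regs_struct_sz taken from user_pt_regs, and so on) are what the ptrace interceptors use to mark the register buffers a tracer hands to the kernel. On targets that describe their register file as user_pt_regs the natural fetch is the iovec-based PTRACE_GETREGSET request, which is also what the stop-the-world code selects later in this patch through ARCH_IOVEC_FOR_GETREGSET. A self-contained sketch of that request follows; it reads the registers into an opaque buffer so it does not depend on any particular pt_regs layout.

// Sketch of an iovec-based register fetch (the ARCH_IOVEC_FOR_GETREGSET
// path): PTRACE_GETREGSET with NT_PRSTATUS fills a caller-supplied buffer
// and reports in iov_len how many bytes the kernel wrote, which is the
// quantity struct_user_regs_struct_sz records.
#include <sys/ptrace.h>
#include <sys/uio.h>
#include <sys/wait.h>
#include <signal.h>
#include <elf.h>            // NT_PRSTATUS
#include <unistd.h>
#include <cstdio>

int main ()
{
  pid_t child = fork ();
  if (child == 0)
    {
      ptrace (PTRACE_TRACEME, 0, nullptr, nullptr);
      raise (SIGSTOP);      // wait for the parent to inspect us
      _exit (0);
    }
  waitpid (child, nullptr, 0);

  unsigned char regs[1024];                     // large enough for any pt_regs layout
  struct iovec iov = { regs, sizeof (regs) };
  if (ptrace (PTRACE_GETREGSET, child, (void *) NT_PRSTATUS, &iov) == 0)
    std::printf ("kernel returned %zu bytes of general registers\n", iov.iov_len);

  kill (child, SIGKILL);
  waitpid (child, nullptr, 0);
  return 0;
}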
CHECK_STRUCT_SIZE_AND_OFFSET(sigaction, sa_flags); #endif -#if SANITIZER_LINUX && (!SANITIZER_ANDROID || !SANITIZER_MIPS32) +#if SANITIZER_LINUX && (!SANITIZER_ANDROID || !SANITIZER_MIPS32) && !SANITIZER_SW64 CHECK_STRUCT_SIZE_AND_OFFSET(sigaction, sa_restorer); #endif diff --git a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.h b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.h index d82fd5e400587a46c23ba66d27d9568582c37952..9c572f4d32aecf650fe5097c868a4486a204ceee 100644 --- a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.h +++ b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.h @@ -73,6 +73,9 @@ const unsigned struct_kernel_stat64_sz = 104; #elif defined(__aarch64__) const unsigned struct_kernel_stat_sz = 128; const unsigned struct_kernel_stat64_sz = 104; +#elif defined(__sw_64__) +const unsigned struct_kernel_stat_sz = 80; +const unsigned struct_kernel_stat64_sz = 136; #elif defined(__powerpc__) && !defined(__powerpc64__) const unsigned struct_kernel_stat_sz = 72; const unsigned struct_kernel_stat64_sz = 104; @@ -101,6 +104,9 @@ const unsigned struct_kernel_stat64_sz = 104; #elif defined(__riscv) && __riscv_xlen == 64 const unsigned struct_kernel_stat_sz = 128; const unsigned struct_kernel_stat64_sz = 104; +#elif defined(__sw_64__) +const unsigned struct_kernel_stat_sz = 80; +const unsigned struct_kernel_stat64_sz = 136; #endif struct __sanitizer_perf_event_attr { unsigned type; @@ -259,15 +265,15 @@ struct __sanitizer_shmid_ds { u64 shm_ctime; #else uptr shm_atime; -#if !defined(_LP64) && !defined(__mips__) +#if !defined(_LP64) && !defined(__mips__) && !defined(__sw_64__) uptr __unused1; #endif uptr shm_dtime; -#if !defined(_LP64) && !defined(__mips__) +#if !defined(_LP64) && !defined(__mips__) && !defined(__sw_64__) uptr __unused2; #endif uptr shm_ctime; -#if !defined(_LP64) && !defined(__mips__) +#if !defined(_LP64) && !defined(__mips__) && !defined(__sw_64__) uptr __unused3; #endif #endif @@ -509,7 +515,7 @@ typedef int __sanitizer_clockid_t; #if SANITIZER_LINUX #if defined(_LP64) || defined(__x86_64__) || defined(__powerpc__) || \ - defined(__mips__) + defined(__mips__) && !defined(__sw_64__) typedef unsigned __sanitizer___kernel_uid_t; typedef unsigned __sanitizer___kernel_gid_t; #else @@ -522,7 +528,7 @@ typedef long long __sanitizer___kernel_off_t; typedef long __sanitizer___kernel_off_t; #endif -#if defined(__powerpc__) || defined(__mips__) +#if defined(__powerpc__) || defined(__mips__) && !defined(__sw_64__) typedef unsigned int __sanitizer___kernel_old_uid_t; typedef unsigned int __sanitizer___kernel_old_gid_t; #else @@ -634,7 +640,7 @@ struct __sanitizer_sigaction { #endif #endif #endif -#if SANITIZER_LINUX +#if SANITIZER_LINUX && !defined(__sw_64__) void (*sa_restorer)(); #endif #if defined(__mips__) && (SANITIZER_WORDSIZE == 32) @@ -797,7 +803,7 @@ typedef void __sanitizer_FILE; #if SANITIZER_LINUX && !SANITIZER_ANDROID && \ (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \ - defined(__s390__)) + defined(__s390__) || defined(__sw_64__)) extern unsigned struct_user_regs_struct_sz; extern unsigned struct_user_fpregs_struct_sz; extern unsigned struct_user_fpxregs_struct_sz; @@ -883,7 +889,7 @@ struct __sanitizer_cookie_io_functions_t { #define IOC_NRBITS 8 #define IOC_TYPEBITS 8 #if defined(__powerpc__) || defined(__powerpc64__) || defined(__mips__) || \ - defined(__sparc__) + defined(__sparc__) || defined(__sw_64__) #define 
IOC_SIZEBITS 13 #define IOC_DIRBITS 3 #define IOC_NONE 1U diff --git a/libsanitizer/sanitizer_common/sanitizer_stacktrace.h b/libsanitizer/sanitizer_common/sanitizer_stacktrace.h index f1f29e9f32ee811ac771ab6d56e9e1a565c7d0bd..67ba06cf71761f9617bee4b5c333039be40be5d9 100644 --- a/libsanitizer/sanitizer_common/sanitizer_stacktrace.h +++ b/libsanitizer/sanitizer_common/sanitizer_stacktrace.h @@ -22,6 +22,8 @@ static const u32 kStackTraceMax = 256; #if SANITIZER_LINUX && defined(__mips__) # define SANITIZER_CAN_FAST_UNWIND 0 +#elif defined(__sw_64__) +# define SANITIZER_CAN_FAST_UNWIND 0 #elif SANITIZER_WINDOWS # define SANITIZER_CAN_FAST_UNWIND 0 #elif SANITIZER_OPENBSD diff --git a/libsanitizer/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp b/libsanitizer/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp index 651d5056dd9d5feb4a7ca2f8cd1fe52d2080fd77..0cdfa8fad93561fd080b6baf244d72d3fe400033 100644 --- a/libsanitizer/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp +++ b/libsanitizer/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp @@ -16,7 +16,7 @@ #if SANITIZER_LINUX && (defined(__x86_64__) || defined(__mips__) || \ defined(__aarch64__) || defined(__powerpc64__) || \ defined(__s390__) || defined(__i386__) || \ - defined(__arm__)) + defined(__arm__) || defined(__sw_64__)) #include "sanitizer_stoptheworld.h" @@ -498,6 +498,11 @@ typedef struct user regs_struct; # define REG_SP regs[EF_REG29] # endif +#elif defined(__sw_64__) +typedef struct user regs_struct; +#define REG_SP regs[EF_SP] +#define ARCH_IOVEC_FOR_GETREGSET + #elif defined(__aarch64__) typedef struct user_pt_regs regs_struct; #define REG_SP sp diff --git a/libsanitizer/sanitizer_common/sanitizer_symbolizer_libcdep.cpp b/libsanitizer/sanitizer_common/sanitizer_symbolizer_libcdep.cpp index 3b19a6836ec53da3d2c4f71123e1e81cc6ffdecf..a5c7252cb31bb2dd7a1f3fa8d2399d4e21b0395c 100644 --- a/libsanitizer/sanitizer_common/sanitizer_symbolizer_libcdep.cpp +++ b/libsanitizer/sanitizer_common/sanitizer_symbolizer_libcdep.cpp @@ -270,6 +270,8 @@ class LLVMSymbolizerProcess : public SymbolizerProcess { const char* const kSymbolizerArch = "--default-arch=s390x"; #elif defined(__s390__) const char* const kSymbolizerArch = "--default-arch=s390"; +#elif defined(__sw_64__) + const char* const kSymbolizerArch = "--default-arch=sw_64"; #else const char* const kSymbolizerArch = "--default-arch=unknown"; #endif diff --git a/libsanitizer/tsan/Makefile.am b/libsanitizer/tsan/Makefile.am index 5d37abd20de5060a92800412c2871a0d01d1402e..32b87fc6f5c0d6aa92c31216afe50abde64eff1d 100644 --- a/libsanitizer/tsan/Makefile.am +++ b/libsanitizer/tsan/Makefile.am @@ -49,7 +49,7 @@ tsan_files = \ tsan_sync.cpp libtsan_la_SOURCES = $(tsan_files) -EXTRA_libtsan_la_SOURCES = tsan_rtl_amd64.S tsan_rtl_aarch64.S tsan_rtl_mips64.S tsan_rtl_ppc64.S +EXTRA_libtsan_la_SOURCES = tsan_rtl_amd64.S tsan_rtl_aarch64.S tsan_rtl_mips64.S tsan_rtl_ppc64.S tsan_rtl_sw64.S libtsan_la_LIBADD = $(top_builddir)/sanitizer_common/libsanitizer_common.la $(top_builddir)/interception/libinterception.la $(TSAN_TARGET_DEPENDENT_OBJECTS) libtsan_la_DEPENDENCIES = $(top_builddir)/sanitizer_common/libsanitizer_common.la $(top_builddir)/interception/libinterception.la $(TSAN_TARGET_DEPENDENT_OBJECTS) if LIBBACKTRACE_SUPPORTED diff --git a/libsanitizer/tsan/Makefile.in b/libsanitizer/tsan/Makefile.in index 74896427edfe8349e86a8078abd0b0599c659e7b..6448de25553c9101d110a3a90c4fb884878121a7 100644 --- a/libsanitizer/tsan/Makefile.in +++ b/libsanitizer/tsan/Makefile.in 
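Setting SANITIZER_CAN_FAST_UNWIND to 0 above means sw_64 never tries to chase frame pointers; every sanitizer stack trace goes through the slow, unwind-table-driven path, which ultimately rests on the libgcc unwinder. The sketch below drives that unwinder directly with _Unwind_Backtrace; it is an independent illustration of the mechanism, not sanitizer code.

// Minimal table-driven backtrace, the mechanism the "slow unwind" path
// relies on once frame-pointer walking is disabled for the target.
#include <unwind.h>
#include <cstdio>

static _Unwind_Reason_Code collect (struct _Unwind_Context *ctx, void *arg)
{
  int *depth = static_cast<int *> (arg);
  std::printf ("  frame %2d: pc = %#lx\n", *depth,
               (unsigned long) _Unwind_GetIP (ctx));
  ++*depth;
  return _URC_NO_REASON;
}

static void leaf ()
{
  int depth = 0;
  _Unwind_Backtrace (collect, &depth);
}

static void middle () { leaf (); }

int main ()
{
  middle ();
  return 0;
}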
@@ -451,7 +451,7 @@ tsan_files = \ tsan_sync.cpp libtsan_la_SOURCES = $(tsan_files) -EXTRA_libtsan_la_SOURCES = tsan_rtl_amd64.S tsan_rtl_aarch64.S tsan_rtl_mips64.S tsan_rtl_ppc64.S +EXTRA_libtsan_la_SOURCES = tsan_rtl_amd64.S tsan_rtl_aarch64.S tsan_rtl_mips64.S tsan_rtl_ppc64.S tsan_rtl_sw64.S libtsan_la_LIBADD = \ $(top_builddir)/sanitizer_common/libsanitizer_common.la \ $(top_builddir)/interception/libinterception.la \ diff --git a/libsanitizer/tsan/tsan_interceptors_posix.cpp b/libsanitizer/tsan/tsan_interceptors_posix.cpp index 8aea1e4ec0513bbddc55bd55e307fa456e26d03e..c30ceeaf3e57437d50ba91cb4284f0255192991a 100644 --- a/libsanitizer/tsan/tsan_interceptors_posix.cpp +++ b/libsanitizer/tsan/tsan_interceptors_posix.cpp @@ -73,7 +73,7 @@ struct ucontext_t { }; #endif -#if defined(__x86_64__) || defined(__mips__) || SANITIZER_PPC64V1 +#if defined(__x86_64__) || defined(__mips__) || defined(__sw_64__) || SANITIZER_PPC64V1 #define PTHREAD_ABI_BASE "GLIBC_2.3.2" #elif defined(__aarch64__) || SANITIZER_PPC64V2 #define PTHREAD_ABI_BASE "GLIBC_2.17" @@ -142,7 +142,7 @@ typedef long long_t; # define F_TLOCK 2 /* Test and lock a region for exclusive use. */ # define F_TEST 3 /* Test a region for other processes locks. */ -#if SANITIZER_FREEBSD || SANITIZER_MAC || SANITIZER_NETBSD +#if SANITIZER_FREEBSD || SANITIZER_MAC || SANITIZER_NETBSD || SANITIZER_SW64 const int SA_SIGINFO = 0x40; const int SIG_SETMASK = 3; #elif defined(__mips__) @@ -2371,7 +2371,7 @@ int sigaction_impl(int sig, const __sanitizer_sigaction *act, sigactions[sig].sa_flags = *(volatile int const *)&act->sa_flags; internal_memcpy(&sigactions[sig].sa_mask, &act->sa_mask, sizeof(sigactions[sig].sa_mask)); -#if !SANITIZER_FREEBSD && !SANITIZER_MAC && !SANITIZER_NETBSD +#if !SANITIZER_FREEBSD && !SANITIZER_MAC && !SANITIZER_NETBSD && !SANITIZER_SW64 sigactions[sig].sa_restorer = act->sa_restorer; #endif internal_memcpy(&newact, act, sizeof(newact)); @@ -2674,6 +2674,14 @@ void InitializeInterceptors() { TSAN_INTERCEPT(pthread_timedjoin_np); #endif + #if SANITIZER_SW64 + // sw64 have two version of timer function, osf_xxx with @glibc2.0, + // which is 32bits syscall for old kernal. xxx with @glibc2.1 is 64bits + // syscall for new kernal, we use the new one. + TSAN_INTERCEPT_VER(setitimer, "GLIBC_2.1"); + TSAN_INTERCEPT_VER(setitimer, "GLIBC_2.1"); + #endif + TSAN_INTERCEPT_VER(pthread_cond_init, PTHREAD_ABI_BASE); TSAN_INTERCEPT_VER(pthread_cond_signal, PTHREAD_ABI_BASE); TSAN_INTERCEPT_VER(pthread_cond_broadcast, PTHREAD_ABI_BASE); diff --git a/libsanitizer/tsan/tsan_platform.h b/libsanitizer/tsan/tsan_platform.h index 63eb14fcd3402abb8426ecfc30f2a62c4e32c9a7..e4e2e296148a51aca1017dbebc94d7e31fa66255 100644 --- a/libsanitizer/tsan/tsan_platform.h +++ b/libsanitizer/tsan/tsan_platform.h @@ -352,6 +352,44 @@ struct Mapping47 { // Indicates the runtime will define the memory regions at runtime. #define TSAN_RUNTIME_VMA 1 + +#elif defined(__sw_64__) + +// TODO(sw64_map): as sw64 kernal doesn't map such large space, we just map +// it for test, for now it works will. +// TODO(sw64_map_la): as sw64 map all space in low address, we set all user +// space +// in Lo address, perhaps there is some way to change it. 
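The constants that follow carve sw_64's 52-bit address space into application, shadow, metainfo and trace windows; the block comment and the Mapping struct just below spell them out. As a cross-check, the ranges must be disjoint and the shadow window must be at least four times the low application window, since TSan keeps four 8-byte shadow cells per 8 application bytes in its default configuration. The compile-time sketch below restates the struct's values (names shortened, nothing new added) to make those two properties explicit.

// Sanity-check sketch for the sw_64 TSan Mapping defined below: ranges
// are disjoint and the shadow can hold 4 shadow cells per app word.
#include <cstdint>

using uptr = uint64_t;

constexpr uptr kLoAppMemBeg   = 0x0000000000000ull;
constexpr uptr kLoAppMemEnd   = 0x0600000000000ull;
constexpr uptr kShadowBeg     = 0x4000000000000ull;
constexpr uptr kShadowEnd     = 0x6000000000000ull;
constexpr uptr kMetaShadowBeg = 0x6000000000000ull;
constexpr uptr kMetaShadowEnd = 0x7000000000000ull;
constexpr uptr kTraceMemBeg   = 0x7000000000000ull;
constexpr uptr kTraceMemEnd   = 0x7c00000000000ull;

static_assert (kLoAppMemEnd <= kShadowBeg,     "app range sits below the shadow");
static_assert (kShadowEnd <= kMetaShadowBeg,   "shadow sits below the metainfo");
static_assert (kMetaShadowEnd <= kTraceMemBeg, "metainfo sits below the trace");
static_assert (kShadowEnd - kShadowBeg >= 4 * (kLoAppMemEnd - kLoAppMemBeg),
               "4 shadow bytes per application byte fit in the window");

int main () { return 0; }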
+/* +C/C++ on linux/sw64 (52-bit VMA) +0000 0000 0000 - 0001 2000 0000: modules and main thread stack +0001 2000 0000 - 0008 0000 0000: main binary +0400 0000 0000 - 0600 0000 0000: pie main binary (including heap) +0600 0000 0000 - 4000 0000 0000: - +4000 0000 0000 - 6000 0000 0000: shadow +6000 0000 0000 - 7000 0000 0000: metainfo +7000 0000 0000 - 7c00 0000 0000: trace +*/ + +struct Mapping { + static const uptr kLoAppMemBeg = 0x0000000000000ull; + static const uptr kLoAppMemEnd = 0x0600000000000ull; + static const uptr kShadowBeg = 0x4000000000000ull; + static const uptr kShadowEnd = 0x6000000000000ull; + static const uptr kHiAppMemBeg = 0xfff0000000000ull; + static const uptr kHiAppMemEnd = 0xfff0000000000ull; + static const uptr kAppMemMsk = 0x0000000000000ull; + //distans between lo address to shadow begin + static const uptr kAppMemXor = 0x1000000000000ull; + static const uptr kHeapMemBeg = 0xff00000000000ull; + static const uptr kHeapMemEnd = 0xff00000000000ull; + static const uptr kMetaShadowBeg = 0x6000000000000ull; + static const uptr kMetaShadowEnd = 0x7000000000000ull; + static const uptr kTraceMemBeg = 0x7000000000000ull; + static const uptr kTraceMemEnd = 0x7c00000000000ull; + static const uptr kVdsoBeg = 0x3c00000000000000ull; +}; +#define TSAN_RUNTIME_VMA 1 #endif #elif SANITIZER_GO && !SANITIZER_WINDOWS && defined(__x86_64__) diff --git a/libsanitizer/tsan/tsan_platform_linux.cpp b/libsanitizer/tsan/tsan_platform_linux.cpp index 33fa586ca1b0f174e913945d46d57972d5a75af9..7d3c2eb380037bfbcc0a51ad426b4f2669c6eace 100644 --- a/libsanitizer/tsan/tsan_platform_linux.cpp +++ b/libsanitizer/tsan/tsan_platform_linux.cpp @@ -378,6 +378,8 @@ static uptr UnmangleLongJmpSp(uptr mangled_sp) { return mangled_sp ^ xor_key; #elif defined(__mips__) return mangled_sp; +#elif defined(__sw_64__) + return mangled_sp; #else #error "Unknown platform" #endif @@ -394,6 +396,8 @@ static uptr UnmangleLongJmpSp(uptr mangled_sp) { # define LONG_JMP_SP_ENV_SLOT 13 # elif defined(__mips64) # define LONG_JMP_SP_ENV_SLOT 1 +# elif defined(__sw_64__) +# define LONG_JMP_SP_ENV_SLOT 8 # else # define LONG_JMP_SP_ENV_SLOT 6 # endif diff --git a/libsanitizer/tsan/tsan_platform_posix.cpp b/libsanitizer/tsan/tsan_platform_posix.cpp index 1a0faee0252e20ca730caf9587143e3f34ac2d43..5467951663c131df2eefc4ea04c57de63bfd7279 100644 --- a/libsanitizer/tsan/tsan_platform_posix.cpp +++ b/libsanitizer/tsan/tsan_platform_posix.cpp @@ -89,6 +89,9 @@ void InitializeShadowMemory() { } else { DCHECK(0); } +#elif defined(__sw_64__) + uptr kMadviseRangeBeg = 0x210000000000ull; + uptr kMadviseRangeSize = 0x010000000000ull; #endif NoHugePagesInShadow(MemToShadow(kMadviseRangeBeg), kMadviseRangeSize * kShadowMultiplier); diff --git a/libsanitizer/tsan/tsan_rtl.h b/libsanitizer/tsan/tsan_rtl.h index c38fc43a9f848726ae40562b3de465d0a0a1d4bf..35f904f8f72bcc59b6c0276e7ab8ae8be55fa2a8 100644 --- a/libsanitizer/tsan/tsan_rtl.h +++ b/libsanitizer/tsan/tsan_rtl.h @@ -54,7 +54,7 @@ namespace __tsan { #if !SANITIZER_GO struct MapUnmapCallback; -#if defined(__mips64) || defined(__aarch64__) || defined(__powerpc__) +#if defined(__mips64) || defined(__aarch64__) || defined(__powerpc__) || defined(__sw_64__) struct AP32 { static const uptr kSpaceBeg = 0; diff --git a/libsanitizer/tsan/tsan_rtl_sw64.S b/libsanitizer/tsan/tsan_rtl_sw64.S new file mode 100644 index 0000000000000000000000000000000000000000..f74bfef8d2a349bcdf8557b605723f5d205887ad --- /dev/null +++ b/libsanitizer/tsan/tsan_rtl_sw64.S @@ -0,0 +1,236 @@ +// The content of this file 
is sw64-only:
+#if defined(__sw_64__)
+
+#include "sanitizer_common/sanitizer_asm.h"
+
+.section .text
+.set noreorder
+
+ASM_HIDDEN(__tsan_setjmp)
+.comm _ZN14__interception11real_setjmpE,8,8
+.globl ASM_SYMBOL_INTERCEPTOR(setjmp)
+ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(setjmp))
+ASM_SYMBOL_INTERCEPTOR(setjmp):
+  ldgp $r29, 0($r27)
+  CFI_STARTPROC
+
+  // Save frame/link register
+  ldi $sp, -32($sp)
+  stl $r26, 0($sp)
+  stl $fp, 8($sp)
+  CFI_DEF_CFA_OFFSET (32)
+  CFI_OFFSET (26, -32)
+  CFI_OFFSET (15, -24)
+
+  // Adjust the SP for previous frame
+  ldi $fp,0($sp)
+  CFI_DEF_CFA_REGISTER (15)
+
+  // Save env parameter
+  stl $r16, 16($sp)
+  CFI_OFFSET (0, -16)
+
+  // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)`
+  ldi $r16, 32($sp)
+
+  // call tsan interceptor
+  //ldih $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !gprelhigh
+  //ldi $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !gprellow
+  ldl $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !literal
+  call $r26, ($r27), 0
+  ldgp $r29, 0($r26)
+
+  // Restore env parameter
+  ldl $r16, 16($sp)
+  CFI_RESTORE (0)
+
+  // Restore frame/link register
+  ldl $fp, 8($sp)
+  ldl $r26, 0($sp)
+  CFI_RESTORE (15)
+  CFI_RESTORE (26)
+  CFI_DEF_CFA (31, 0)
+  ldi $sp, 32($sp)
+
+  // tail jump to libc setjmp
+  ldl $r27, _ZN14__interception11real_setjmpE($r29) !literal
+  ldl $r27, 0($r27)
+
+  jmp $r31, ($r27)
+
+  CFI_ENDPROC
+ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(setjmp))
+
+ASM_HIDDEN(__tsan_setjmp)
+.comm _ZN14__interception12real__setjmpE,8,8
+.globl ASM_SYMBOL_INTERCEPTOR(_setjmp)
+ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(_setjmp))
+ASM_SYMBOL_INTERCEPTOR(_setjmp):
+  ldgp $r29, 0($r27)
+  CFI_STARTPROC
+
+  // Save frame/link register
+  ldi $sp, -32($sp)
+  stl $r26, 0($sp)
+  stl $fp, 8($sp)
+  CFI_DEF_CFA_OFFSET (32)
+  CFI_OFFSET (26, -32)
+  CFI_OFFSET (15, -24)
+
+  // Adjust the SP for previous frame
+  ldi $fp,0($sp)
+  CFI_DEF_CFA_REGISTER (15)
+
+  // Save env parameter
+  stl $r16, 16($sp)
+  CFI_OFFSET (0, -16)
+
+  // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)`
+  ldi $r16, 32($sp)
+
+  // call tsan interceptor
+  //ldih $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !gprelhigh
+  //ldi $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !gprellow
+  ldl $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !literal
+  call $r26, ($r27), 0
+  ldgp $r29, 0($r26)
+
+  // Restore env parameter
+  ldl $r16, 16($sp)
+  CFI_RESTORE (0)
+
+  // Restore frame/link register
+  ldl $fp, 8($sp)
+  ldl $r26, 0($sp)
+  CFI_RESTORE (15)
+  CFI_RESTORE (26)
+  CFI_DEF_CFA (31, 0)
+  ldi $sp, 32($sp)
+
+  // tail jump to libc setjmp
+  ldl $r27, _ZN14__interception12real__setjmpE($r29) !literal
+  ldl $r27, 0($r27)
+
+  jmp $r31, ($r27)
+
+  CFI_ENDPROC
+ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(_setjmp))
+
+ASM_HIDDEN(__tsan_setjmp)
+.comm _ZN14__interception14real_sigsetjmpE,8,8
+.globl ASM_SYMBOL_INTERCEPTOR(sigsetjmp)
+ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(sigsetjmp))
+ASM_SYMBOL_INTERCEPTOR(sigsetjmp):
+  ldgp $r29, 0($r27)
+  CFI_STARTPROC
+
+  // Save frame/link register
+  ldi $sp, -32($sp)
+  stl $r26, 0($sp)
+  stl $fp, 8($sp)
+  CFI_DEF_CFA_OFFSET (32)
+  CFI_OFFSET (26, -32)
+  CFI_OFFSET (15, -24)
+
+  // Adjust the SP for previous frame
+  ldi $fp,0($sp)
+  CFI_DEF_CFA_REGISTER (15)
+
+  // Save env parameter
+  stl $r16, 16($sp)
+  stl $r17, 24($sp)
+  CFI_OFFSET (16, -16)
+  CFI_OFFSET (17, -8)
+
+  // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)`
+  ldi $r16, 32($sp)
+
+  // call tsan interceptor
+  //ldih $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !gprelhigh
+  //ldi $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !gprellow
+  ldl $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !literal
+  call $r26, ($r27), 0
+  ldgp $r29, 0($r26)
+
+  // Restore env parameter
+  ldl $r16, 16($sp)
+  ldl $r17, 24($sp)
+  CFI_RESTORE (0)
+  CFI_RESTORE (1)
+
+  // Restore frame/link register
+  ldl $fp, 8($sp)
+  ldl $r26, 0($sp)
+  CFI_RESTORE (15)
+  CFI_RESTORE (26)
+  CFI_DEF_CFA (31, 0)
+  ldi $sp, 32($sp)
+
+  // tail jump to libc setjmp
+  ldl $r27, _ZN14__interception14real_sigsetjmpE($r29) !literal
+  ldl $r27, 0($r27)
+
+  jmp $r31, ($r27)
+
+  CFI_ENDPROC
+ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(sigsetjmp))
+
+ASM_HIDDEN(__tsan_setjmp)
+.comm _ZN14__interception16real___sigsetjmpE,8,8
+.globl ASM_SYMBOL_INTERCEPTOR(__sigsetjmp)
+ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(__sigsetjmp))
+ASM_SYMBOL_INTERCEPTOR(__sigsetjmp):
+  ldgp $r29, 0($r27)
+  CFI_STARTPROC
+
+  // Save frame/link register
+  ldi $sp, -32($sp)
+  stl $r26, 0($sp)
+  stl $fp, 8($sp)
+  CFI_DEF_CFA_OFFSET (32)
+  CFI_OFFSET (26, -32)
+  CFI_OFFSET (15, -24)
+
+  // Adjust the SP for previous frame
+  ldi $fp,0($sp)
+  CFI_DEF_CFA_REGISTER (15)
+
+  // Save env parameter
+  stl $r16, 16($sp)
+  stl $r17, 24($sp)
+  CFI_OFFSET (16, -16)
+  CFI_OFFSET (17, -8)
+
+  // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)`
+  ldi $r16, 32($sp)
+
+  // call tsan interceptor
+  //ldih $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !gprelhigh
+  //ldi $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !gprellow
+  ldl $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !literal
+  call $r26, ($r27), 0
+  ldgp $r29, 0($r26)
+
+  // Restore env parameter
+  ldl $r16, 16($sp)
+  ldl $r17, 24($sp)
+  CFI_RESTORE (0)
+  CFI_RESTORE (1)
+
+  // Restore frame/link register
+  ldl $fp, 8($sp)
+  ldl $r26, 0($sp)
+  CFI_RESTORE (15)
+  CFI_RESTORE (26)
+  CFI_DEF_CFA (31, 0)
+  ldi $sp, 32($sp)
+
+  // tail jump to libc setjmp
+  ldl $r27, _ZN14__interception16real___sigsetjmpE($r29) !literal
+  ldl $r27, 0($r27)
+  jmp $r31, ($r27)
+
+  CFI_ENDPROC
+ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(__sigsetjmp))
+
+#endif
diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4
index b6557a43465baed5e03677e869840cac2be233fc..302cce07ee729f301d1c33af91a62b45e9ae2a3f 100644
--- a/libstdc++-v3/acinclude.m4
+++ b/libstdc++-v3/acinclude.m4
@@ -4787,7 +4787,7 @@ AC_DEFUN([GLIBCXX_CHECK_EXCEPTION_PTR_SYMVER], [
   AC_MSG_CHECKING([for first version to support std::exception_ptr])
   case ${target} in
     aarch64-*-* | alpha-*-* | hppa*-*-* | i?86-*-* | x86_64-*-* | \
-    m68k-*-* | powerpc*-*-* | s390*-*-* | *-*-solaris* )
+    m68k-*-* | powerpc*-*-* | s390*-*-* | sw_64-*-* | *-*-solaris* )
       ac_exception_ptr_since_gcc46=yes
       ;;
     *)
diff --git a/libstdc++-v3/configure b/libstdc++-v3/configure
index 766a0a8d504182f090e2932a86da5ac5b4b24a97..f5e60c33968f1596c97bfff68fe47d632421379c 100644
--- a/libstdc++-v3/configure
+++ b/libstdc++-v3/configure
@@ -74627,7 +74627,8 @@ case "$target" in
       powerpc*-*-linux* | \
       sparc*-*-linux* | \
       s390*-*-linux* | \
-      alpha*-*-linux*)
+      alpha*-*-linux* | \
+      sw_64*-*-linux*)
       cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h. */
@@ -76289,7 +76290,7 @@ _ACEOF
 $as_echo_n "checking for first version to support std::exception_ptr... " >&6; }
   case ${target} in
     aarch64-*-* | alpha-*-* | hppa*-*-* | i?86-*-* | x86_64-*-* | \
-    m68k-*-* | powerpc*-*-* | s390*-*-* | *-*-solaris* )
+    m68k-*-* | powerpc*-*-* | s390*-*-* | sw_64-*-* | *-*-solaris* )
       ac_exception_ptr_since_gcc46=yes
       ;;
     *)
diff --git a/libstdc++-v3/configure.ac b/libstdc++-v3/configure.ac
index 07cf05b6856a06a101dddfef2928ce7c5f75a579..0ea7f299127729fb32903ababfeb7202bc0b3fad 100644
--- a/libstdc++-v3/configure.ac
+++ b/libstdc++-v3/configure.ac
@@ -403,7 +403,8 @@ case "$target" in
       powerpc*-*-linux* | \
       sparc*-*-linux* | \
       s390*-*-linux* | \
-      alpha*-*-linux*)
+      alpha*-*-linux* | \
+      sw_64*-*-linux*)
       AC_TRY_COMPILE(, [
 #if !defined __LONG_DOUBLE_128__ || (defined(__sparc__) && defined(__arch64__))
 #error no need for long double compatibility
diff --git a/libstdc++-v3/configure.host b/libstdc++-v3/configure.host
index 898db37d9a22a872a62fe6c7ae02c1955b0b66f6..52f7cf22599793716b304229340178cce660ec98 100644
--- a/libstdc++-v3/configure.host
+++ b/libstdc++-v3/configure.host
@@ -123,6 +123,9 @@ case "${host_cpu}" in
   sparc* | ultrasparc)
     try_cpu=sparc
     ;;
+  sw_64*)
+    try_cpu=sw_64
+    ;;
   *)
     if test -d ${glibcxx_srcdir}/config/cpu/${host_cpu}; then
       try_cpu=${host_cpu}
diff --git a/libstdc++-v3/include/bits/hashtable_policy.h b/libstdc++-v3/include/bits/hashtable_policy.h
index ef120134914df26ef594613b236bee57e185f5df..565f2ad80de772dfc3661e4751403e1467f251fe 100644
--- a/libstdc++-v3/include/bits/hashtable_policy.h
+++ b/libstdc++-v3/include/bits/hashtable_policy.h
@@ -460,7 +460,7 @@ namespace __detail
     // Return a bucket count appropriate for n elements
     std::size_t
     _M_bkt_for_elements(std::size_t __n) const
-    { return __builtin_ceill(__n / (long double)_M_max_load_factor); }
+    { return __builtin_ceil(__n / (double)_M_max_load_factor); }
     // __n_bkt is current bucket count, __n_elt is current element count,
     // and __n_ins is number of elements to be inserted. Do we need to
@@ -560,7 +560,7 @@ namespace __detail
       _M_next_resize = numeric_limits::max();
     else
       _M_next_resize
-        = __builtin_floorl(__res * (long double)_M_max_load_factor);
+        = __builtin_floor(__res * (double)_M_max_load_factor);
     return __res;
   }
@@ -568,7 +568,7 @@ namespace __detail
     // Return a bucket count appropriate for n elements
     std::size_t
     _M_bkt_for_elements(std::size_t __n) const noexcept
-    { return __builtin_ceill(__n / (long double)_M_max_load_factor); }
+    { return __builtin_ceil(__n / (double)_M_max_load_factor); }
     // __n_bkt is current bucket count, __n_elt is current element count,
     // and __n_ins is number of elements to be inserted. Do we need to
@@ -588,11 +588,11 @@ namespace __detail
         / (long double)_M_max_load_factor;
     if (__min_bkts >= __n_bkt)
       return { true,
-        _M_next_bkt(std::max(__builtin_floorl(__min_bkts) + 1,
+        _M_next_bkt(std::max(__builtin_floor(__min_bkts) + 1,
                              __n_bkt * _S_growth_factor)) };
     _M_next_resize
-      = __builtin_floorl(__n_bkt * (long double)_M_max_load_factor);
+      = __builtin_floor(__n_bkt * (double)_M_max_load_factor);
     return { false, 0 };
   }
 else
diff --git a/libstdc++-v3/src/c++11/hashtable_c++0x.cc b/libstdc++-v3/src/c++11/hashtable_c++0x.cc
index de8e2c7cb915bec8f9066bb00cfb60db49e3adca..5584efa7162073a2036f4ef0e69b9d43b8b543b4 100644
--- a/libstdc++-v3/src/c++11/hashtable_c++0x.cc
+++ b/libstdc++-v3/src/c++11/hashtable_c++0x.cc
@@ -58,7 +58,7 @@ namespace __detail
       return 1;
       _M_next_resize =
-        __builtin_floorl(__fast_bkt[__n] * (long double)_M_max_load_factor);
+        __builtin_floor(__fast_bkt[__n] * (double)_M_max_load_factor);
       return __fast_bkt[__n];
     }
@@ -81,7 +81,7 @@ namespace __detail
       _M_next_resize = numeric_limits::max();
     else
       _M_next_resize =
-        __builtin_floorl(*__next_bkt * (long double)_M_max_load_factor);
+        __builtin_floor(*__next_bkt * (double)_M_max_load_factor);
     return *__next_bkt;
   }
@@ -105,16 +105,16 @@ namespace __detail
     // If _M_next_resize is 0 it means that we have nothing allocated so
     // far and that we start inserting elements. In this case we start
     // with an initial bucket size of 11.
-    long double __min_bkts
+    double __min_bkts
       = std::max(__n_elt + __n_ins, _M_next_resize ? 0 : 11)
-      / (long double)_M_max_load_factor;
+      / (double)_M_max_load_factor;
     if (__min_bkts >= __n_bkt)
       return { true,
-        _M_next_bkt(std::max(__builtin_floorl(__min_bkts) + 1,
+        _M_next_bkt(std::max(__builtin_floor(__min_bkts) + 1,
                              __n_bkt * _S_growth_factor)) };
     _M_next_resize
-      = __builtin_floorl(__n_bkt * (long double)_M_max_load_factor);
+      = __builtin_floor(__n_bkt * (double)_M_max_load_factor);
     return { false, 0 };
   }
 else