From 88ee34f8067825f2b67ed3a225feccec3116b8a1 Mon Sep 17 00:00:00 2001 From: Xing Li Date: Wed, 19 Jul 2023 15:37:59 +0800 Subject: [PATCH] Sync code to gcc 8.3 vec.36 Fix some bug: LoongArch NOOP truncation gcc8 do_spec_2 --- 0001-Sync-to-gcc-8-vec-36.patch | 30492 ++++++++++++++++ ...ocessing-to-allow-in-function-argume.patch | 220 + ...-NOOP_TRUNCATION-and-fix-extendsidi2.patch | 101 + gcc.spec | 21 +- 4 files changed, 30830 insertions(+), 4 deletions(-) create mode 100644 0001-Sync-to-gcc-8-vec-36.patch create mode 100644 Improve-specs-processing-to-allow-in-function-argume.patch create mode 100644 LoongArch-Remove-NOOP_TRUNCATION-and-fix-extendsidi2.patch diff --git a/0001-Sync-to-gcc-8-vec-36.patch b/0001-Sync-to-gcc-8-vec-36.patch new file mode 100644 index 0000000..e41b234 --- /dev/null +++ b/0001-Sync-to-gcc-8-vec-36.patch @@ -0,0 +1,30492 @@ +From 474c84c016b0c36c9aace9a41d6d9df8107cf3e8 Mon Sep 17 00:00:00 2001 +From: Lixing +Date: Wed, 19 Jul 2023 10:47:27 +0800 +Subject: [PATCH] Sync to gcc-8-vec-36 + +--- + .../config/loongarch/loongarch-common.c | 41 +- + gcc/config.gcc | 589 +- + gcc/config.host | 12 - + gcc/config/loongarch/constraints.md | 371 +- + gcc/config/loongarch/driver-native.c | 82 - + gcc/config/loongarch/elf.h | 56 +- + gcc/config/loongarch/frame-header-opt.c | 292 - + gcc/config/loongarch/generic.md | 21 +- + gcc/config/loongarch/genopt.sh | 110 - + gcc/config/loongarch/genopts/genstr.sh | 104 + + .../loongarch/genopts/loongarch-strings | 68 + + gcc/config/loongarch/genopts/loongarch.opt.in | 242 + + gcc/config/loongarch/gnu-user.h | 135 +- + gcc/config/loongarch/la464.md | 132 + + gcc/config/loongarch/larchintrin.h | 495 +- + gcc/config/loongarch/lasx.md | 684 +- + gcc/config/loongarch/lasxintrin.h | 46 +- + gcc/config/loongarch/linux-common.h | 68 - + gcc/config/loongarch/linux.h | 37 +- + gcc/config/loongarch/loongarch-builtins.c | 549 +- + gcc/config/loongarch/loongarch-c.c | 158 +- + gcc/config/loongarch/loongarch-cpu.c | 291 + + .../{loongarch-d.c => loongarch-cpu.h} | 30 +- + gcc/config/loongarch/loongarch-cpus.def | 38 - + gcc/config/loongarch/loongarch-def.c | 232 + + gcc/config/loongarch/loongarch-def.h | 161 + + gcc/config/loongarch/loongarch-driver.c | 206 + + gcc/config/loongarch/loongarch-driver.h | 72 + + gcc/config/loongarch/loongarch-ftypes.def | 173 +- + gcc/config/loongarch/loongarch-modes.def | 6 +- + gcc/config/loongarch/loongarch-opts.c | 725 ++ + gcc/config/loongarch/loongarch-opts.h | 86 +- + gcc/config/loongarch/loongarch-protos.h | 155 +- + gcc/config/loongarch/loongarch-str.h | 68 + + gcc/config/loongarch/loongarch-tables.opt | 34 - + gcc/config/loongarch/loongarch-tune.h | 51 + + gcc/config/loongarch/loongarch.c | 8440 +++++++++-------- + gcc/config/loongarch/loongarch.h | 1523 +-- + gcc/config/loongarch/loongarch.md | 3658 +++---- + gcc/config/loongarch/loongarch.opt | 252 +- + gcc/config/loongarch/lsx.md | 358 +- + gcc/config/loongarch/lsxintrin.h | 46 +- + gcc/config/loongarch/predicates.md | 250 +- + gcc/config/loongarch/rtems.h | 39 - + gcc/config/loongarch/sde.opt | 28 - + gcc/config/loongarch/sync.md | 746 +- + gcc/config/loongarch/t-linux | 65 +- + gcc/config/loongarch/t-loongarch | 59 +- + gcc/config/loongarch/x-native | 3 - + libgcc/config/loongarch/crtfastmath.c | 48 +- + libgcc/config/loongarch/crti.S | 43 - + libgcc/config/loongarch/crtn.S | 39 - + libgcc/config/loongarch/gthr-loongnixsde.h | 237 - + libgcc/config/loongarch/linux-unwind.h | 27 +- + libgcc/config/loongarch/sfp-machine.h | 166 +- + 
libgcc/config/loongarch/t-elf | 3 - + libgcc/config/loongarch/t-loongarch | 2 - + libgcc/config/loongarch/t-sdemtk | 3 - + libgcc/config/loongarch/t-vr | 0 + 59 files changed, 12128 insertions(+), 10527 deletions(-) + delete mode 100644 gcc/config/loongarch/driver-native.c + delete mode 100644 gcc/config/loongarch/frame-header-opt.c + delete mode 100644 gcc/config/loongarch/genopt.sh + create mode 100755 gcc/config/loongarch/genopts/genstr.sh + create mode 100644 gcc/config/loongarch/genopts/loongarch-strings + create mode 100644 gcc/config/loongarch/genopts/loongarch.opt.in + create mode 100644 gcc/config/loongarch/la464.md + delete mode 100644 gcc/config/loongarch/linux-common.h + create mode 100644 gcc/config/loongarch/loongarch-cpu.c + rename gcc/config/loongarch/{loongarch-d.c => loongarch-cpu.h} (59%) + delete mode 100644 gcc/config/loongarch/loongarch-cpus.def + create mode 100644 gcc/config/loongarch/loongarch-def.c + create mode 100644 gcc/config/loongarch/loongarch-def.h + create mode 100644 gcc/config/loongarch/loongarch-driver.c + create mode 100644 gcc/config/loongarch/loongarch-driver.h + create mode 100644 gcc/config/loongarch/loongarch-opts.c + create mode 100644 gcc/config/loongarch/loongarch-str.h + delete mode 100644 gcc/config/loongarch/loongarch-tables.opt + create mode 100644 gcc/config/loongarch/loongarch-tune.h + delete mode 100644 gcc/config/loongarch/rtems.h + delete mode 100644 gcc/config/loongarch/sde.opt + delete mode 100644 gcc/config/loongarch/x-native + delete mode 100644 libgcc/config/loongarch/crti.S + delete mode 100644 libgcc/config/loongarch/crtn.S + delete mode 100644 libgcc/config/loongarch/gthr-loongnixsde.h + delete mode 100644 libgcc/config/loongarch/t-elf + delete mode 100644 libgcc/config/loongarch/t-sdemtk + delete mode 100644 libgcc/config/loongarch/t-vr + +diff --git a/gcc/common/config/loongarch/loongarch-common.c b/gcc/common/config/loongarch/loongarch-common.c +index afbbc3ad0..ccdc8f498 100644 +--- a/gcc/common/config/loongarch/loongarch-common.c ++++ b/gcc/common/config/loongarch/loongarch-common.c +@@ -1,5 +1,5 @@ +-/* Common hooks for LARCH. +- Copyright (C) 1989-2018 Free Software Foundation, Inc. ++/* Common hooks for LoongArch. ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. + + This file is part of GCC. + +@@ -25,44 +25,21 @@ along with GCC; see the file COPYING3. If not see + #include "common/common-target-def.h" + #include "opts.h" + #include "flags.h" ++#include "diagnostic-core.h" + +-#undef TARGET_OPTION_OPTIMIZATION_TABLE ++#undef TARGET_OPTION_OPTIMIZATION_TABLE + #define TARGET_OPTION_OPTIMIZATION_TABLE loongarch_option_optimization_table + + /* Set default optimization options. */ + static const struct default_options loongarch_option_optimization_table[] = + { +- { OPT_LEVELS_ALL, OPT_fasynchronous_unwind_tables, NULL, 1 }, +- { OPT_LEVELS_NONE, 0, NULL, 0 } ++ { OPT_LEVELS_ALL, OPT_fasynchronous_unwind_tables, NULL, 1 }, ++ /* Enable -fsched-pressure by default when optimizing. */ ++ { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 }, ++ { OPT_LEVELS_NONE, 0, NULL, 0 } + }; + +-/* Implement TARGET_HANDLE_OPTION. 
*/ +- +-static bool +-loongarch_handle_option (struct gcc_options *opts, +- struct gcc_options *opts_set ATTRIBUTE_UNUSED, +- const struct cl_decoded_option *decoded, +- location_t loc ATTRIBUTE_UNUSED) +-{ +- size_t code = decoded->opt_index; +- +- switch (code) +- { +- case OPT_mno_flush_func: +- opts->x_loongarch_cache_flush_func = NULL; +- return true; +- +- default: +- return true; +- } +-} +- + #undef TARGET_DEFAULT_TARGET_FLAGS +-#define TARGET_DEFAULT_TARGET_FLAGS \ +- (TARGET_DEFAULT \ +- | TARGET_CPU_DEFAULT \ +- | MASK_CHECK_ZERO_DIV) +-#undef TARGET_HANDLE_OPTION +-#define TARGET_HANDLE_OPTION loongarch_handle_option ++#define TARGET_DEFAULT_TARGET_FLAGS MASK_CHECK_ZERO_DIV + + struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER; +diff --git a/gcc/config.gcc b/gcc/config.gcc +index ba061efa4..cca2e6e43 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -427,10 +427,10 @@ lm32*) + ;; + loongarch*-*-*) + cpu_type=loongarch +- d_target_objs="loongarch-d.o" + extra_headers="lasxintrin.h lsxintrin.h larchintrin.h" +- extra_objs="frame-header-opt.o loongarch-c.o loongarch-builtins.o" +- extra_options="${extra_options} g.opt fused-madd.opt loongarch/loongarch-tables.opt" ++ extra_objs="loongarch-c.o loongarch-builtins.o loongarch-cpu.o loongarch-opts.o loongarch-def.o" ++ extra_gcc_objs="loongarch-driver.o loongarch-cpu.o loongarch-opts.o loongarch-def.o" ++ extra_options="${extra_options} g.opt fused-madd.opt" + ;; + m32r*-*-*) + cpu_type=m32r +@@ -2193,54 +2193,30 @@ mips*-*-linux*) # Linux MIPS, either endian. + fi + ;; + loongarch*-*-linux*) +- case ${with_abi} in +- "") +- echo "not specify ABI, default is lp64 for loongarch64" +- with_abi=lp64 # for default +- ;; +- lpx32) +- ;; +- lp32) +- ;; +- lp64) +- ;; +- *) +- echo "Unknown ABI used in --with-abi=$with_abi" +- exit 1 +- esac +- +- enable_multilib="yes" +- loongarch_multilibs="${with_multilib_list}" +- if test "$loongarch_multilibs" = "default"; then +- loongarch_multilibs="${with_abi}" +- fi +- loongarch_multilibs=`echo $loongarch_multilibs | sed -e 's/,/ /g'` +- for loongarch_multilib in ${loongarch_multilibs}; do +- case ${loongarch_multilib} in +- lp64 | lpx32 | lp32 ) +- TM_MULTILIB_CONFIG="${TM_MULTILIB_CONFIG},${loongarch_multilib}" +- ;; +- *) +- echo "--with-multilib-list=${loongarch_multilib} not supported." +- exit 1 +- esac +- done +- TM_MULTILIB_CONFIG=`echo $TM_MULTILIB_CONFIG | sed 's/^,//'` ++ tm_file="dbxelf.h elfos.h gnu-user.h linux.h linux-android.h glibc-stdint.h ${tm_file}" ++ tm_file="${tm_file} loongarch/gnu-user.h loongarch/linux.h" ++ extra_options="${extra_options} linux-android.opt" ++ tmake_file="${tmake_file} loongarch/t-linux" ++ gnu_ld=yes ++ gas=yes + +- if test `for one_abi in ${loongarch_multilibs}; do if [ x\$one_abi = x$with_abi ]; then echo 1; exit 0; fi; done; echo 0;` = "0"; then +- echo "--with-abi=${with_abi} must be one of --with-multilib-list=${with_multilib_list}" +- exit 1 +- fi ++ # Force .init_array support. The configure script cannot always ++ # automatically detect that GAS supports it, yet we require it. 
++ gcc_cv_initfini_array=yes ++ ;; + +- tm_file="dbxelf.h elfos.h gnu-user.h linux.h linux-android.h glibc-stdint.h ${tm_file} loongarch/gnu-user.h loongarch/linux.h loongarch/linux-common.h" +- extra_options="${extra_options} linux-android.opt" ++loongarch*-*-elf*) ++ tm_file="elfos.h newlib-stdint.h ${tm_file}" ++ tm_file="${tm_file} loongarch/elf.h loongarch/linux.h" + tmake_file="${tmake_file} loongarch/t-linux" + gnu_ld=yes + gas=yes ++ + # Force .init_array support. The configure script cannot always + # automatically detect that GAS supports it, yet we require it. + gcc_cv_initfini_array=yes + ;; ++ + mips*-mti-elf*) + tm_file="elfos.h newlib-stdint.h ${tm_file} mips/elf.h mips/n32-elf.h mips/sde.h mips/mti-elf.h" + tmake_file="mips/t-mti-elf" +@@ -2295,31 +2271,6 @@ mips*-sde-elf*) + ;; + esac + ;; +-loongarch*-sde-elf*) +- tm_file="elfos.h newlib-stdint.h ${tm_file} loongarch/elf.h loongarch/sde.h" +-# tmake_file="loongarch/t-sde" +- extra_options="${extra_options} loongarch/sde.opt" +- case "${with_newlib}" in +- yes) +- # newlib / libgloss. +- ;; +- *) +- # MIPS toolkit libraries. +- tm_file="$tm_file loongarch/sdemtk.h" +- tmake_file="$tmake_file loongarch/t-sdemtk" +- case ${enable_threads} in +- "" | yes | loongarchsde) +- thread_file='loongarchsde' +- ;; +- esac +- ;; +- esac +- case ${target} in +- loongarch*) +- tm_defines="LARCH_ISA_DEFAULT=0 LARCH_ABI_DEFAULT=ABILP64" +- ;; +- esac +- ;; + mipsisa32-*-elf* | mipsisa32el-*-elf* | \ + mipsisa32r2-*-elf* | mipsisa32r2el-*-elf* | \ + mipsisa32r6-*-elf* | mipsisa32r6el-*-elf* | \ +@@ -3259,7 +3210,7 @@ case ${target} in + ;; + *-*-linux* | *-*-gnu*) + case ${target} in +- aarch64*-* | arm*-* | i[34567]86-* | powerpc*-* | s390*-* | sparc*-* | x86_64-*) ++ aarch64*-* | arm*-* | i[34567]86-* | powerpc*-* | s390*-* | sparc*-* | x86_64-* | loongarch*-*) + default_gnu_indirect_function=yes + ;; + esac +@@ -4450,57 +4401,466 @@ case "${target}" in + ;; + + loongarch*-*-*) +- supported_defaults="abi arch float fpu tune" ++ supported_defaults="abi arch tune fpu simd multilib-default" ++ ++ # Local variables ++ unset \ ++ abi_pattern abi_default \ ++ abiext_pattern abiext_default \ ++ arch_pattern arch_default \ ++ fpu_pattern fpu_default \ ++ triplet_os triplet_abi ++ ++ # Infer ABI from the triplet. ++ case ${target} in ++ loongarch64-*-*-*f64) ++ abi_pattern="lp64d" ++ triplet_abi="" ++ ;; ++ loongarch64-*-*-*f32) ++ abi_pattern="lp64f" ++ triplet_abi="f32" ++ ;; ++ loongarch64-*-*-*sf) ++ abi_pattern="lp64s" ++ triplet_abi="sf" ++ ;; ++ loongarch64-*-*) ++ abi_pattern="lp64[dfs]" ++ abi_default="lp64d" ++ triplet_abi="" ++ ;; ++ *) ++ echo "Unsupported target ${target}." 1>&2 ++ exit 1 ++ ;; ++ esac ++ ++ abiext_pattern="*" ++ abiext_default="base" ++ ++ # Get the canonical triplet (multiarch specifier). ++ case ${target} in ++ *-linux-gnu*) triplet_os="linux-gnu";; ++ *-linux-musl*) triplet_os="linux-musl";; ++ *-elf*) triplet_os="elf";; ++ *) ++ echo "Unsupported target ${target}." 1>&2 ++ exit 1 ++ ;; ++ esac ++ ++ la_canonical_triplet="loongarch64-${triplet_os}${triplet_abi}" + ++ ++ # Perform initial sanity checks on --with-* options. + case ${with_arch} in +- loongarch64 | loongarch32) +- # OK +- default_loongarch_arch=$with_arch ++ "" | abi-default | loongarch64 | la[2346]64) ;; # OK, append here. ++ native) ++ if test x${host} != x${target}; then ++ echo "--with-arch=native is illegal for cross-compiler." 
1>&2 ++ exit 1 ++ fi + ;; +- "") +- # fallback +- default_loongarch_arch=loongarch64 ++ *) ++ echo "Unknown arch in --with-arch=$with_arch" 1>&2 ++ exit 1 ++ ;; ++ esac ++ ++ case ${with_abi} in ++ lp64) ++ # Legacy ++ with_abi=lp64d + ;; ++ ++ "" | lp64d | lp64f | lp64s) ;; # OK, append here. + *) +- echo "Unknown arch given in --with-arch=$with_arch, available choices are: loongarch64" 1>&2 ++ echo "Unsupported ABI given in --with-abi=$with_abi" 1>&2 + exit 1 + ;; + esac + ++ case ${with_abiext} in ++ "" | base) ;; # OK, append here. ++ *) ++ echo "Unsupported ABI extention type $with_abiext" 1>&2 ++ exit 1 ++ ;; ++ esac ++ ++ case ${with_fpu} in ++ "" | none | 32 | 64) ;; # OK, append here. ++ 0) ++ # Convert "0" to "none" for upcoming checks. ++ with_fpu="none" ++ ;; ++ *) ++ echo "Unknown fpu type in --with-fpu=$with_fpu" 1>&2 ++ exit 1 ++ ;; ++ esac ++ ++ case ${with_simd} in ++ "" | none) ;; ++ lsx | lasx) # OK, append here. ++ case ${with_fpu} in ++ 64) ;; ++ "") with_fpu=64 ;; ++ *) ++ echo "--with-simd=${with_simd} conflicts with --with-fpu=${with_fpu}" 1>&2 ++ exit 1 ++ ;; ++ esac ++ ;; ++ ++ *) ++ echo "Unknown SIMD extension in --with-simd=$with_simd" 1>&2 ++ exit 1 ++ ;; ++ esac ++ ++ ++ # Set default value for with_abi. + case ${with_abi} in +- lp64 | lp32) +- # OK +- default_loongarch_abi=$with_abi ++ "") ++ if test x${abi_default} != x; then ++ with_abi=${abi_default} ++ else ++ with_abi=${abi_pattern} ++ fi ++ ;; ++ ++ *) ++ if echo "${with_abi}" | grep -E "^${abi_pattern}$" > /dev/null; then ++ : # OK ++ else ++ echo "Incompatible options:" \ ++ "--with-abi=${with_abi} and --target=${target}." 1>&2 ++ exit 1 ++ fi + ;; ++ esac ++ ++ # Set default value for with_abiext (internal) ++ case ${with_abiext} in + "") +- # fallback +- default_loongarch_abi=lp64 ++ if test x${abiext_default} != x; then ++ with_abiext=${abiext_default} ++ else ++ with_abiext=${abiext_pattern} ++ fi ++ ;; ++ ++ *) ++ if echo "${with_abiext}" | grep -E "^${abiext_pattern}$" > /dev/null; then ++ : # OK ++ else ++ echo "The ABI extension type \"${with_abiext}\"" \ ++ "is incompatible with --target=${target}." 1>&2 ++ exit 1 ++ fi ++ ++ ;; ++ esac ++ ++ # Infer ISA-related default options from the ABI: pass 1 ++ case ${with_abi}/${with_abiext} in ++ lp64*/base) ++ # architectures that support lp64* ABI ++ arch_pattern="native|abi-default|loongarch64|la[2346]64" ++ # default architecture for lp64* ABI ++ arch_default="abi-default" + ;; + *) +- echo "Unknown ABI given in --with-abi=$with_abi, available choices are: lp32 lp64" 1>&2 ++ echo "Unsupported ABI type ${with_abi}/${with_abiext}." 1>&2 + exit 1 + ;; + esac + +- case ${with_float} in +- "" | soft | hard) +- # OK ++ # Infer ISA-related default options from the ABI: pass 2 ++ case ${with_abi}/${with_abiext} in ++ lp64d/base) ++ fpu_pattern="64" ++ ;; ++ lp64f/base) ++ fpu_pattern="32|64" ++ fpu_default="32" ++ ;; ++ lp64s/base) ++ fpu_pattern="none|32|64" ++ fpu_default="none" + ;; + *) +- echo "Unknown floating point type used in --with-float=$with_float" 1>&2 ++ echo "Unsupported ABI type ${with_abi}/${with_abiext}." 1>&2 + exit 1 + ;; + esac + ++ ## Set default value for with_arch. ++ case ${with_arch} in ++ "") ++ if test x${arch_default} != x; then ++ with_arch=${arch_default} ++ else ++ with_arch=${arch_pattern} ++ fi ++ ;; ++ ++ *) ++ if echo "${with_arch}" | grep -E "^${arch_pattern}$" > /dev/null; then ++ : # OK ++ else ++ echo "${with_abi}/${with_abiext} ABI cannot be implemented with" \ ++ "--with-arch=${with_arch}." 
1>&2 ++ exit 1 ++ fi ++ ;; ++ esac ++ ++ ## Set default value for with_fpu. + case ${with_fpu} in +- "" | single | double) +- # OK ++ "") ++ if test x${fpu_default} != x; then ++ with_fpu=${fpu_default} ++ else ++ with_fpu=${fpu_pattern} ++ fi + ;; ++ + *) +- echo "Unknown fpu type used in --with-fpu=$with_fpu" 1>&2 +- exit 1 ++ if echo "${with_fpu}" | grep -E "^${fpu_pattern}$" > /dev/null; then ++ : # OK ++ else ++ echo "${with_abi}/${with_abiext} ABI cannot be implemented with" \ ++ "--with-fpu=${with_fpu}." 1>&2 ++ exit 1 ++ fi ++ ;; ++ esac ++ ++ ++ # Check default with_tune configuration using with_arch. ++ case ${with_arch} in ++ loongarch64) ++ tune_pattern="native|abi-default|loongarch64|la[2346]64" ++ ;; ++ *) ++ # By default, $with_tune == $with_arch ++ tune_pattern="*" ++ ;; ++ esac ++ ++ case ${with_tune} in ++ "") ;; # OK ++ *) ++ if echo "${with_tune}" | grep -E "^${tune_pattern}$" > /dev/null; then ++ : # OK ++ else ++ echo "Incompatible options: --with-tune=${with_tune}" \ ++ "and --with-arch=${with_arch}." 1>&2 ++ exit 1 ++ fi + ;; + esac ++ ++ # Handle --with-multilib-default ++ if echo "${with_multilib_default}" \ ++ | grep -E -e '[[:space:]]' -e '//' -e '/$' -e '^/' > /dev/null 2>&1; then ++ echo "Invalid argument to --with-multilib-default." 1>&2 ++ exit 1 ++ fi ++ ++ if test x${with_multilib_default} = x; then ++ # Use -march=abi-default by default when building libraries. ++ with_multilib_default="/march=abi-default" ++ else ++ unset parse_state component ++ parse_state=arch ++ for component in $(echo "${with_multilib_default}" | tr '/' ' '); do ++ case ${parse_state},${component} in ++ arch,|arch,abi-default) ++ # ABI-default: use the ABI's default ARCH configuration for ++ # multilib library builds, unless otherwise specified ++ # in --with-multilib-list. ++ with_multilib_default="/march=abi-default" ++ parse_state=opts ++ ;; ++ arch,fixed) ++ # Fixed: use the default gcc configuration for all multilib ++ # builds by default. ++ with_multilib_default="" ++ parse_state=opts ++ ;; ++ arch,*) ++ with_multilib_default="/march=abi-default" ++ parse_state=opts ++ ;& ++ opts,*) ++ with_multilib_default="${with_multilib_default}/${component}" ++ ;; ++ esac ++ done ++ unset parse_state component ++ fi ++ ++ # Handle --with-multilib-list. ++ if test x"${with_multilib_list}" = x \ ++ || test x"${with_multilib_list}" = xno \ ++ || test x"${with_multilib_list}" = xdefault \ ++ || test x"${enable_multilib}" != xyes; then ++ ++ with_multilib_list="${with_abi}/${with_abiext}" ++ fi ++ ++ # Check if the configured default ABI combination is included in ++ # ${with_multilib_list}. ++ loongarch_multilib_list_sane=no ++ ++ # This one goes to TM_MULTILIB_CONFIG, for use in t-linux. ++ loongarch_multilib_list_make="" ++ ++ # This one goes to tm_defines, for use in loongarch-driver.c. ++ loongarch_multilib_list_c="" ++ ++ # ${with_multilib_list} should not contain whitespaces, ++ # consecutive commas or slashes. ++ if echo "${with_multilib_list}" \ ++ | grep -E -e "[[:space:]]" -e '[,/][,/]' -e '[,/]$' -e '^[,/]' > /dev/null 2>&1; then ++ echo "Invalid argument to --with-multilib-list." 
1>&2 ++ exit 1 ++ fi ++ ++ unset component elem_abi_base elem_abi_ext elem_tmp parse_state all_abis ++ for elem in $(echo "${with_multilib_list}" | tr ',' ' '); do ++ unset elem_abi_base elem_abi_ext ++ parse_state="abi-base" ++ ++ for component in $(echo "${elem}" | tr '/' ' '); do ++ case ${parse_state} in ++ abi-base) ++ # Base ABI type ++ case ${component} in ++ lp64 | lp64d) elem_tmp="ABI_BASE_LP64D,";; ++ lp64f) elem_tmp="ABI_BASE_LP64F,";; ++ lp64s) elem_tmp="ABI_BASE_LP64S,";; ++ *) ++ echo "Unknown base ABI \"${component}\" in --with-multilib-list." 1>&2 ++ exit 1 ++ ;; ++ esac ++ loongarch_multilib_list_c="${loongarch_multilib_list_c}${elem_tmp}" ++ loongarch_multilib_list_make="${loongarch_multilib_list_make}mabi=${component}" ++ elem_abi_base="${component}" ++ ++ parse_state="abi-ext" ++ ;; ++ ++ abi-ext) ++ # ABI extension type ++ case ${component} in ++ base) ++ elem_abi_ext="base" ++ loongarch_multilib_list_c="${loongarch_multilib_list_c}ABI_EXT_BASE," ++ loongarch_multilib_list_make="${loongarch_multilib_list_make}" # Add nothing for now. ++ parse_state="arch" ++ continue; ++ ;; ++ esac ++ ++ # The default ABI extension is "base" if unspecified. ++ elem_abi_ext="base" ++ loongarch_multilib_list_c="${loongarch_multilib_list_c}ABI_EXT_BASE," ++ loongarch_multilib_list_make="${loongarch_multilib_list_make}" # Add nothing for now. ++ parse_state="arch" ++ ;& ++ ++ arch) ++ # -march option ++ case ${component} in ++ abi-default | loongarch64 | la[2346]64) # OK, append here. ++ # Append -march spec for each multilib variant. ++ loongarch_multilib_list_make="${loongarch_multilib_list_make}/march=${component}" ++ ;& ++ ++ default) ++ # "/default" is equivalent to --with-multilib-default=fixed ++ parse_state="opts" ++ continue; ++ ;; ++ esac ++ ++ # If ARCH is unspecified for this multilib variant, use ${with_multllib_default}. ++ loongarch_multilib_list_make="${loongarch_multilib_list_make}${with_multilib_default}" ++ parse_state="opts" ++ ;& ++ ++ opts) ++ # Other compiler options for building libraries. ++ # (no static sanity check performed) ++ case ${component} in ++ *) ++ # Append other components as additional build options ++ # (without the prepending dash). ++ # Their validity should be examined by the compiler. ++ loongarch_multilib_list_make="${loongarch_multilib_list_make}/${component}" ++ ;; ++ esac ++ ;; ++ ++ esac ++ done ++ ++ case ${parse_state} in ++ "abi-ext") ++ elem_abi_ext="base" ++ loongarch_multilib_list_c="${loongarch_multilib_list_c}ABI_EXT_BASE," ++ loongarch_multilib_list_make="${loongarch_multilib_list_make}" # Add nothing for now. ++ ;& ++ "arch") ++ # If ARCH is unspecified for this multilib variant, use ${with_multllib_default}. ++ loongarch_multilib_list_make="${loongarch_multilib_list_make}${with_multilib_default}" ++ ;& ++ "opts") ++ ;; ++ esac ++ ++ # Check for repeated configuration of the same multilib variant. ++ if echo "${elem_abi_base}/${elem_abi_ext}" \ ++ | grep -E "^(${all_abis%|})$" >/dev/null 2>&1; then ++ echo "Repeated multilib config of \"${elem_abi_base}/${elem_abi_ext}\" in --with-multilib-list." ++ exit 1 ++ fi ++ all_abis+="${elem_abi_base}/${elem_abi_ext}|" ++ ++ ++ # Check if the default ABI configuration of the GCC binary ++ # is included in the enabled multilib variants. 
++ if test x${elem_abi_base} = x${with_abi} \ ++ && test x${elem_abi_ext} = x${with_abiext}; then ++ loongarch_multilib_list_sane=yes ++ fi ++ loongarch_multilib_list_make="${loongarch_multilib_list_make}," ++ done ++ unset component elem_abi_base elem_abi_ext elem_tmp parse_state all_abis ++ ++ ++ # Check if the default ABI combination is in the default list. ++ if test x${loongarch_multilib_list_sane} = xno; then ++ if test x${with_abiext} = xbase; then ++ with_abiext="" ++ else ++ with_abiext="/${with_abiext}" ++ fi ++ ++ echo "Default ABI combination (${with_abi}${with_abiext})" \ ++ "not found in --with-multilib-list." 1>&2 ++ exit 1 ++ fi ++ ++ # Remove the excessive appending comma. ++ loongarch_multilib_list_c=${loongarch_multilib_list_c%,} ++ loongarch_multilib_list_make=${loongarch_multilib_list_make%,} + ;; + + nds32*-*-*) +@@ -4935,17 +5295,54 @@ case ${target} in + ;; + + loongarch*-*-*) +- case ${default_loongarch_arch} in +- loongarch64) tm_defines="$tm_defines LARCH_ISA_DEFAULT=0" ;; +- loongarch32) tm_defines="$tm_defines LARCH_ISA_DEFAULT=1" ;; ++ # Export canonical triplet. ++ tm_defines="${tm_defines} LA_MULTIARCH_TRIPLET=${la_canonical_triplet}" ++ ++ # Define macro LA_DISABLE_MULTILIB if --disable-multilib ++ tm_defines="${tm_defines} TM_MULTILIB_LIST=${loongarch_multilib_list_c}" ++ if test x$enable_multilib = xyes; then ++ TM_MULTILIB_CONFIG="${loongarch_multilib_list_make}" ++ else ++ tm_defines="${tm_defines} LA_DISABLE_MULTILIB" ++ fi ++ ++ # Let --with- flags initialize the enum variables from loongarch.opt. ++ # See macro definitions from loongarch-opts.h and loongarch-cpu.h. ++ ++ # Architecture ++ tm_defines="${tm_defines} DEFAULT_CPU_ARCH=CPU_$(tr a-z- A-Z_ <<< ${with_arch})" ++ ++ # Base ABI type ++ tm_defines="${tm_defines} DEFAULT_ABI_BASE=ABI_BASE_$(tr a-z- A-Z_ <<< ${with_abi})" ++ ++ # ABI Extension ++ case ${with_abiext} in ++ base) tm_defines="${tm_defines} DEFAULT_ABI_EXT=ABI_EXT_BASE" ;; + esac +- case ${default_loongarch_abi} in +- lp64) tm_defines="$tm_defines LARCH_ABI_DEFAULT=ABILP64" ;; +- lp32) tm_defines="$tm_defines LARCH_ABI_DEFAULT=ABILP32" ;; ++ ++ # Microarchitecture ++ if test x${with_tune} != x; then ++ tm_defines="${tm_defines} DEFAULT_CPU_TUNE=CPU_$(tr a-z- A-Z_ <<< ${with_tune})" ++ fi ++ ++ # FPU adjustment ++ case ${with_fpu} in ++ none) tm_defines="$tm_defines DEFAULT_ISA_EXT_FPU=ISA_EXT_NONE" ;; ++ 32) tm_defines="$tm_defines DEFAULT_ISA_EXT_FPU=ISA_EXT_FPU32" ;; ++ 64) tm_defines="$tm_defines DEFAULT_ISA_EXT_FPU=ISA_EXT_FPU64" ;; + esac ++ ++ # SIMD extensions ++ case ${with_simd} in ++ none) tm_defines="$tm_defines DEFAULT_ISA_EXT_SIMD=ISA_EXT_NONE" ;; ++ lsx) tm_defines="$tm_defines DEFAULT_ISA_EXT_SIMD=ISA_EXT_SIMD_LSX" ;; ++ lasx) tm_defines="$tm_defines DEFAULT_ISA_EXT_SIMD=ISA_EXT_SIMD_LASX" ;; ++ esac ++ + tmake_file="loongarch/t-loongarch $tmake_file" + ;; + ++ + powerpc*-*-* | rs6000-*-*) + # FIXME: The PowerPC port uses the value set at compile time, + # although it's only cosmetic. 
+diff --git a/gcc/config.host b/gcc/config.host +index d23dae4ac..c65569da2 100644 +--- a/gcc/config.host ++++ b/gcc/config.host +@@ -139,18 +139,6 @@ case ${host} in + host_extra_gcc_objs="driver-native.o" + host_xmake_file="${host_xmake_file} mips/x-native" + ;; +- loongarch*-*-linux*) +- host_extra_gcc_objs="driver-native.o" +- host_xmake_file="${host_xmake_file} loongarch/x-native" +- ;; +- esac +- ;; +- loongarch*-*-linux*) +- case ${target} in +- loongarch*-*-linux*) +- host_extra_gcc_objs="driver-native.o" +- host_xmake_file="${host_xmake_file} loongarch/x-native" +- ;; + esac + ;; + rs6000-*-* \ +diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md +index ae8596107..82c0ccf37 100644 +--- a/gcc/config/loongarch/constraints.md ++++ b/gcc/config/loongarch/constraints.md +@@ -1,5 +1,6 @@ +-;; Constraint definitions for LARCH. +-;; Copyright (C) 2006-2018 Free Software Foundation, Inc. ++;; Constraint definitions for LoongArch. ++;; Copyright (C) 2020-2022 Free Software Foundation, Inc. ++;; Contributed by Loongson Co. Ltd. + ;; + ;; This file is part of GCC. + ;; +@@ -20,160 +21,158 @@ + ;; Register constraints + + ;; "a" A constant call global and noplt address. +-;; "b" ALL_REGS ++;; "b" <-----unused + ;; "c" A constant call local address. +-;; "d" - +-;; "e" JALR_REGS ++;; "d" <-----unused ++;; "e" JIRL_REGS + ;; "f" FP_REGS +-;; "g" * ++;; "g" <-----unused + ;; "h" A constant call plt address. +-;; "i" "Matches a general integer constant." ++;; "i" Matches a general integer constant. (Global non-architectural) + ;; "j" SIBCALL_REGS +-;; "k" * +-;; "l" "A signed 16-bit constant ." +-;; "m" "A memory operand whose address is formed by a base register and offset +-;; that is suitable for use in instructions with the same addressing mode +-;; as @code{st.w} and @code{ld.w}." +-;; "n" "Matches a non-symbolic integer constant." +-;; "o" "Matches an offsettable memory reference." +-;; "p" "Matches a general address." +-;; "q" CSR_REGS +-;; "r" GENERAL_REGS +-;; "s" "Matches a symbolic integer constant." ++;; "k" A memory operand whose address is formed by a base register and ++;; (optionally scaled) index register. ++;; "l" A signed 16-bit constant. ++;; "m" A memory operand whose address is formed by a base register and offset ++;; that is suitable for use in instructions with the same addressing mode ++;; as @code{st.w} and @code{ld.w}. ++;; "n" Matches a non-symbolic integer constant. (Global non-architectural) ++;; "o" Matches an offsettable memory reference. (Global non-architectural) ++;; "p" Matches a general address. (Global non-architectural) ++;; "q" A general-purpose register except for $r0 and $r1 for lcsr. ++;; "r" GENERAL_REGS (Global non-architectural) ++;; "s" Matches a symbolic integer constant. (Global non-architectural) + ;; "t" A constant call weak address +-;; "u" - +-;; "v" - +-;; "w" "Matches any valid memory." +-;; "x" - +-;; "y" GR_REGS +-;; "z" ST_REGS +-;; "A" - +-;; "B" - +-;; "C" - +-;; "D" - +-;; "E" "Matches a floating-point constant." +-;; "F" "Matches a floating-point constant." +-;; "G" "Floating-point zero." +-;; "H" - +-;; "I" "A signed 12-bit constant (for arithmetic instructions)." +-;; "J" "Integer zero." +-;; "K" "An unsigned 12-bit constant (for logic instructions)." +-;; "L" "A signed 32-bit constant in which the lower 12 bits are zero. +-;; "M" "A constant that cannot be loaded using @code{lui}, @code{addiu} or @code{ori}." +-;; "N" "A constant in the range -65535 to -1 (inclusive)." 
+-;; "O" "A signed 15-bit constant." +-;; "P" "A constant in the range 1 to 65535 (inclusive)." +-;; "Q" "A signed 12-bit constant" +-;; "R" "An address that can be used in a non-macro load or store." +-;; "S" "A constant call address." +-;; "T" - +-;; "U" - +-;; "V" "Matches a non-offsettable memory reference." +-;; "W" "A memory address based on a member of @code{BASE_REG_CLASS}. This is +-;; true for all references (although it can sometimes be implicit +-;; if @samp{!TARGET_EXPLICIT_RELOCS})." +-;; "X" "Matches anything." ++;; "u" A signed 52bit constant and low 32-bit is zero (for logic instructions) ++;; "v" A signed 64-bit constant and low 44-bit is zero (for logic instructions) ++;; "w" Matches any valid memory. ++;; "x" <-----unused ++;; "y" <-----unused ++;; "z" FCC_REGS ++;; "A" <-----unused ++;; "B" <-----unused ++;; "C" <-----unused ++;; "D" <-----unused ++;; "E" Matches a floating-point constant. (Global non-architectural) ++;; "F" Matches a floating-point constant. (Global non-architectural) ++;; "G" Floating-point zero. ++;; "H" <-----unused ++;; "I" A signed 12-bit constant (for arithmetic instructions). ++;; "J" Integer zero. ++;; "K" An unsigned 12-bit constant (for logic instructions). ++;; "L" <-----unused ++;; "M" <-----unused ++;; "N" <-----unused ++;; "O" <-----unused ++;; "P" <-----unused ++;; "Q" <-----unused ++;; "R" <-----unused ++;; "S" <-----unused ++;; "T" <-----unused ++;; "U" <-----unused ++;; "V" Matches a non-offsettable memory reference. (Global non-architectural) ++;; "W" <-----unused ++;; "X" Matches anything. (Global non-architectural) + ;; "Y" - +-;; "YG" +-;; "A vector zero." +-;; "YA" +-;; "An unsigned 6-bit constant." +-;; "YB" +-;; "A signed 10-bit constant." +-;; "Yb" + ;; "Yd" +-;; "A constant @code{move_operand} that can be safely loaded into @code{$25} +-;; using @code{la}." +-;; "Yh" +-;; "Yw" ++;; A constant @code{move_operand} that can be safely loaded using ++;; @code{la}. ++;; "YG" ++;; A vector zero. + ;; "Yx" +-;; "YI" +-;; "A replicated vector const in which the replicated value is in the range +-;; [-512,511]." + ;; "YC" +-;; "A replicated vector const in which the replicated value has a single +-;; bit set." ++;; A replicated vector const in which the replicated value has a single ++;; bit set. + ;; "YZ" +-;; "A replicated vector const in which the replicated value has a single +-;; bit clear." ++;; A replicated vector const in which the replicated value has a single ++;; bit clear. + ;; "Z" - + ;; "ZC" +-;; "A memory operand whose address is formed by a base register and offset ++;; A memory operand whose address is formed by a base register and offset + ;; that is suitable for use in instructions with the same addressing mode +-;; as @code{ll.w} and @code{sc.w}." +-;; "ZD" +-;; "An address suitable for a @code{prefetch} instruction, or for any other +-;; instruction with the same addressing mode as @code{prefetch}." +-;; "ZR" +-;; "An address valid for loading/storing register exclusive" ++;; as @code{ll.w} and @code{sc.w}. + ;; "ZB" +-;; "An address that is held in a general-purpose register. +-;; The offset is zero" ++;; An address that is held in a general-purpose register. ++;; The offset is zero. ++;; "<" Matches a pre-dec or post-dec operand. (Global non-architectural) ++;; ">" Matches a pre-inc or post-inc operand. (Global non-architectural) + ++(define_constraint "a" ++ "@internal ++ A constant call global and noplt address." 
++ (match_operand 0 "is_const_call_global_noplt_symbol")) + + (define_constraint "c" + "@internal + A constant call local address." + (match_operand 0 "is_const_call_local_symbol")) + +-(define_constraint "a" +- "@internal +- A constant call global and noplt address." +- (match_operand 0 "is_const_call_global_noplt_symbol")) ++(define_register_constraint "e" "JIRL_REGS" ++ "@internal") ++ ++(define_register_constraint "f" "TARGET_HARD_FLOAT ? FP_REGS : NO_REGS" ++ "A floating-point register (if available).") + + (define_constraint "h" + "@internal + A constant call plt address." + (match_operand 0 "is_const_call_plt_symbol")) + +-(define_constraint "t" +- "@internal +- A constant call weak address." +- (match_operand 0 "is_const_call_weak_symbol")) +- +-(define_register_constraint "e" "JALR_REGS" ++(define_register_constraint "j" "SIBCALL_REGS" + "@internal") + +-(define_register_constraint "q" "CSR_REGS" +- "A general-purpose register except for $r0 and $r1 for csr.") ++(define_memory_constraint "k" ++ "A memory operand whose address is formed by a base register and (optionally scaled) ++ index register." ++ (and (match_code "mem") ++ (match_test "loongarch_base_index_address_p (XEXP (op, 0), mode)"))) + +-(define_register_constraint "f" "TARGET_HARD_FLOAT ? FP_REGS : NO_REGS" +- "A floating-point register (if available).") ++(define_constraint "l" ++"A signed 16-bit constant." ++(and (match_code "const_int") ++ (match_test "IMM16_OPERAND (ival)"))) + +-(define_register_constraint "b" "ALL_REGS" +- "@internal") ++(define_memory_constraint "m" ++ "A memory operand whose address is formed by a base register and offset ++ that is suitable for use in instructions with the same addressing mode ++ as @code{st.w} and @code{ld.w}." ++ (and (match_code "mem") ++ (match_test "loongarch_12bit_offset_address_p (XEXP (op, 0), mode)"))) + +-(define_register_constraint "j" "SIBCALL_REGS" +- "@internal") ++(define_register_constraint "q" "CSR_REGS" ++ "A general-purpose register except for $r0 and $r1 for lcsr.") + +-(define_constraint "l" +- "A signed 16-bit constant ." ++(define_constraint "t" ++ "@internal ++ A constant call weak address." ++ (match_operand 0 "is_const_call_weak_symbol")) ++ ++(define_constraint "u" ++ "A signed 52bit constant and low 32-bit is zero (for logic instructions)." + (and (match_code "const_int") +- (match_test "IMM16_OPERAND (ival)"))) ++ (match_test "LU32I_OPERAND (ival)"))) + +-(define_register_constraint "y" "GR_REGS" +- "Equivalent to @code{r}; retained for backwards compatibility.") ++(define_constraint "v" ++ "A signed 64-bit constant and low 52-bit is zero (for logic instructions)." ++ (and (match_code "const_int") ++ (match_test "LU52I_OPERAND (ival)"))) + +-(define_register_constraint "z" "ST_REGS" ++(define_register_constraint "z" "FCC_REGS" + "A floating-point condition code register.") + +-(define_constraint "kf" +- "@internal" +- (match_operand 0 "force_to_mem_operand")) ++;; Floating-point constraints + +-;; This is a normal rather than a register constraint because we can +-;; never use the stack pointer as a reload register. +-(define_constraint "ks" +- "@internal" +- (and (match_code "reg") +- (match_test "REGNO (op) == STACK_POINTER_REGNUM"))) ++(define_constraint "G" ++ "Floating-point zero." ++ (and (match_code "const_double") ++ (match_test "op == CONST0_RTX (mode)"))) + + ;; Integer constraints + + (define_constraint "I" + "A signed 12-bit constant (for arithmetic instructions)." 
+ (and (match_code "const_int") +- (match_test "SMALL_OPERAND (ival)"))) ++ (match_test "IMM12_OPERAND (ival)"))) + + (define_constraint "J" + "Integer zero." +@@ -183,53 +182,7 @@ + (define_constraint "K" + "An unsigned 12-bit constant (for logic instructions)." + (and (match_code "const_int") +- (match_test "SMALL_OPERAND_UNSIGNED (ival)"))) +- +-(define_constraint "u" +- "An unsigned 12-bit constant (for logic instructions)." +- (and (match_code "const_int") +- (match_test "LU32I_OPERAND (ival)"))) +- +-(define_constraint "v" +- "An unsigned 12-bit constant (for logic instructions)." +- (and (match_code "const_int") +- (match_test "LU52I_OPERAND (ival)"))) +- +-(define_constraint "L" +- "A signed 32-bit constant in which the lower 12 bits are zero. +- Such constants can be loaded using @code{lui}." +- (and (match_code "const_int") +- (match_test "LUI_OPERAND (ival)"))) +- +-(define_constraint "M" +- "A constant that cannot be loaded using @code{lui}, @code{addiu} +- or @code{ori}." +- (and (match_code "const_int") +- (not (match_test "SMALL_OPERAND (ival)")) +- (not (match_test "SMALL_OPERAND_UNSIGNED (ival)")) +- (not (match_test "LUI_OPERAND (ival)")))) +- +-(define_constraint "N" +- "A constant in the range -65535 to -1 (inclusive)." +- (and (match_code "const_int") +- (match_test "ival >= -0xffff && ival < 0"))) +- +-(define_constraint "O" +- "A signed 15-bit constant." +- (and (match_code "const_int") +- (match_test "ival >= -0x4000 && ival < 0x4000"))) +- +-(define_constraint "P" +- "A constant in the range 1 to 65535 (inclusive)." +- (and (match_code "const_int") +- (match_test "ival > 0 && ival < 0x10000"))) +- +-;; Floating-point constraints +- +-(define_constraint "G" +- "Floating-point zero." +- (and (match_code "const_double") +- (match_test "op == CONST0_RTX (mode)"))) ++ (match_test "IMM12_OPERAND_UNSIGNED (ival)"))) + + ;; General constraints + +@@ -237,33 +190,35 @@ + "@internal" + (match_operand 0 "const_arith_operand")) + +-(define_memory_constraint "R" +- "An address that can be used in a non-macro load or store." +- (and (match_code "mem") +- (match_test "loongarch_address_insns (XEXP (op, 0), mode, false) == 1"))) ++(define_constraint "Yd" ++ "@internal ++ A constant @code{move_operand} that can be safely loaded using ++ @code{la}." ++ (and (match_operand 0 "move_operand") ++ (match_test "CONSTANT_P (op)"))) + +-(define_memory_constraint "m" ++(define_constraint "Yx" ++ "@internal" ++ (match_operand 0 "low_bitmask_operand")) ++ ++(define_memory_constraint "ZC" + "A memory operand whose address is formed by a base register and offset + that is suitable for use in instructions with the same addressing mode +- as @code{st.w} and @code{ld.w}." ++ as @code{ll.w} and @code{sc.w}." + (and (match_code "mem") +- (match_test "loongarch_12bit_offset_address_p (XEXP (op, 0), mode)"))) ++ (match_test "loongarch_14bit_shifted_offset_address_p (XEXP (op, 0), mode)"))) + +-(define_constraint "S" ++(define_memory_constraint "ZB" + "@internal +- A constant call address." +- (and (match_operand 0 "call_insn_operand") +- (match_test "CONSTANT_P (op)"))) ++ An address that is held in a general-purpose register. ++ The offset is zero" ++ (and (match_code "mem") ++ (match_test "REG_P (XEXP (op, 0))"))) + +-(define_memory_constraint "W" +- "@internal +- A memory address based on a member of @code{BASE_REG_CLASS}. This is +- true for allreferences (although it can sometimes be implicit +- if @samp{!TARGET_EXPLICIT_RELOCS})." 
++(define_memory_constraint "R" ++ "An address that can be used in a non-macro load or store." + (and (match_code "mem") +- (match_operand 0 "memory_operand") +- (and (not (match_operand 0 "stack_operand")) +- (not (match_test "CONSTANT_P (XEXP (op, 0))"))))) ++ (match_test "loongarch_address_insns (XEXP (op, 0), mode, false) == 1"))) + + (define_constraint "YG" + "@internal +@@ -271,41 +226,6 @@ + (and (match_code "const_vector") + (match_test "op == CONST0_RTX (mode)"))) + +-(define_constraint "YA" +- "@internal +- An unsigned 6-bit constant." +- (and (match_code "const_int") +- (match_test "UIMM6_OPERAND (ival)"))) +- +-(define_constraint "YB" +- "@internal +- A signed 10-bit constant." +- (and (match_code "const_int") +- (match_test "IMM10_OPERAND (ival)"))) +- +-(define_constraint "Yb" +- "@internal" +- (match_operand 0 "qi_mask_operand")) +- +-(define_constraint "Yd" +- "@internal +- A constant @code{move_operand} that can be safely loaded into @code{$25} +- using @code{la}." +- (and (match_operand 0 "move_operand") +- (match_test "CONSTANT_P (op)"))) +- +-(define_constraint "Yh" +- "@internal" +- (match_operand 0 "hi_mask_operand")) +- +-(define_constraint "Yw" +- "@internal" +- (match_operand 0 "si_mask_operand")) +- +-(define_constraint "Yx" +- "@internal" +- (match_operand 0 "low_bitmask_operand")) +- + (define_constraint "YI" + "@internal + A replicated vector const in which the replicated value is in the range +@@ -360,30 +280,3 @@ + A replicated vector const with replicated byte values as well as elements" + (and (match_code "const_vector") + (match_test "loongarch_const_vector_same_bytes_p (op, mode)"))) +- +-(define_memory_constraint "ZC" +- "A memory operand whose address is formed by a base register and offset +- that is suitable for use in instructions with the same addressing mode +- as @code{ll.w} and @code{sc.w}." +- (and (match_code "mem") +- (match_test "loongarch_14bit_shifted_offset_address_p (XEXP (op, 0), mode)"))) +- +-;;(define_address_constraint "ZD" +-;; "An address suitable for a @code{prefetch} instruction, or for any other +-;; instruction with the same addressing mode as @code{prefetch}." +-;; (if_then_else (match_test "ISA_HAS_9BIT_DISPLACEMENT") +-;; (match_test "loongarch_9bit_offset_address_p (op, mode)") +-;; (match_test "loongarch_address_insns (op, mode, false)"))) +- +-(define_memory_constraint "ZR" +- "@internal +- An address valid for loading/storing register exclusive" +- (match_operand 0 "mem_noofs_operand")) +- +-(define_memory_constraint "ZB" +- "@internal +- An address that is held in a general-purpose register. +- The offset is zero" +- (and (match_code "mem") +- (match_test "GET_CODE(XEXP(op,0)) == REG"))) +- +diff --git a/gcc/config/loongarch/driver-native.c b/gcc/config/loongarch/driver-native.c +deleted file mode 100644 +index 5484ee502..000000000 +--- a/gcc/config/loongarch/driver-native.c ++++ /dev/null +@@ -1,82 +0,0 @@ +-/* Subroutines for the gcc driver. +- Copyright (C) 2008-2018 Free Software Foundation, Inc. +- +-This file is part of GCC. +- +-GCC is free software; you can redistribute it and/or modify +-it under the terms of the GNU General Public License as published by +-the Free Software Foundation; either version 3, or (at your option) +-any later version. +- +-GCC is distributed in the hope that it will be useful, +-but WITHOUT ANY WARRANTY; without even the implied warranty of +-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-GNU General Public License for more details. 
+- +-You should have received a copy of the GNU General Public License +-along with GCC; see the file COPYING3. If not see +-. */ +- +-#define IN_TARGET_CODE 1 +- +-#include "config.h" +-#include "system.h" +-#include "coretypes.h" +-#include "tm.h" +- +- +-/* This function must set to noinline. Otherwise the arg can not be passed. */ +-int loongson_cpucfg (int arg) +-{ +- int ret; +- __asm__ __volatile__ ("cpucfg %0,%1\n\t" /* cpucfg $2,$4. */ +- :"=r"(ret) +- :"r"(arg) +- :); +- return ret; +-} +- +-/* This will be called by the spec parser in gcc.c when it sees +- a %:local_cpu_detect(args) construct. Currently it will be called +- with either "arch" or "tune" as argument depending on if -march=native +- or -mtune=native is to be substituted. +- +- It returns a string containing new command line parameters to be +- put at the place of the above two options, depending on what CPU +- this is executed. E.g. "-march=loongson2f" on a Loongson 2F for +- -march=native. If the routine can't detect a known processor, +- the -march or -mtune option is discarded. +- +- ARGC and ARGV are set depending on the actual arguments given +- in the spec. */ +-const char * +-host_detect_local_cpu (int argc, const char **argv) +-{ +- const char *cpu = NULL; +- bool arch; +- int cpucfg_arg; +- int cpucfg_ret; +- +- if (argc < 1) +- return NULL; +- +- arch = strcmp (argv[0], "arch") == 0; +- if (!arch && strcmp (argv[0], "tune")) +- return NULL; +- +- cpucfg_arg = 0; +- cpucfg_ret = loongson_cpucfg (cpucfg_arg); +- if (((cpucfg_ret >> 16) & 0xff) == 0x14) +- { +- if (((cpucfg_ret >> 8) & 0xff) == 0xc0) +- cpu = "la464"; +- else +- cpu = NULL; +- } +- +- +- if (cpu == NULL) +- return NULL; +- +- return concat ("-m", argv[0], "=", cpu, NULL); +-} +diff --git a/gcc/config/loongarch/elf.h b/gcc/config/loongarch/elf.h +index b7f938e31..edb0e77d2 100644 +--- a/gcc/config/loongarch/elf.h ++++ b/gcc/config/loongarch/elf.h +@@ -1,5 +1,6 @@ +-/* Target macros for loongarch*-elf targets. +- Copyright (C) 1994-2018 Free Software Foundation, Inc. ++/* Definitions for LoongArch systems using GNU (glibc-based) userspace, ++ or other userspace with libc derived from glibc. ++ Copyright (C) 1998-2018 Free Software Foundation, Inc. + + This file is part of GCC. + +@@ -17,34 +18,37 @@ You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +-/* LARCH assemblers don't have the usual .set foo,bar construct; +- .set is used for assembler options instead. */ +-#undef SET_ASM_OP +-#define ASM_OUTPUT_DEF(FILE, LABEL1, LABEL2) \ +- do \ +- { \ +- fputc ('\t', FILE); \ +- assemble_name (FILE, LABEL1); \ +- fputs (" = ", FILE); \ +- assemble_name (FILE, LABEL2); \ +- fputc ('\n', FILE); \ +- } \ +- while (0) +- +-#undef ASM_DECLARE_OBJECT_NAME +-#define ASM_DECLARE_OBJECT_NAME loongarch_declare_object_name +- +-#undef ASM_FINISH_DECLARE_OBJECT +-#define ASM_FINISH_DECLARE_OBJECT loongarch_finish_declare_object +- +-/* Leave the linker script to choose the appropriate libraries. */ ++/* Define the size of the wide character type. */ ++#undef WCHAR_TYPE ++#define WCHAR_TYPE "int" ++ ++#undef WCHAR_TYPE_SIZE ++#define WCHAR_TYPE_SIZE 32 ++ ++ ++/* GNU-specific SPEC definitions. */ ++#define GNU_USER_LINK_EMULATION "elf" ABI_GRLEN_SPEC "loongarch" ++ ++#undef GNU_USER_TARGET_LINK_SPEC ++#define GNU_USER_TARGET_LINK_SPEC \ ++ "%{shared} -m " GNU_USER_LINK_EMULATION ++ ++ ++/* Link against Newlib libraries, because the ELF backend assumes Newlib. 
++ Handle the circular dependence between libc and libgloss. */ + #undef LIB_SPEC +-#define LIB_SPEC "" ++#define LIB_SPEC "--start-group -lc %{!specs=nosys.specs:-lgloss} --end-group" ++ ++#undef LINK_SPEC ++#define LINK_SPEC GNU_USER_TARGET_LINK_SPEC + + #undef STARTFILE_SPEC +-#define STARTFILE_SPEC "crti%O%s crtbegin%O%s" ++#define STARTFILE_SPEC "crt0%O%s crtbegin%O%s" + + #undef ENDFILE_SPEC +-#define ENDFILE_SPEC "crtend%O%s crtn%O%s" ++#define ENDFILE_SPEC "crtend%O%s" + + #define NO_IMPLICIT_EXTERN_C 1 ++#undef SUBTARGET_CC1_SPEC ++#define SUBTARGET_CC1_SPEC "%{profile:-p}" ++ +diff --git a/gcc/config/loongarch/frame-header-opt.c b/gcc/config/loongarch/frame-header-opt.c +deleted file mode 100644 +index 86e5d423d..000000000 +--- a/gcc/config/loongarch/frame-header-opt.c ++++ /dev/null +@@ -1,292 +0,0 @@ +-/* Analyze functions to determine if callers need to allocate a frame header +- on the stack. The frame header is used by callees to save their arguments. +- This optimization is specific to TARGET_OLDABI targets. For TARGET_NEWABI +- targets, if a frame header is required, it is allocated by the callee. +- +- +- Copyright (C) 2015-2018 Free Software Foundation, Inc. +- +-This file is part of GCC. +- +-GCC is free software; you can redistribute it and/or modify it +-under the terms of the GNU General Public License as published by the +-Free Software Foundation; either version 3, or (at your option) any +-later version. +- +-GCC is distributed in the hope that it will be useful, but WITHOUT +-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +-for more details. +- +-You should have received a copy of the GNU General Public License +-along with GCC; see the file COPYING3. If not see +-. */ +- +- +-#define IN_TARGET_CODE 1 +- +-#include "config.h" +-#include "system.h" +-#include "context.h" +-#include "coretypes.h" +-#include "tree.h" +-#include "tree-core.h" +-#include "tree-pass.h" +-#include "target.h" +-#include "target-globals.h" +-#include "profile-count.h" +-#include "cfg.h" +-#include "cgraph.h" +-#include "function.h" +-#include "basic-block.h" +-#include "gimple.h" +-#include "gimple-iterator.h" +-#include "gimple-walk.h" +- +-static unsigned int frame_header_opt (void); +- +-namespace { +- +-const pass_data pass_data_ipa_frame_header_opt = +-{ +- IPA_PASS, /* type */ +- "frame-header-opt", /* name */ +- OPTGROUP_NONE, /* optinfo_flags */ +- TV_CGRAPHOPT, /* tv_id */ +- 0, /* properties_required */ +- 0, /* properties_provided */ +- 0, /* properties_destroyed */ +- 0, /* todo_flags_start */ +- 0, /* todo_flags_finish */ +-}; +- +-class pass_ipa_frame_header_opt : public ipa_opt_pass_d +-{ +-public: +- pass_ipa_frame_header_opt (gcc::context *ctxt) +- : ipa_opt_pass_d (pass_data_ipa_frame_header_opt, ctxt, +- NULL, /* generate_summary */ +- NULL, /* write_summary */ +- NULL, /* read_summary */ +- NULL, /* write_optimization_summary */ +- NULL, /* read_optimization_summary */ +- NULL, /* stmt_fixup */ +- 0, /* function_transform_todo_flags_start */ +- NULL, /* function_transform */ +- NULL) /* variable_transform */ +- {} +- +- /* opt_pass methods: */ +- virtual bool gate (function *) +- { +- /* This optimization has no affect if TARGET_NEWABI. If optimize +- is not at least 1 then the data needed for the optimization is +- not available and nothing will be done anyway. 
*/ +- return TARGET_OLDABI && flag_frame_header_optimization && optimize > 0; +- } +- +- virtual unsigned int execute (function *) { return frame_header_opt (); } +- +-}; // class pass_ipa_frame_header_opt +- +-} // anon namespace +- +-static ipa_opt_pass_d * +-make_pass_ipa_frame_header_opt (gcc::context *ctxt) +-{ +- return new pass_ipa_frame_header_opt (ctxt); +-} +- +-void +-loongarch_register_frame_header_opt (void) +-{ +- opt_pass *p = make_pass_ipa_frame_header_opt (g); +- struct register_pass_info f = { p, "comdats", 1, PASS_POS_INSERT_AFTER }; +- register_pass (&f); +-} +- +- +-/* Return true if it is certain that this is a leaf function. False if it is +- not a leaf function or if it is impossible to tell. */ +- +-static bool +-is_leaf_function (function *fn) +-{ +- basic_block bb; +- gimple_stmt_iterator gsi; +- +- /* If we do not have a cfg for this function be conservative and assume +- it is not a leaf function. */ +- if (fn->cfg == NULL) +- return false; +- +- FOR_EACH_BB_FN (bb, fn) +- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) +- if (is_gimple_call (gsi_stmt (gsi))) +- return false; +- return true; +-} +- +-/* Return true if this function has inline assembly code or if we cannot +- be certain that it does not. False if we know that there is no inline +- assembly. */ +- +-static bool +-has_inlined_assembly (function *fn) +-{ +- basic_block bb; +- gimple_stmt_iterator gsi; +- +- /* If we do not have a cfg for this function be conservative and assume +- it is may have inline assembly. */ +- if (fn->cfg == NULL) +- return true; +- +- FOR_EACH_BB_FN (bb, fn) +- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) +- if (gimple_code (gsi_stmt (gsi)) == GIMPLE_ASM) +- return true; +- +- return false; +-} +- +-/* Return true if this function will use the stack space allocated by its +- caller or if we cannot determine for certain that it does not. */ +- +-static bool +-needs_frame_header_p (function *fn) +-{ +- tree t; +- +- if (fn->decl == NULL) +- return true; +- +- if (fn->stdarg) +- return true; +- +- for (t = DECL_ARGUMENTS (fn->decl); t; t = TREE_CHAIN (t)) +- { +- if (!use_register_for_decl (t)) +- return true; +- +- /* Some 64-bit types may get copied to general registers using the frame +- header, see loongarch_output_64bit_xfer. Checking for SImode only may be +- overly restrictive but it is guaranteed to be safe. */ +- if (DECL_MODE (t) != SImode) +- return true; +- } +- +- return false; +-} +- +-/* Return true if the argument stack space allocated by function FN is used. +- Return false if the space is needed or if the need for the space cannot +- be determined. 
*/ +- +-static bool +-callees_functions_use_frame_header (function *fn) +-{ +- basic_block bb; +- gimple_stmt_iterator gsi; +- gimple *stmt; +- tree called_fn_tree; +- function *called_fn; +- +- if (fn->cfg == NULL) +- return true; +- +- FOR_EACH_BB_FN (bb, fn) +- { +- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) +- { +- stmt = gsi_stmt (gsi); +- if (is_gimple_call (stmt)) +- { +- called_fn_tree = gimple_call_fndecl (stmt); +- if (called_fn_tree != NULL) +- { +- called_fn = DECL_STRUCT_FUNCTION (called_fn_tree); +- if (called_fn == NULL +- || DECL_WEAK (called_fn_tree) +- || has_inlined_assembly (called_fn) +- || !is_leaf_function (called_fn) +- || !called_fn->machine->does_not_use_frame_header) +- return true; +- } +- else +- return true; +- } +- } +- } +- return false; +-} +- +-/* Set the callers_may_not_allocate_frame flag for any function which +- function FN calls because FN may not allocate a frame header. */ +- +-static void +-set_callers_may_not_allocate_frame (function *fn) +-{ +- basic_block bb; +- gimple_stmt_iterator gsi; +- gimple *stmt; +- tree called_fn_tree; +- function *called_fn; +- +- if (fn->cfg == NULL) +- return; +- +- FOR_EACH_BB_FN (bb, fn) +- { +- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) +- { +- stmt = gsi_stmt (gsi); +- if (is_gimple_call (stmt)) +- { +- called_fn_tree = gimple_call_fndecl (stmt); +- if (called_fn_tree != NULL) +- { +- called_fn = DECL_STRUCT_FUNCTION (called_fn_tree); +- if (called_fn != NULL) +- called_fn->machine->callers_may_not_allocate_frame = true; +- } +- } +- } +- } +- return; +-} +- +-/* Scan each function to determine those that need its frame headers. Perform +- a second scan to determine if the allocation can be skipped because none of +- their callees require the frame header. */ +- +-static unsigned int +-frame_header_opt () +-{ +- struct cgraph_node *node; +- function *fn; +- +- FOR_EACH_DEFINED_FUNCTION (node) +- { +- fn = node->get_fun (); +- if (fn != NULL) +- fn->machine->does_not_use_frame_header = !needs_frame_header_p (fn); +- } +- +- FOR_EACH_DEFINED_FUNCTION (node) +- { +- fn = node->get_fun (); +- if (fn != NULL) +- fn->machine->optimize_call_stack +- = !callees_functions_use_frame_header (fn) && !is_leaf_function (fn); +- } +- +- FOR_EACH_DEFINED_FUNCTION (node) +- { +- fn = node->get_fun (); +- if (fn != NULL && fn->machine->optimize_call_stack) +- set_callers_may_not_allocate_frame (fn); +- } +- +- return 0; +-} +diff --git a/gcc/config/loongarch/generic.md b/gcc/config/loongarch/generic.md +index 321b8e561..0f6eb3f42 100644 +--- a/gcc/config/loongarch/generic.md ++++ b/gcc/config/loongarch/generic.md +@@ -1,6 +1,8 @@ +-;; Generic DFA-based pipeline description for LARCH targets +-;; Copyright (C) 2004-2018 Free Software Foundation, Inc. +-;; ++;; Generic DFA-based pipeline description for LoongArch targets ++;; Copyright (C) 2020-2022 Free Software Foundation, Inc. ++;; Contributed by Loongson Co. Ltd. ++;; Based on MIPS target for GNU compiler. ++ + ;; This file is part of GCC. + + ;; GCC is free software; you can redistribute it and/or modify it +@@ -17,9 +19,16 @@ + ;; along with GCC; see the file COPYING3. If not see + ;; . + ++(define_automaton "alu,imuldiv") ++ ++(define_cpu_unit "alu" "alu") ++(define_cpu_unit "imuldiv" "imuldiv") + +-;; This file is derived from the old define_function_unit description. +-;; Each reservation can be overridden on a processor-by-processor basis. ++;; Ghost instructions produce no real code. 
++;; They exist purely to express an effect on dataflow. ++(define_insn_reservation "ghost" 0 ++ (eq_attr "type" "ghost") ++ "nothing") + + (define_insn_reservation "generic_alu" 1 + (eq_attr "type" "unknown,prefetch,prefetchx,condmove,const,arith, +@@ -43,7 +52,7 @@ + "alu") + + (define_insn_reservation "generic_imul" 17 +- (eq_attr "type" "imul,imul3") ++ (eq_attr "type" "imul") + "imuldiv*17") + + (define_insn_reservation "generic_fcvt" 1 +diff --git a/gcc/config/loongarch/genopt.sh b/gcc/config/loongarch/genopt.sh +deleted file mode 100644 +index 272aac51d..000000000 +--- a/gcc/config/loongarch/genopt.sh ++++ /dev/null +@@ -1,110 +0,0 @@ +-#!/bin/sh +-# Generate loongarch-tables.opt from the list of CPUs in loongarch-cpus.def. +-# Copyright (C) 2011-2018 Free Software Foundation, Inc. +-# +-# This file is part of GCC. +-# +-# GCC is free software; you can redistribute it and/or modify +-# it under the terms of the GNU General Public License as published by +-# the Free Software Foundation; either version 3, or (at your option) +-# any later version. +-# +-# GCC is distributed in the hope that it will be useful, +-# but WITHOUT ANY WARRANTY; without even the implied warranty of +-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-# GNU General Public License for more details. +-# +-# You should have received a copy of the GNU General Public License +-# along with GCC; see the file COPYING3. If not see +-# . +- +-cat <. +- +-Enum +-Name(loongarch_arch_opt_value) Type(int) +-Known LARCH CPUs (for use with the -march= and -mtune= options): +- +-EnumValue +-Enum(loongarch_arch_opt_value) String(native) Value(LARCH_ARCH_OPTION_NATIVE) DriverOnly +- +-EOF +- +-awk -F'[(, ]+' ' +-BEGIN { +- value = 0 +-} +- +-# Write an entry for a single string accepted as a -march= argument. +- +-function write_one_arch_value(name, value, flags) +-{ +- print "EnumValue" +- print "Enum(loongarch_arch_opt_value) String(" name ") Value(" value ")" flags +- print "" +-} +- +-# The logic for matching CPU name variants should be the same as in GAS. +- +-# Write an entry for a single string accepted as a -march= argument, +-# plus any variant with a final "000" replaced by "k". +- +-function write_arch_value_maybe_k(name, value, flags) +-{ +- write_one_arch_value(name, value, flags) +- if (name ~ "000$") { +- sub("000$", "k", name) +- write_one_arch_value(name, value, "") +- } +-} +- +-# Write all the entries for a -march= argument. In addition to +-# replacement of a final "000" with "k", an argument starting with +-# "vr", "rm" or "r" followed by a number, or just a plain number, +-# matches a plain number or "r" followed by a plain number. 
+- +-function write_all_arch_values(name, value) +-{ +- write_arch_value_maybe_k(name, value, " Canonical") +- cname = name +- if (cname ~ "^vr") { +- sub("^vr", "", cname) +- } else if (cname ~ "^rm") { +- sub("^rm", "", cname) +- } else if (cname ~ "^r") { +- sub("^r", "", cname) +- } +- if (cname ~ "^[0-9]") { +- if (cname != name) +- write_arch_value_maybe_k(cname, value, "") +- rname = "r" cname +- if (rname != name) +- write_arch_value_maybe_k(rname, value, "") +- } +-} +- +-/^LARCH_CPU/ { +- name = $2 +- gsub("\"", "", name) +- write_all_arch_values(name, value) +- value++ +-}' $1/loongarch-cpus.def +diff --git a/gcc/config/loongarch/genopts/genstr.sh b/gcc/config/loongarch/genopts/genstr.sh +new file mode 100755 +index 000000000..e895f7ec8 +--- /dev/null ++++ b/gcc/config/loongarch/genopts/genstr.sh +@@ -0,0 +1,104 @@ ++#!/bin/sh ++# A simple script that generates loongarch-str.h and loongarch.opt ++# from genopt/loongarch-optstr. ++# ++# Copyright (C) 2020-2022 Free Software Foundation, Inc. ++# ++# This file is part of GCC. ++# ++# GCC is free software; you can redistribute it and/or modify it under ++# the terms of the GNU General Public License as published by the Free ++# Software Foundation; either version 3, or (at your option) any later ++# version. ++# ++# GCC is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++# License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# . ++ ++cd "$(dirname "$0")" ++ ++# Generate a header containing definitions from the string table. ++gen_defines() { ++ cat <. */ ++ ++#ifndef LOONGARCH_STR_H ++#define LOONGARCH_STR_H ++EOF ++ ++ sed -e '/^$/n' -e 's@#.*$@@' -e '/^$/d' \ ++ -e 's@^\([^ \t]\+\)[ \t]*\([^ \t]*\)@#define \1 "\2"@' \ ++ loongarch-strings ++ ++ echo ++ echo "#endif /* LOONGARCH_STR_H */" ++} ++ ++ ++# Substitute all "@@@@" to "" in loongarch.opt.in ++# according to the key-value pairs defined in loongarch-strings. ++ ++gen_options() { ++ ++ sed -e '/^$/n' -e 's@#.*$@@' -e '/^$/d' \ ++ -e 's@^\([^ \t]\+\)[ \t]*\([^ \t]*\)@\1="\2"@' \ ++ loongarch-strings | { \ ++ ++ # read the definitions ++ while read -r line; do ++ eval "$line" ++ done ++ ++ # print a header ++ cat << EOF ++; Generated by "genstr" from the template "loongarch.opt.in" ++; and definitions from "loongarch-strings". ++; ++; Please do not edit this file directly. ++; It will be automatically updated during a gcc build ++; if you change "loongarch.opt.in" or "loongarch-strings". ++; ++EOF ++ ++ # make the substitutions ++ sed -e 's@"@\\"@g' -e 's/@@\([^@]\+\)@@/${\1}/g' loongarch.opt.in | \ ++ while read -r line; do ++ eval "echo \"$line\"" ++ done ++ } ++} ++ ++main() { ++ case "$1" in ++ header) gen_defines;; ++ opt) gen_options;; ++ *) echo "Unknown Command: \"$1\". Available: header, opt"; exit 1;; ++ esac ++} ++ ++main "$@" +diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings +new file mode 100644 +index 000000000..d79e2e791 +--- /dev/null ++++ b/gcc/config/loongarch/genopts/loongarch-strings +@@ -0,0 +1,68 @@ ++# Defines the key strings for LoongArch compiler options. ++# ++# Copyright (C) 2020-2022 Free Software Foundation, Inc. ++# ++# This file is part of GCC. 
++# ++# GCC is free software; you can redistribute it and/or modify it under ++# the terms of the GNU General Public License as published by the Free ++# Software Foundation; either version 3, or (at your option) any later ++# version. ++# ++# GCC is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++# License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# . ++ ++# -march= / -mtune= ++OPTSTR_ARCH arch ++OPTSTR_TUNE tune ++ ++STR_CPU_NATIVE native ++STR_CPU_ABI_DEFAULT abi-default ++STR_CPU_LOONGARCH64 loongarch64 ++STR_CPU_LA464 la464 ++STR_CPU_LA364 la364 ++STR_CPU_LA264 la264 ++STR_CPU_LA664 la664 ++ ++# Base architecture ++STR_ISA_BASE_LA64V100 la64 ++ ++# -mfpu ++OPTSTR_ISA_EXT_FPU fpu ++STR_NONE none ++STR_ISA_EXT_FPU0 0 ++STR_ISA_EXT_FPU32 32 ++STR_ISA_EXT_FPU64 64 ++ ++OPTSTR_SOFT_FLOAT soft-float ++OPTSTR_SINGLE_FLOAT single-float ++OPTSTR_DOUBLE_FLOAT double-float ++ ++# SIMD extensions ++OPTSTR_ISA_EXT_SIMD simd ++STR_ISA_EXT_LSX lsx ++STR_ISA_EXT_LASX lasx ++ ++# -mabi= ++OPTSTR_ABI_BASE abi ++STR_ABI_BASE_LP64D lp64d ++STR_ABI_BASE_LP64F lp64f ++STR_ABI_BASE_LP64S lp64s ++STR_ABI_BASE_LP64 lp64 ++ ++# ABI extension types ++STR_ABI_EXT_BASE base ++ ++# -mcmodel= ++OPTSTR_CMODEL cmodel ++STR_CMODEL_NORMAL normal ++STR_CMODEL_TINY tiny ++STR_CMODEL_TS tiny-static ++STR_CMODEL_LARGE large ++STR_CMODEL_EXTREME extreme +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +new file mode 100644 +index 000000000..463dfec77 +--- /dev/null ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -0,0 +1,242 @@ ++; Generated by "genstr" from the template "loongarch.opt.in" ++; and definitions from "loongarch-strings". ++; ++; Copyright (C) 2020-2022 Free Software Foundation, Inc. ++; ++; This file is part of GCC. ++; ++; GCC is free software; you can redistribute it and/or modify it under ++; the terms of the GNU General Public License as published by the Free ++; Software Foundation; either version 3, or (at your option) any later ++; version. ++; ++; GCC is distributed in the hope that it will be useful, but WITHOUT ++; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++; License for more details. ++; ++; You should have received a copy of the GNU General Public License ++; along with GCC; see the file COPYING3. If not see ++; . ++; ++ ++HeaderInclude ++config/loongarch/loongarch-opts.h ++ ++HeaderInclude ++config/loongarch/loongarch-str.h ++ ++TargetVariable ++unsigned int recip_mask = 0 ++ ++; ISA related options ++;; Base ISA ++Enum ++Name(isa_base) Type(int) ++Basic ISAs of LoongArch: ++ ++EnumValue ++Enum(isa_base) String(@@STR_ISA_BASE_LA64V100@@) Value(ISA_BASE_LA64V100) ++ ++;; ISA extensions / adjustments ++Enum ++Name(isa_ext_fpu) Type(int) ++FPU types of LoongArch: ++ ++EnumValue ++Enum(isa_ext_fpu) String(@@STR_NONE@@) Value(ISA_EXT_NONE) ++ ++EnumValue ++Enum(isa_ext_fpu) String(@@STR_ISA_EXT_FPU32@@) Value(ISA_EXT_FPU32) ++ ++EnumValue ++Enum(isa_ext_fpu) String(@@STR_ISA_EXT_FPU64@@) Value(ISA_EXT_FPU64) ++ ++m@@OPTSTR_ISA_EXT_FPU@@= ++Target RejectNegative Joined ToLower Enum(isa_ext_fpu) Var(la_opt_fpu) Init(M_OPT_UNSET) ++-m@@OPTSTR_ISA_EXT_FPU@@=FPU Generate code for the given FPU. 
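For reference, the "header" mode of genstr.sh above turns each key/value pair in loongarch-strings into a C macro through the sed rule shown there; a minimal sketch of the loongarch-str.h lines it would generate for a few of the entries listed above (illustrative only, not part of the patch):

/* Sketch of gen_defines () output for some loongarch-strings entries above.  */
#define OPTSTR_ARCH "arch"
#define OPTSTR_TUNE "tune"
#define STR_CPU_NATIVE "native"
#define STR_CPU_LA464 "la464"
#define STR_ABI_BASE_LP64D "lp64d"

The "opt" mode performs the analogous @@KEY@@ substitution on loongarch.opt.in to produce the final loongarch.opt.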
++ ++m@@OPTSTR_ISA_EXT_FPU@@=@@STR_ISA_EXT_FPU0@@ ++Target RejectNegative Alias(m@@OPTSTR_ISA_EXT_FPU@@=,@@STR_NONE@@) ++ ++m@@OPTSTR_SOFT_FLOAT@@ ++Target Driver Defer Var(la_deferred_options) RejectNegative Negative(m@@OPTSTR_SINGLE_FLOAT@@) ++Prevent the use of all hardware floating-point instructions. ++ ++m@@OPTSTR_SINGLE_FLOAT@@ ++Target Driver Defer Var(la_deferred_options) RejectNegative Negative(m@@OPTSTR_DOUBLE_FLOAT@@) ++Restrict the use of hardware floating-point instructions to 32-bit operations. ++ ++m@@OPTSTR_DOUBLE_FLOAT@@ ++Target Driver Defer Var(la_deferred_options) RejectNegative Negative(m@@OPTSTR_SOFT_FLOAT@@) ++Allow hardware floating-point instructions to cover both 32-bit and 64-bit operations. ++ ++Enum ++Name(isa_ext_simd) Type(int) ++SIMD extension levels of LoongArch: ++ ++EnumValue ++Enum(isa_ext_simd) String(@@STR_NONE@@) Value(ISA_EXT_NONE) ++ ++EnumValue ++Enum(isa_ext_simd) String(@@STR_ISA_EXT_LSX@@) Value(ISA_EXT_SIMD_LSX) ++ ++EnumValue ++Enum(isa_ext_simd) String(@@STR_ISA_EXT_LASX@@) Value(ISA_EXT_SIMD_LASX) ++ ++m@@OPTSTR_ISA_EXT_SIMD@@= ++Target RejectNegative Joined ToLower Enum(isa_ext_simd) Var(la_opt_simd) Init(M_OPT_UNSET) ++-m@@OPTSTR_ISA_EXT_SIMD@@=SIMD Generate code for the given SIMD extension. ++ ++m@@STR_ISA_EXT_LSX@@ ++Target Driver Defer Var(la_deferred_options) ++Enable LoongArch SIMD Extension (LSX, 128-bit). ++ ++m@@STR_ISA_EXT_LASX@@ ++Target Driver Defer Var(la_deferred_options) ++Enable LoongArch Advanced SIMD Extension (LASX, 256-bit). ++ ++;; Base target models (implies ISA & tune parameters) ++Enum ++Name(cpu_type) Type(int) ++LoongArch CPU types: ++ ++EnumValue ++Enum(cpu_type) String(@@STR_CPU_NATIVE@@) Value(CPU_NATIVE) ++ ++EnumValue ++Enum(cpu_type) String(@@STR_CPU_ABI_DEFAULT@@) Value(CPU_ABI_DEFAULT) ++ ++EnumValue ++Enum(cpu_type) String(@@STR_CPU_LOONGARCH64@@) Value(CPU_LOONGARCH64) ++ ++EnumValue ++Enum(cpu_type) String(@@STR_CPU_LA664@@) Value(CPU_LA664) ++ ++EnumValue ++Enum(cpu_type) String(@@STR_CPU_LA464@@) Value(CPU_LA464) ++ ++EnumValue ++Enum(cpu_type) String(@@STR_CPU_LA264@@) Value(CPU_LA264) ++ ++EnumValue ++Enum(cpu_type) String(@@STR_CPU_LA364@@) Value(CPU_LA364) ++ ++m@@OPTSTR_ARCH@@= ++Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET) ++-m@@OPTSTR_ARCH@@=PROCESSOR Generate code for the given PROCESSOR ISA. ++ ++m@@OPTSTR_TUNE@@= ++Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_tune) Init(M_OPT_UNSET) ++-m@@OPTSTR_TUNE@@=PROCESSOR Generate optimized code for PROCESSOR. ++ ++ ++; ABI related options ++; (ISA constraints on ABI are handled dynamically) ++ ++;; Base ABI ++Enum ++Name(abi_base) Type(int) ++Base ABI types for LoongArch: ++ ++EnumValue ++Enum(abi_base) String(@@STR_ABI_BASE_LP64D@@) Value(ABI_BASE_LP64D) ++ ++EnumValue ++Enum(abi_base) String(@@STR_ABI_BASE_LP64F@@) Value(ABI_BASE_LP64F) ++ ++EnumValue ++Enum(abi_base) String(@@STR_ABI_BASE_LP64S@@) Value(ABI_BASE_LP64S) ++ ++m@@OPTSTR_ABI_BASE@@= ++Target RejectNegative Joined ToLower Enum(abi_base) Var(la_opt_abi_base) Init(M_OPT_UNSET) ++-m@@OPTSTR_ABI_BASE@@=BASEABI Generate code that conforms to the given BASEABI. ++ ++;; Legacy option: -mabi=lp64 ++m@@OPTSTR_ABI_BASE@@=@@STR_ABI_BASE_LP64@@ ++Target RejectNegative Mask(LP64) ++-m@@OPTSTR_ABI_BASE@@=@@STR_ABI_BASE_LP64@@ Legacy option that enables the lp64 integer ABI. 
++ ++;; ABI Extension ++Variable ++int la_opt_abi_ext = M_OPT_UNSET ++ ++mbranch-cost= ++Target RejectNegative Joined UInteger Var(loongarch_branch_cost) ++-mbranch-cost=COST Set the cost of branches to roughly COST instructions. ++ ++mvecarg ++Target Report Var(TARGET_VECARG) Init(1) ++Target pass vect arg uses vector register. ++ ++mmemvec-cost= ++Target RejectNegative Joined UInteger Var(loongarch_vector_access_cost) IntegerRange(1, 5) ++mmemvec-cost=COST Set the cost of vector memory access instructions. ++ ++mveclibabi= ++Target RejectNegative Joined Var(loongarch_veclibabi_name) ++Vector library ABI to use. ++ ++mstackrealign ++Target Var(loongarch_stack_realign) Init(1) ++Realign stack in prologue. ++ ++mforce-drap ++Target Var(loongarch_force_drap) Init(0) ++Always use Dynamic Realigned Argument Pointer (DRAP) to realign stack. ++ ++mcheck-zero-division ++Target Mask(CHECK_ZERO_DIV) ++Trap on integer divide by zero. ++ ++mcond-move-int ++Target Var(TARGET_COND_MOVE_INT) Init(1) ++Conditional moves for integral are enabled. ++ ++mcond-move-float ++Target Var(TARGET_COND_MOVE_FLOAT) Init(1) ++Conditional moves for float are enabled. ++ ++mmemcpy ++Target Mask(MEMCPY) ++Prevent optimizing block moves, which is also the default behavior of -Os. ++ ++mstrict-align ++Target Var(TARGET_STRICT_ALIGN) Init(0) ++Do not generate unaligned memory accesses. ++ ++mmax-inline-memcpy-size= ++Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init(1024) ++-mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024. ++ ++mrecip ++Target Report RejectNegative Var(loongarch_recip) ++Generate reciprocals instead of divss and sqrtss. ++ ++mrecip= ++Target Report RejectNegative Joined Var(loongarch_recip_name) ++Control generation of reciprocal estimates. ++ ++; The code model option names for -mcmodel. ++Enum ++Name(cmodel) Type(int) ++The code model option names for -mcmodel: ++ ++EnumValue ++Enum(cmodel) String(@@STR_CMODEL_NORMAL@@) Value(CMODEL_NORMAL) ++ ++EnumValue ++Enum(cmodel) String(@@STR_CMODEL_TINY@@) Value(CMODEL_TINY) ++ ++EnumValue ++Enum(cmodel) String(@@STR_CMODEL_TS@@) Value(CMODEL_TINY_STATIC) ++ ++EnumValue ++Enum(cmodel) String(@@STR_CMODEL_LARGE@@) Value(CMODEL_LARGE) ++ ++EnumValue ++Enum(cmodel) String(@@STR_CMODEL_EXTREME@@) Value(CMODEL_EXTREME) ++ ++mcmodel= ++Target RejectNegative Joined Enum(cmodel) Var(la_opt_cmodel) Init(M_OPT_UNSET) ++Specify the code model. +diff --git a/gcc/config/loongarch/gnu-user.h b/gcc/config/loongarch/gnu-user.h +index 1304e2e97..603aed5a2 100644 +--- a/gcc/config/loongarch/gnu-user.h ++++ b/gcc/config/loongarch/gnu-user.h +@@ -1,4 +1,5 @@ +-/* Definitions for LARCH systems using GNU userspace. ++/* Definitions for LoongArch systems using GNU (glibc-based) userspace, ++ or other userspace with libc derived from glibc. + Copyright (C) 1998-2018 Free Software Foundation, Inc. + + This file is part of GCC. +@@ -17,116 +18,66 @@ You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + ++/* Define the size of the wide character type. */ + #undef WCHAR_TYPE + #define WCHAR_TYPE "int" + + #undef WCHAR_TYPE_SIZE + #define WCHAR_TYPE_SIZE 32 + +-#undef ASM_DECLARE_OBJECT_NAME +-#define ASM_DECLARE_OBJECT_NAME loongarch_declare_object_name + +-/* If we don't set MASK_ABICALLS, we can't default to PIC. */ +-/* #undef TARGET_DEFAULT */ +-/* #define TARGET_DEFAULT MASK_ABICALLS */ ++/* GNU-specific SPEC definitions. 
*/ ++#define GNU_USER_LINK_EMULATION "elf" ABI_GRLEN_SPEC "loongarch" + +-#define TARGET_OS_CPP_BUILTINS() \ +- do { \ +- GNU_USER_TARGET_OS_CPP_BUILTINS(); \ +- /* The GNU C++ standard library requires this. */ \ +- if (c_dialect_cxx ()) \ +- builtin_define ("_GNU_SOURCE"); \ +- } while (0) ++#undef GLIBC_DYNAMIC_LINKER ++#define GLIBC_DYNAMIC_LINKER \ ++ "/lib" ABI_GRLEN_SPEC "/" \ ++ "%{mabi=lp64d:ld.so.1;" \ ++ "mabi=lp64s:ld-linux-loongarch-lp64s.so.1;" \ ++ "mabi=lp64f:ld-linux-loongarch-lp64f.so.1}" + +-#undef SUBTARGET_CPP_SPEC +-#define SUBTARGET_CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}" +- +-/* A standard GNU/Linux mapping. On most targets, it is included in +- CC1_SPEC itself by config/linux.h, but loongarch.h overrides CC1_SPEC +- and provides this hook instead. */ +-#undef SUBTARGET_CC1_SPEC +-#define SUBTARGET_CC1_SPEC GNU_USER_TARGET_CC1_SPEC +- +-/* -G is incompatible with -KPIC which is the default, so only allow objects +- in the small data section if the user explicitly asks for it. */ +-#undef LARCH_DEFAULT_GVALUE +-#define LARCH_DEFAULT_GVALUE 0 ++#undef MUSL_DYNAMIC_LINKER ++#define MUSL_DYNAMIC_LINKER \ ++ "/lib" ABI_GRLEN_SPEC "/ld-musl-loongarch-" ABI_SPEC ".so.1" + + #undef GNU_USER_TARGET_LINK_SPEC +-#define GNU_USER_TARGET_LINK_SPEC "\ +- %{G*} %{EB} %{EL} %{shared} \ +- %{!shared: \ +- %{!static: \ +- %{rdynamic:-export-dynamic} \ +- %{mabi=lp32: -dynamic-linker " GNU_USER_DYNAMIC_LINKERLP32 "} \ +- %{mabi=lp64: -dynamic-linker " GNU_USER_DYNAMIC_LINKERLP64 "}} \ +- %{static}} \ +- %{mabi=lp32:-m" GNU_USER_LINK_EMULATION32 "} \ +- %{mabi=lp64:-m" GNU_USER_LINK_EMULATION64 "}" ++#define GNU_USER_TARGET_LINK_SPEC \ ++ "%{G*} %{shared} -m " GNU_USER_LINK_EMULATION \ ++ "%{!shared: %{static} %{!static: %{rdynamic:-export-dynamic} " \ ++ "-dynamic-linker " GNU_USER_DYNAMIC_LINKER "}}" + +-#undef LINK_SPEC +-#define LINK_SPEC GNU_USER_TARGET_LINK_SPEC + +-/* The LARCH assembler has different syntax for .set. We set it to +- .dummy to trap any errors. */ +-#undef SET_ASM_OP +-#define SET_ASM_OP "\t.dummy\t" +- +-#undef ASM_OUTPUT_DEF +-#define ASM_OUTPUT_DEF(FILE,LABEL1,LABEL2) \ +- do { \ +- fputc ( '\t', FILE); \ +- assemble_name (FILE, LABEL1); \ +- fputs ( " = ", FILE); \ +- assemble_name (FILE, LABEL2); \ +- fputc ( '\n', FILE); \ +- } while (0) +- +-/* The glibc _mcount stub will save $v0 for us. Don't mess with saving +- it, since ASM_OUTPUT_REG_PUSH/ASM_OUTPUT_REG_POP do not work in the +- presence of $gp-relative calls. */ +-#undef ASM_OUTPUT_REG_PUSH +-#undef ASM_OUTPUT_REG_POP ++/* Similar to standard Linux, but adding -ffast-math support. */ ++#undef GNU_USER_TARGET_MATHFILE_SPEC ++#define GNU_USER_TARGET_MATHFILE_SPEC \ ++ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}" + + #undef LIB_SPEC + #define LIB_SPEC GNU_USER_TARGET_LIB_SPEC + +-#define NO_SHARED_SPECS "" +- +-/* -march=native handling only makes sense with compiler running on +- a LARCH chip. */ +-#if defined(__loongarch__) +-extern const char *host_detect_local_cpu (int argc, const char **argv); +-# define EXTRA_SPEC_FUNCTIONS \ +- { "local_cpu_detect", host_detect_local_cpu }, +- +-# define MARCH_MTUNE_NATIVE_SPECS \ +- " %{march=native:%. ++ ++;; Uncomment the following line to output automata for debugging. ++;; (automata_option "v") ++ ++;; Automaton for integer instructions. ++(define_automaton "la464_a_alu") ++ ++;; Automaton for floating-point instructions. ++(define_automaton "la464_a_falu") ++ ++;; Automaton for memory operations. 
++(define_automaton "la464_a_mem") ++ ++;; Describe the resources. ++ ++(define_cpu_unit "la464_alu1" "la464_a_alu") ++(define_cpu_unit "la464_alu2" "la464_a_alu") ++(define_cpu_unit "la464_mem1" "la464_a_mem") ++(define_cpu_unit "la464_mem2" "la464_a_mem") ++(define_cpu_unit "la464_falu1" "la464_a_falu") ++(define_cpu_unit "la464_falu2" "la464_a_falu") ++ ++;; Describe instruction reservations. ++ ++(define_insn_reservation "la464_arith" 1 ++ (and (match_test "TARGET_uARCH_LA464 || TARGET_uARCH_LA664") ++ (eq_attr "type" "arith,clz,const,logical, ++ move,nop,shift,signext,slt")) ++ "la464_alu1 | la464_alu2") ++ ++(define_insn_reservation "la464_branch" 1 ++ (and (match_test "TARGET_uARCH_LA464 || TARGET_uARCH_LA664") ++ (eq_attr "type" "branch,jump,call,condmove,trap")) ++ "la464_alu1 | la464_alu2") ++ ++(define_insn_reservation "la464_imul" 7 ++ (and (match_test "TARGET_uARCH_LA464 || TARGET_uARCH_LA664") ++ (eq_attr "type" "imul")) ++ "la464_alu1 | la464_alu2") ++ ++(define_insn_reservation "la464_idiv_si" 12 ++ (and (match_test "TARGET_uARCH_LA464 || TARGET_uARCH_LA664") ++ (and (eq_attr "type" "idiv") ++ (eq_attr "mode" "SI"))) ++ "la464_alu1 | la464_alu2") ++ ++(define_insn_reservation "la464_idiv_di" 25 ++ (and (match_test "TARGET_uARCH_LA464 || TARGET_uARCH_LA664") ++ (and (eq_attr "type" "idiv") ++ (eq_attr "mode" "DI"))) ++ "la464_alu1 | la464_alu2") ++ ++(define_insn_reservation "la464_load" 4 ++ (and (match_test "TARGET_uARCH_LA464 || TARGET_uARCH_LA664") ++ (eq_attr "type" "load")) ++ "la464_mem1 | la464_mem2") ++ ++(define_insn_reservation "la464_gpr_fp" 16 ++ (and (match_test "TARGET_uARCH_LA464 || TARGET_uARCH_LA664") ++ (eq_attr "type" "mftg,mgtf")) ++ "la464_mem1") ++ ++(define_insn_reservation "la464_fpload" 4 ++ (and (match_test "TARGET_uARCH_LA464 || TARGET_uARCH_LA664") ++ (eq_attr "type" "fpload")) ++ "la464_mem1 | la464_mem2") ++ ++(define_insn_reservation "la464_prefetch" 0 ++ (and (match_test "TARGET_uARCH_LA464 || TARGET_uARCH_LA664") ++ (eq_attr "type" "prefetch,prefetchx")) ++ "la464_mem1 | la464_mem2") ++ ++(define_insn_reservation "la464_store" 0 ++ (and (match_test "TARGET_uARCH_LA464 || TARGET_uARCH_LA664") ++ (eq_attr "type" "store,fpstore,fpidxstore")) ++ "la464_mem1 | la464_mem2") ++ ++(define_insn_reservation "la464_fadd" 4 ++ (and (match_test "TARGET_uARCH_LA464 || TARGET_uARCH_LA664") ++ (eq_attr "type" "fadd,fmul,fmadd")) ++ "la464_falu1 | la464_falu2") ++ ++(define_insn_reservation "la464_fcmp" 2 ++ (and (match_test "TARGET_uARCH_LA464 || TARGET_uARCH_LA664") ++ (eq_attr "type" "fabs,fcmp,fmove,fneg")) ++ "la464_falu1 | la464_falu2") ++ ++(define_insn_reservation "la464_fcvt" 4 ++ (and (match_test "TARGET_uARCH_LA464 || TARGET_uARCH_LA664") ++ (eq_attr "type" "fcvt")) ++ "la464_falu1 | la464_falu2") ++ ++(define_insn_reservation "la464_fdiv_sf" 12 ++ (and (match_test "TARGET_uARCH_LA464 || TARGET_uARCH_LA664") ++ (and (eq_attr "type" "fdiv,frdiv,fsqrt,frsqrt") ++ (eq_attr "mode" "SF"))) ++ "la464_falu1 | la464_falu2") ++ ++(define_insn_reservation "la464_fdiv_df" 19 ++ (and (match_test "TARGET_uARCH_LA464 || TARGET_uARCH_LA664") ++ (and (eq_attr "type" "fdiv,frdiv,fsqrt,frsqrt") ++ (eq_attr "mode" "DF"))) ++ "la464_falu1 | la464_falu2") ++ ++;; Force single-dispatch for unknown or multi. 
++(define_insn_reservation "la464_unknown" 1 ++ (and (match_test "TARGET_uARCH_LA464 || TARGET_uARCH_LA664") ++ (eq_attr "type" "unknown,multi,atomic,syncloop")) ++ "la464_alu1 + la464_alu2 + la464_falu1 ++ + la464_falu2 + la464_mem1 + la464_mem2") ++ ++;; End of DFA-based pipeline description for la464 +diff --git a/gcc/config/loongarch/larchintrin.h b/gcc/config/loongarch/larchintrin.h +index c649bf3f4..8e26ed6f0 100644 +--- a/gcc/config/loongarch/larchintrin.h ++++ b/gcc/config/loongarch/larchintrin.h +@@ -1,384 +1,353 @@ + /* Intrinsics for LoongArch BASE operations. ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. ++ Contributed by Loongson Ltd. + +- Copyright (C) 2019 Free Software Foundation, Inc. +- Contributed by xuchenghua@loongson.cn. ++This file is part of GCC. + +- This file is part of GCC. ++GCC is free software; you can redistribute it and/or modify it ++under the terms of the GNU General Public License as published ++by the Free Software Foundation; either version 3, or (at your ++option) any later version. + +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. ++GCC is distributed in the hope that it will be useful, but WITHOUT ++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++License for more details. + +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. ++Under Section 7 of GPL version 3, you are granted additional ++permissions described in the GCC Runtime Library Exception, version ++3.1, as published by the Free Software Foundation. + +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- . */ ++You should have received a copy of the GNU General Public License and ++a copy of the GCC Runtime Library Exception along with this program; ++see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++. 
*/ + + #ifndef _GCC_LOONGARCH_BASE_INTRIN_H + #define _GCC_LOONGARCH_BASE_INTRIN_H + + #ifdef __cplusplus +-extern "C"{ ++extern "C" { + #endif + +-typedef struct drdtime{ +- unsigned long dvalue; +- unsigned long dtimeid; ++typedef struct drdtime ++{ ++ unsigned long dvalue; ++ unsigned long dtimeid; + } __drdtime_t; + +-typedef struct rdtime{ +- unsigned int value; +- unsigned int timeid; ++typedef struct rdtime ++{ ++ unsigned int value; ++ unsigned int timeid; + } __rdtime_t; + + #ifdef __loongarch64 +-extern __inline __drdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__builtin_loongarch_rdtime_d (void) ++extern __inline __drdtime_t ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__rdtime_d (void) + { +- __drdtime_t drdtime; ++ __drdtime_t __drdtime; + __asm__ volatile ( + "rdtime.d\t%[val],%[tid]\n\t" +- : [val]"=&r"(drdtime.dvalue),[tid]"=&r"(drdtime.dtimeid) +- : +- ); +- return drdtime; ++ : [val]"=&r"(__drdtime.dvalue),[tid]"=&r"(__drdtime.dtimeid) ++ :); ++ return __drdtime; + } +-#define __rdtime_d __builtin_loongarch_rdtime_d + #endif + +-extern __inline __rdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__builtin_loongarch_rdtimeh_w (void) ++extern __inline __rdtime_t ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__rdtimeh_w (void) + { +- __rdtime_t rdtime; ++ __rdtime_t __rdtime; + __asm__ volatile ( + "rdtimeh.w\t%[val],%[tid]\n\t" +- : [val]"=&r"(rdtime.value),[tid]"=&r"(rdtime.timeid) +- : +- ); +- return rdtime; ++ : [val]"=&r"(__rdtime.value),[tid]"=&r"(__rdtime.timeid) ++ :); ++ return __rdtime; + } +-#define __rdtimel_w __builtin_loongarch_rdtimel_w + +-extern __inline __rdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__builtin_loongarch_rdtimel_w (void) ++extern __inline __rdtime_t ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__rdtimel_w (void) + { +- __rdtime_t rdtime; ++ __rdtime_t __rdtime; + __asm__ volatile ( + "rdtimel.w\t%[val],%[tid]\n\t" +- : [val]"=&r"(rdtime.value),[tid]"=&r"(rdtime.timeid) +- : +- ); +- return rdtime; ++ : [val]"=&r"(__rdtime.value),[tid]"=&r"(__rdtime.timeid) ++ :); ++ return __rdtime; + } +-#define __rdtimeh_w __builtin_loongarch_rdtimeh_w +- +-/* Assembly instruction format: rj, fcsr */ +-/* Data types in instruction templates: USI, UQI */ +-#define __movfcsr2gr(/*ui5*/_1) __builtin_loongarch_movfcsr2gr((_1)); +- +-/* Assembly instruction format: 0, fcsr, rj */ +-/* Data types in instruction templates: VOID, UQI, USI */ +-#define __movgr2fcsr(/*ui5*/ _1, _2) __builtin_loongarch_movgr2fcsr((unsigned short)_1, (unsigned int)_2); +- +-#ifdef __loongarch32 +-/* Assembly instruction format: ui5, rj, si12 */ +-/* Data types in instruction templates: VOID, USI, USI, SI */ +-#define __cacop(/*ui5*/ _1, /*unsigned int*/ _2, /*si12*/ _3) ((void)__builtin_loongarch_cacop((_1), (unsigned int)(_2), (_3))) +-#elif defined __loongarch64 +-/* Assembly instruction format: ui5, rj, si12 */ +-/* Data types in instruction templates: VOID, USI, UDI, SI */ +-#define __dcacop(/*ui5*/ _1, /*unsigned long int*/ _2, /*si12*/ _3) ((void)__builtin_loongarch_dcacop((_1), (unsigned long int)(_2), (_3))) ++ ++/* Assembly instruction format: rj, fcsr. */ ++/* Data types in instruction templates: USI, UQI. */ ++#define __movfcsr2gr(/*ui5*/ _1) __builtin_loongarch_movfcsr2gr ((_1)); ++ ++/* Assembly instruction format: fcsr, rj. */ ++/* Data types in instruction templates: VOID, UQI, USI. 
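The timer access routines above are now plain inline functions named __rdtime_d, __rdtimel_w and __rdtimeh_w instead of __builtin_loongarch_* definitions wrapped by macros. A minimal usage sketch, assuming larchintrin.h is included on a __loongarch64 target (read_stable_counter is a hypothetical helper, not part of the patch):

#include <larchintrin.h>

/* Read the 64-bit stable counter via rdtime.d; dvalue holds the counter
   value and dtimeid the counter ID, per the __drdtime_t struct above.  */
unsigned long
read_stable_counter (void)
{
  __drdtime_t t = __rdtime_d ();
  return t.dvalue;
}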
*/ ++#define __movgr2fcsr(/*ui5*/ _1, _2) \ ++ __builtin_loongarch_movgr2fcsr ((_1), (unsigned int) _2); ++ ++#if defined __loongarch64 ++/* Assembly instruction format: ui5, rj, si12. */ ++/* Data types in instruction templates: VOID, USI, UDI, SI. */ ++#define __cacop_d(/*ui5*/ _1, /*unsigned long int*/ _2, /*si12*/ _3) \ ++ ((void) __builtin_loongarch_cacop_d ((_1), (unsigned long int) (_2), (_3))) + #else +-# error "Don't support this ABI." ++#error "Unsupported ABI." + #endif + +-/* Assembly instruction format: rd, rj */ +-/* Data types in instruction templates: USI, USI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-unsigned int __cpucfg(unsigned int _1) ++/* Assembly instruction format: rd, rj. */ ++/* Data types in instruction templates: USI, USI. */ ++extern __inline unsigned int ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__cpucfg (unsigned int _1) + { +- return (unsigned int)__builtin_loongarch_cpucfg((unsigned int)_1); ++ return (unsigned int) __builtin_loongarch_cpucfg ((unsigned int) _1); + } + + #ifdef __loongarch64 +-/* Assembly instruction format: rd, rj */ +-/* Data types in instruction templates: DI, DI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-void __asrtle_d(long int _1, long int _2) ++/* Assembly instruction format: rj, rk. */ ++/* Data types in instruction templates: DI, DI. */ ++extern __inline void ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__asrtle_d (long int _1, long int _2) + { +- __builtin_loongarch_asrtle_d((long int)_1, (long int)_2); ++ __builtin_loongarch_asrtle_d ((long int) _1, (long int) _2); + } + +-/* Assembly instruction format: rd, rj */ +-/* Data types in instruction templates: DI, DI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-void __asrtgt_d(long int _1, long int _2) ++/* Assembly instruction format: rj, rk. */ ++/* Data types in instruction templates: DI, DI. */ ++extern __inline void ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__asrtgt_d (long int _1, long int _2) + { +- __builtin_loongarch_asrtgt_d((long int)_1, (long int)_2); ++ __builtin_loongarch_asrtgt_d ((long int) _1, (long int) _2); + } + #endif + +-#ifdef __loongarch32 +-/* Assembly instruction format: rd, rj, ui5 */ +-/* Data types in instruction templates: SI, SI, UQI */ +-#define __lddir(/*int*/ _1, /*ui5*/ _2) ((int)__builtin_loongarch_lddir((int)(_1), (_2))) +-#elif defined __loongarch64 +-/* Assembly instruction format: rd, rj, ui5 */ +-/* Data types in instruction templates: DI, DI, UQI */ +-#define __dlddir(/*long int*/ _1, /*ui5*/ _2) ((long int)__builtin_loongarch_dlddir((long int)(_1), (_2))) ++#if defined __loongarch64 ++/* Assembly instruction format: rd, rj, ui5. */ ++/* Data types in instruction templates: DI, DI, UQI. */ ++#define __lddir_d(/*long int*/ _1, /*ui5*/ _2) \ ++ ((long int) __builtin_loongarch_lddir_d ((long int) (_1), (_2))) + #else +-# error "Don't support this ABI." ++#error "Unsupported ABI." 
+ #endif + +-#ifdef __loongarch32 +-/* Assembly instruction format: rj, ui5 */ +-/* Data types in instruction templates: VOID, SI, UQI */ +-#define __ldpte(/*int*/ _1, /*ui5*/ _2) ((void)__builtin_loongarch_ldpte((int)(_1), (_2))) +-#elif defined __loongarch64 +-/* Assembly instruction format: rj, ui5 */ +-/* Data types in instruction templates: VOID, DI, UQI */ +-#define __dldpte(/*long int*/ _1, /*ui5*/ _2) ((void)__builtin_loongarch_dldpte((long int)(_1), (_2))) ++#if defined __loongarch64 ++/* Assembly instruction format: rj, ui5. */ ++/* Data types in instruction templates: VOID, DI, UQI. */ ++#define __ldpte_d(/*long int*/ _1, /*ui5*/ _2) \ ++ ((void) __builtin_loongarch_ldpte_d ((long int) (_1), (_2))) + #else +-# error "Don't support this ABI." ++#error "Unsupported ABI." + #endif + +-/* Assembly instruction format: rd, rj, rk */ +-/* Data types in instruction templates: SI, QI, SI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-int __crc_w_b_w(char _1, int _2) ++/* Assembly instruction format: rd, rj, rk. */ ++/* Data types in instruction templates: SI, QI, SI. */ ++extern __inline int ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__crc_w_b_w (char _1, int _2) + { +- return (int)__builtin_loongarch_crc_w_b_w((char)_1, (int)_2); ++ return (int) __builtin_loongarch_crc_w_b_w ((char) _1, (int) _2); + } + +-/* Assembly instruction format: rd, rj, rk */ +-/* Data types in instruction templates: SI, HI, SI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-int __crc_w_h_w(short _1, int _2) ++/* Assembly instruction format: rd, rj, rk. */ ++/* Data types in instruction templates: SI, HI, SI. */ ++extern __inline int ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__crc_w_h_w (short _1, int _2) + { +- return (int)__builtin_loongarch_crc_w_h_w((short)_1, (int)_2); ++ return (int) __builtin_loongarch_crc_w_h_w ((short) _1, (int) _2); + } + +-/* Assembly instruction format: rd, rj, rk */ +-/* Data types in instruction templates: SI, SI, SI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-int __crc_w_w_w(int _1, int _2) ++/* Assembly instruction format: rd, rj, rk. */ ++/* Data types in instruction templates: SI, SI, SI. */ ++extern __inline int ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__crc_w_w_w (int _1, int _2) + { +- return (int)__builtin_loongarch_crc_w_w_w((int)_1, (int)_2); ++ return (int) __builtin_loongarch_crc_w_w_w ((int) _1, (int) _2); + } + + #ifdef __loongarch64 +-/* Assembly instruction format: rd, rj, rk */ +-/* Data types in instruction templates: SI, DI, SI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-int __crc_w_d_w(long int _1, int _2) ++/* Assembly instruction format: rd, rj, rk. */ ++/* Data types in instruction templates: SI, DI, SI. */ ++extern __inline int ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__crc_w_d_w (long int _1, int _2) + { +- return (int)__builtin_loongarch_crc_w_d_w((long int)_1, (int)_2); ++ return (int) __builtin_loongarch_crc_w_d_w ((long int) _1, (int) _2); + } + #endif + +-/* Assembly instruction format: rd, rj, rk */ +-/* Data types in instruction templates: SI, QI, SI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-int __crcc_w_b_w(char _1, int _2) ++/* Assembly instruction format: rd, rj, rk. 
*/ ++/* Data types in instruction templates: SI, QI, SI. */ ++extern __inline int ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__crcc_w_b_w (char _1, int _2) + { +- return (int)__builtin_loongarch_crcc_w_b_w((char)_1, (int)_2); ++ return (int) __builtin_loongarch_crcc_w_b_w ((char) _1, (int) _2); + } + +-/* Assembly instruction format: rd, rj, rk */ +-/* Data types in instruction templates: SI, HI, SI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-int __crcc_w_h_w(short _1, int _2) ++/* Assembly instruction format: rd, rj, rk. */ ++/* Data types in instruction templates: SI, HI, SI. */ ++extern __inline int ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__crcc_w_h_w (short _1, int _2) + { +- return (int)__builtin_loongarch_crcc_w_h_w((short)_1, (int)_2); ++ return (int) __builtin_loongarch_crcc_w_h_w ((short) _1, (int) _2); + } + +-/* Assembly instruction format: rd, rj, rk */ +-/* Data types in instruction templates: SI, SI, SI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-int __crcc_w_w_w(int _1, int _2) ++/* Assembly instruction format: rd, rj, rk. */ ++/* Data types in instruction templates: SI, SI, SI. */ ++extern __inline int ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__crcc_w_w_w (int _1, int _2) + { +- return (int)__builtin_loongarch_crcc_w_w_w((int)_1, (int)_2); ++ return (int) __builtin_loongarch_crcc_w_w_w ((int) _1, (int) _2); + } + + #ifdef __loongarch64 +-/* Assembly instruction format: rd, rj, rk */ +-/* Data types in instruction templates: SI, DI, SI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-int __crcc_w_d_w(long int _1, int _2) ++/* Assembly instruction format: rd, rj, rk. */ ++/* Data types in instruction templates: SI, DI, SI. */ ++extern __inline int ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__crcc_w_d_w (long int _1, int _2) + { +- return (int)__builtin_loongarch_crcc_w_d_w((long int)_1, (int)_2); ++ return (int) __builtin_loongarch_crcc_w_d_w ((long int) _1, (int) _2); + } + #endif + +-/* Assembly instruction format: rd, ui14 */ +-/* Data types in instruction templates: USI, USI */ +-#define __csrrd(/*ui14*/ _1) ((unsigned int)__builtin_loongarch_csrrd((_1))) ++/* Assembly instruction format: rd, ui14. */ ++/* Data types in instruction templates: USI, USI. */ ++#define __csrrd_w(/*ui14*/ _1) \ ++ ((unsigned int) __builtin_loongarch_csrrd_w ((_1))) + +-/* Assembly instruction format: rd, ui14 */ +-/* Data types in instruction templates: USI, USI, USI */ +-#define __csrwr(/*unsigned int*/ _1, /*ui14*/ _2) ((unsigned int)__builtin_loongarch_csrwr((unsigned int)(_1), (_2))) ++/* Assembly instruction format: rd, ui14. */ ++/* Data types in instruction templates: USI, USI, USI. */ ++#define __csrwr_w(/*unsigned int*/ _1, /*ui14*/ _2) \ ++ ((unsigned int) __builtin_loongarch_csrwr_w ((unsigned int) (_1), (_2))) + +-/* Assembly instruction format: rd, rj, ui14 */ +-/* Data types in instruction templates: USI, USI, USI, USI */ +-#define __csrxchg(/*unsigned int*/ _1, /*unsigned int*/ _2, /*ui14*/ _3) ((unsigned int)__builtin_loongarch_csrxchg((unsigned int)(_1), (unsigned int)(_2), (_3))) ++/* Assembly instruction format: rd, rj, ui14. */ ++/* Data types in instruction templates: USI, USI, USI, USI. 
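The CRC builtins keep their __crc_w_*_w and __crcc_w_*_w spellings but are reformatted as GNU-style inline functions. A usage sketch that folds a stream of 32-bit words through __crc_w_w_w (crc_words is a hypothetical helper; assumes larchintrin.h is included):

/* Accumulate a CRC over N words with the inline wrapper declared above.
   The starting CRC value is supplied by the caller.  */
static int
crc_words (const int *buf, int n, int crc)
{
  for (int i = 0; i < n; i++)
    crc = __crc_w_w_w (buf[i], crc);
  return crc;
}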
*/ ++#define __csrxchg_w(/*unsigned int*/ _1, /*unsigned int*/ _2, /*ui14*/ _3) \ ++ ((unsigned int) __builtin_loongarch_csrxchg_w ((unsigned int) (_1), \ ++ (unsigned int) (_2), (_3))) + + #ifdef __loongarch64 +-/* Assembly instruction format: rd, ui14 */ +-/* Data types in instruction templates: UDI, USI */ +-#define __dcsrrd(/*ui14*/ _1) ((unsigned long int)__builtin_loongarch_dcsrrd((_1))) +- +-/* Assembly instruction format: rd, ui14 */ +-/* Data types in instruction templates: UDI, UDI, USI */ +-#define __dcsrwr(/*unsigned long int*/ _1, /*ui14*/ _2) ((unsigned long int)__builtin_loongarch_dcsrwr((unsigned long int)(_1), (_2))) +- +-/* Assembly instruction format: rd, rj, ui14 */ +-/* Data types in instruction templates: UDI, UDI, UDI, USI */ +-#define __dcsrxchg(/*unsigned long int*/ _1, /*unsigned long int*/ _2, /*ui14*/ _3) ((unsigned long int)__builtin_loongarch_dcsrxchg((unsigned long int)(_1), (unsigned long int)(_2), (_3))) ++/* Assembly instruction format: rd, ui14. */ ++/* Data types in instruction templates: UDI, USI. */ ++#define __csrrd_d(/*ui14*/ _1) \ ++ ((unsigned long int) __builtin_loongarch_csrrd_d ((_1))) ++ ++/* Assembly instruction format: rd, ui14. */ ++/* Data types in instruction templates: UDI, UDI, USI. */ ++#define __csrwr_d(/*unsigned long int*/ _1, /*ui14*/ _2) \ ++ ((unsigned long int) __builtin_loongarch_csrwr_d ((unsigned long int) (_1), \ ++ (_2))) ++ ++/* Assembly instruction format: rd, rj, ui14. */ ++/* Data types in instruction templates: UDI, UDI, UDI, USI. */ ++#define __csrxchg_d(/*unsigned long int*/ _1, /*unsigned long int*/ _2, \ ++ /*ui14*/ _3) \ ++ ((unsigned long int) __builtin_loongarch_csrxchg_d ( \ ++ (unsigned long int) (_1), (unsigned long int) (_2), (_3))) + #endif + +-/* Assembly instruction format: rd, rj */ +-/* Data types in instruction templates: UQI, USI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-unsigned char __iocsrrd_b(unsigned int _1) ++/* Assembly instruction format: rd, rj. */ ++/* Data types in instruction templates: UQI, USI. */ ++extern __inline unsigned char ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__iocsrrd_b (unsigned int _1) + { +- return (unsigned char)__builtin_loongarch_iocsrrd_b((unsigned int)_1); ++ return (unsigned char) __builtin_loongarch_iocsrrd_b ((unsigned int) _1); + } + +-/* Assembly instruction format: rd, rj */ +-/* Data types in instruction templates: UHI, USI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-unsigned short __iocsrrd_h(unsigned int _1) ++/* Assembly instruction format: rd, rj. */ ++/* Data types in instruction templates: UHI, USI. */ ++extern __inline unsigned char ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__iocsrrd_h (unsigned int _1) + { +- return (unsigned short)__builtin_loongarch_iocsrrd_h((unsigned int)_1); ++ return (unsigned short) __builtin_loongarch_iocsrrd_h ((unsigned int) _1); + } + +-/* Assembly instruction format: rd, rj */ +-/* Data types in instruction templates: USI, USI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-unsigned int __iocsrrd_w(unsigned int _1) ++/* Assembly instruction format: rd, rj. */ ++/* Data types in instruction templates: USI, USI. 
*/ ++extern __inline unsigned int ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__iocsrrd_w (unsigned int _1) + { +- return (unsigned int)__builtin_loongarch_iocsrrd_w((unsigned int)_1); ++ return (unsigned int) __builtin_loongarch_iocsrrd_w ((unsigned int) _1); + } + + #ifdef __loongarch64 +-/* Assembly instruction format: rd, rj */ +-/* Data types in instruction templates: UDI, USI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-unsigned long int __iocsrrd_d(unsigned int _1) ++/* Assembly instruction format: rd, rj. */ ++/* Data types in instruction templates: UDI, USI. */ ++extern __inline unsigned long int ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__iocsrrd_d (unsigned int _1) + { +- return (unsigned long int)__builtin_loongarch_iocsrrd_d((unsigned int)_1); ++ return (unsigned long int) __builtin_loongarch_iocsrrd_d ((unsigned int) _1); + } + #endif + +-/* Assembly instruction format: rd, rj */ +-/* Data types in instruction templates: VOID, UQI, USI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-void __iocsrwr_b(unsigned char _1, unsigned int _2) ++/* Assembly instruction format: rd, rj. */ ++/* Data types in instruction templates: VOID, UQI, USI. */ ++extern __inline void ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__iocsrwr_b (unsigned char _1, unsigned int _2) + { +- return (void)__builtin_loongarch_iocsrwr_b((unsigned char)_1, (unsigned int)_2); ++ __builtin_loongarch_iocsrwr_b ((unsigned char) _1, (unsigned int) _2); + } + +-/* Assembly instruction format: rd, rj */ +-/* Data types in instruction templates: VOID, UHI, USI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-void __iocsrwr_h(unsigned short _1, unsigned int _2) ++/* Assembly instruction format: rd, rj. */ ++/* Data types in instruction templates: VOID, UHI, USI. */ ++extern __inline void ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__iocsrwr_h (unsigned short _1, unsigned int _2) + { +- return (void)__builtin_loongarch_iocsrwr_h((unsigned short)_1, (unsigned int)_2); ++ __builtin_loongarch_iocsrwr_h ((unsigned short) _1, (unsigned int) _2); + } + +-/* Assembly instruction format: rd, rj */ +-/* Data types in instruction templates: VOID, USI, USI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-void __iocsrwr_w(unsigned int _1, unsigned int _2) ++/* Assembly instruction format: rd, rj. */ ++/* Data types in instruction templates: VOID, USI, USI. */ ++extern __inline void ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__iocsrwr_w (unsigned int _1, unsigned int _2) + { +- return (void)__builtin_loongarch_iocsrwr_w((unsigned int)_1, (unsigned int)_2); ++ __builtin_loongarch_iocsrwr_w ((unsigned int) _1, (unsigned int) _2); + } + + #ifdef __loongarch64 +-/* Assembly instruction format: rd, rj */ +-/* Data types in instruction templates: VOID, UDI, USI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-void __iocsrwr_d(unsigned long int _1, unsigned int _2) ++/* Assembly instruction format: rd, rj. */ ++/* Data types in instruction templates: VOID, UDI, USI. 
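The IOCSR access routines above follow the same pattern: __iocsrrd_b/h/w/d and __iocsrwr_b/h/w/d become inline functions rather than builtin wrappers. A sketch of a read-modify-write on an IOCSR register (IOCSR_REG is a hypothetical offset used purely for illustration; these are privileged operations):

#define IOCSR_REG 0x420u	/* hypothetical IOCSR register offset */

/* Set one bit in a 32-bit IOCSR register using the intrinsics above.  */
static void
iocsr_set_bit (unsigned int bit)
{
  unsigned int v = __iocsrrd_w (IOCSR_REG);
  __iocsrwr_w (v | (1u << bit), IOCSR_REG);
}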
*/ ++extern __inline void ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__iocsrwr_d (unsigned long int _1, unsigned int _2) + { +- return (void)__builtin_loongarch_iocsrwr_d((unsigned long int)_1, (unsigned int)_2); ++ __builtin_loongarch_iocsrwr_d ((unsigned long int) _1, (unsigned int) _2); + } + #endif + +-/* Assembly instruction format: ui15 */ +-/* Data types in instruction templates: UQI */ +-#define __dbar(/*ui15*/ _1) __builtin_loongarch_dbar((_1)) +- +-/* Assembly instruction format: ui15 */ +-/* Data types in instruction templates: UQI */ +-#define __ibar(/*ui15*/ _1) __builtin_loongarch_ibar((_1)) +- +-#define __builtin_loongarch_syscall(a) \ +-{ \ +- __asm__ volatile ("syscall %0\n\t" \ +- ::"I"(a)); \ +-} +-#define __syscall __builtin_loongarch_syscall +- +-#define __builtin_loongarch_break(a) \ +-{ \ +- __asm__ volatile ("break %0\n\t" \ +- ::"I"(a)); \ +-} +-#define __break __builtin_loongarch_break +- +- +-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__builtin_loongarch_tlbsrch (void) +-{ +- __asm__ volatile ("tlbsrch\n\t"); +-} +-#define __tlbsrch __builtin_loongarch_tlbsrch +- +-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__builtin_loongarch_tlbrd (void) +-{ +- __asm__ volatile ("tlbrd\n\t"); +-} +-#define __tlbrd __builtin_loongarch_tlbrd ++/* Assembly instruction format: ui15. */ ++/* Data types in instruction templates: USI. */ ++#define __dbar(/*ui15*/ _1) __builtin_loongarch_dbar ((_1)) + +-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__builtin_loongarch_tlbwr (void) +-{ +- __asm__ volatile ("tlbwr\n\t"); +-} +-#define __tlbwr __builtin_loongarch_tlbwr ++/* Assembly instruction format: ui15. */ ++/* Data types in instruction templates: USI. */ ++#define __ibar(/*ui15*/ _1) __builtin_loongarch_ibar ((_1)) + +-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__builtin_loongarch_tlbfill (void) +-{ +- __asm__ volatile ("tlbfill\n\t"); +-} +-#define __tlbfill __builtin_loongarch_tlbfill +- +-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__builtin_loongarch_tlbclr (void) +-{ +- __asm__ volatile ("tlbclr\n\t"); +-} +-#define __tlbclr __builtin_loongarch_tlbclr +- +-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__builtin_loongarch_tlbflush (void) +-{ +- __asm__ volatile ("tlbflush\n\t"); +-} +-#define __tlbflush __builtin_loongarch_tlbflush ++/* Assembly instruction format: ui15. */ ++/* Data types in instruction templates: USI. */ ++#define __syscall(/*ui15*/ _1) __builtin_loongarch_syscall ((_1)) + ++/* Assembly instruction format: ui15. */ ++/* Data types in instruction templates: USI. */ ++#define __break(/*ui15*/ _1) __builtin_loongarch_break ((_1)) + + #ifdef __cplusplus + } +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 24757aaa1..515336e05 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -212,6 +212,9 @@ + ;; As ILASX but excludes V32QI. + (define_mode_iterator ILASX_DWH [V4DI V8SI V16HI]) + ++;; As LASX but excludes V32QI. ++(define_mode_iterator LASX_DWH [V4DF V8SF V4DI V8SI V16HI]) ++ + ;; As ILASX but excludes V4DI. + (define_mode_iterator ILASX_WHB [V8SI V16HI V32QI]) + +@@ -227,7 +230,7 @@ + ;; Only used for immediate set shuffle elements instruction. 
+ (define_mode_iterator LASX_WHB_W [V8SI V16HI V32QI V8SF]) + +-;; The atribute gives the integer vector mode with same size in Loongson ASX. ++;; The attribute gives the integer vector mode with same size in Loongson ASX. + (define_mode_attr VIMODE256 + [(V4DF "V4DI") + (V8SF "V8SI") +@@ -476,6 +479,37 @@ + (V16HI "w") + (V32QI "w")]) + ++(define_int_iterator FRINT256_S [UNSPEC_LASX_XVFRINTRP_S ++ UNSPEC_LASX_XVFRINTRZ_S ++ UNSPEC_LASX_XVFRINT ++ UNSPEC_LASX_XVFRINTRM_S]) ++ ++(define_int_iterator FRINT256_D [UNSPEC_LASX_XVFRINTRP_D ++ UNSPEC_LASX_XVFRINTRZ_D ++ UNSPEC_LASX_XVFRINT ++ UNSPEC_LASX_XVFRINTRM_D]) ++ ++(define_int_attr frint256_pattern_s ++ [(UNSPEC_LASX_XVFRINTRP_S "ceil") ++ (UNSPEC_LASX_XVFRINTRZ_S "btrunc") ++ (UNSPEC_LASX_XVFRINT "rint") ++ (UNSPEC_LASX_XVFRINTRM_S "floor")]) ++ ++(define_int_attr frint256_pattern_d ++ [(UNSPEC_LASX_XVFRINTRP_D "ceil") ++ (UNSPEC_LASX_XVFRINTRZ_D "btrunc") ++ (UNSPEC_LASX_XVFRINT "rint") ++ (UNSPEC_LASX_XVFRINTRM_D "floor")]) ++ ++(define_int_attr frint256_suffix ++ [(UNSPEC_LASX_XVFRINTRP_S "rp") ++ (UNSPEC_LASX_XVFRINTRP_D "rp") ++ (UNSPEC_LASX_XVFRINTRZ_S "rz") ++ (UNSPEC_LASX_XVFRINTRZ_D "rz") ++ (UNSPEC_LASX_XVFRINT "") ++ (UNSPEC_LASX_XVFRINTRM_S "rm") ++ (UNSPEC_LASX_XVFRINTRM_D "rm")]) ++ + (define_expand "vec_init" + [(match_operand:LASX 0 "register_operand") + (match_operand:LASX 1 "")] +@@ -497,7 +531,6 @@ + "xvpickev.\t%u0,%u2,%u1\n\txvpermi.d\t%u0,%u0,0xd8" + [(set_attr "type" "simd_permute") + (set_attr "mode" "") +- (set_attr "can_delay" "no") + (set_attr "length" "8")]) + + (define_expand "vec_unpacks_hi_v8sf" +@@ -522,7 +555,6 @@ + operands[2] = loongarch_lsx_vec_parallel_const_half (V8SFmode, false/*high_p*/); + }) + +- + (define_expand "vec_unpacks_hi_" + [(match_operand: 0 "register_operand") + (match_operand:ILASX_WHB 1 "register_operand")] +@@ -560,11 +592,11 @@ + }) + + (define_insn "lasx_xvinsgr2vr_" +- [(set (match_operand:LASX_WD 0 "register_operand" "=f") +- (vec_merge:LASX_WD +- (vec_duplicate:LASX_WD ++ [(set (match_operand:ILASX_DW 0 "register_operand" "=f") ++ (vec_merge:ILASX_DW ++ (vec_duplicate:ILASX_DW + (match_operand: 1 "reg_or_0_operand" "rJ")) +- (match_operand:LASX_WD 2 "register_operand" "0") ++ (match_operand:ILASX_DW 2 "register_operand" "0") + (match_operand 3 "const__operand" "")))] + "ISA_HAS_LASX" + { +@@ -651,28 +683,49 @@ + (set_attr "mode" "V4DI")]) + + ;; xshuf.w +-(define_insn "lasx_xvperm_w" +- [(set (match_operand:V8SI 0 "register_operand" "=f") +- (unspec:V8SI +- [(match_operand:V8SI 1 "register_operand" "f") +- (match_operand:V8SI 2 "register_operand" "f")] +- UNSPEC_LASX_XVPERM_W))] ++(define_insn "lasx_xvperm_" ++ [(set (match_operand:LASX_W 0 "register_operand" "=f") ++ (unspec:LASX_W ++ [(match_operand:LASX_W 1 "nonimmediate_operand" "f") ++ (match_operand:V8SI 2 "register_operand" "f")] ++ UNSPEC_LASX_XVPERM_W))] + "ISA_HAS_LASX" + "xvperm.w\t%u0,%u1,%u2" + [(set_attr "type" "simd_splat") +- (set_attr "mode" "V8SI")]) ++ (set_attr "mode" "")]) + + ;; xvpermi.d +-(define_insn "lasx_xvpermi_d" +- [(set (match_operand:V4DI 0 "register_operand" "=f") +- (unspec:V4DI +- [(match_operand:V4DI 1 "register_operand" "f") +- (match_operand 2 "const_uimm8_operand")] +- UNSPEC_LASX_XVPERMI_D))] ++(define_insn "lasx_xvpermi_d_" ++ [(set (match_operand:LASX 0 "register_operand" "=f") ++ (unspec:LASX ++ [(match_operand:LASX 1 "register_operand" "f") ++ (match_operand:SI 2 "const_uimm8_operand")] ++ UNSPEC_LASX_XVPERMI_D))] + "ISA_HAS_LASX" + "xvpermi.d\t%u0,%u1,%2" + [(set_attr "type" 
"simd_splat") +- (set_attr "mode" "V4DI")]) ++ (set_attr "mode" "")]) ++ ++(define_insn "lasx_xvpermi_d__1" ++ [(set (match_operand:LASX_D 0 "register_operand" "=f") ++ (vec_select:LASX_D ++ (match_operand:LASX_D 1 "register_operand" "f") ++ (parallel [(match_operand 2 "const_0_to_3_operand") ++ (match_operand 3 "const_0_to_3_operand") ++ (match_operand 4 "const_0_to_3_operand") ++ (match_operand 5 "const_0_to_3_operand")])))] ++ "ISA_HAS_LASX" ++{ ++ int mask = 0; ++ mask |= INTVAL (operands[2]) << 0; ++ mask |= INTVAL (operands[3]) << 2; ++ mask |= INTVAL (operands[4]) << 4; ++ mask |= INTVAL (operands[5]) << 6; ++ operands[2] = GEN_INT (mask); ++ return "xvpermi.d\t%u0,%u1,%2"; ++} ++ [(set_attr "type" "simd_splat") ++ (set_attr "mode" "")]) + + ;; xvpermi.q + (define_insn "lasx_xvpermi_q_" +@@ -698,82 +751,51 @@ + [(set_attr "type" "simd_copy") + (set_attr "mode" "V4DI")]) + +-(define_expand "vec_extract" +- [(match_operand: 0 "register_operand") +- (match_operand:ILASX 1 "register_operand") ++(define_expand "vec_set" ++ [(match_operand:ILASX_DW 0 "register_operand") ++ (match_operand: 1 "reg_or_0_operand") + (match_operand 2 "const__operand")] + "ISA_HAS_LASX" + { +- if (mode == SImode || mode == DImode) +- { +- emit_insn(gen_lasx_xvpickve2gr_ (operands[0], operands[1], operands[2])); +- } +- else +- { +- HOST_WIDE_INT size_0 = GET_MODE_SIZE (GET_MODE (operands[0])); +- HOST_WIDE_INT size_1 = GET_MODE_SIZE (GET_MODE (operands[1])); +- HOST_WIDE_INT val = INTVAL (operands[2]); ++ rtx index = GEN_INT (1 << INTVAL (operands[2])); ++ emit_insn (gen_lasx_xvinsgr2vr_ (operands[0], operands[1], ++ operands[0], index)); ++ DONE; ++}) + +- /* High part */ +- if (val >= size_1/size_0/2 ) +- { +- rtx dest1 = gen_reg_rtx (GET_MODE (operands[1])); +- rtx pos = GEN_INT( val - size_1/size_0/2); +- emit_insn (gen_lasx_xvpermi_q_ (dest1, dest1, operands[1], GEN_INT(1))); +- rtx dest2 = gen_reg_rtx (SImode); +- emit_insn (gen_lsx_vpickve2gr_ (dest2, +- gen_lowpart(mode, dest1), +- pos)); +- emit_move_insn (operands[0], +- gen_lowpart (mode, dest2)); +- } +- else +- { +- rtx dest1 = gen_reg_rtx (SImode); +- emit_insn (gen_lsx_vpickve2gr_ (dest1, +- gen_lowpart(mode, operands[1]), +- operands[2])); +- emit_move_insn (operands[0], +- gen_lowpart (mode, dest1)); +- } +- } ++(define_expand "vec_set" ++ [(match_operand:FLASX 0 "register_operand") ++ (match_operand: 1 "reg_or_0_operand") ++ (match_operand 2 "const__operand")] ++ "ISA_HAS_LASX" ++{ ++ rtx index = GEN_INT (1 << INTVAL (operands[2])); ++ emit_insn (gen_lasx_xvinsve0__scalar (operands[0], operands[1], ++ operands[0], index)); + DONE; + }) + + (define_expand "vec_extract" + [(match_operand: 0 "register_operand") +- (match_operand:FLASX 1 "register_operand") ++ (match_operand:LASX 1 "register_operand") + (match_operand 2 "const__operand")] + "ISA_HAS_LASX" + { +- rtx temp; +- HOST_WIDE_INT val = INTVAL (operands[2]); +- +- if (val == 0) +- temp = operands[1]; +- else +- { +- temp = gen_reg_rtx (mode); +- emit_insn (gen_lasx_xvpickve_ (temp, operands[1], operands[2])); +- } +- emit_insn (gen_lasx_vec_extract_ (operands[0], temp)); ++ loongarch_expand_vector_extract (operands[0], operands[1], ++ INTVAL (operands[2])); + DONE; + }) + +-(define_insn_and_split "lasx_vec_extract_" +- [(set (match_operand: 0 "register_operand" "=f") +- (vec_select: +- (match_operand:FLASX 1 "register_operand" "f") +- (parallel [(const_int 0)])))] ++(define_expand "vec_perm" ++ [(match_operand:LASX 0 "register_operand") ++ (match_operand:LASX 1 "register_operand") ++ 
(match_operand:LASX 2 "register_operand") ++ (match_operand: 3 "register_operand")] + "ISA_HAS_LASX" +- "#" +- "&& reload_completed" +- [(set (match_dup 0) (match_dup 1))] + { +- operands[1] = gen_rtx_REG (mode, REGNO (operands[1])); +-} +- [(set_attr "move_type" "fmove") +- (set_attr "mode" "")]) ++ loongarch_expand_vec_perm_1(operands); ++ DONE; ++}) + + ;; FIXME: 256?? + (define_expand "vcondu" +@@ -860,7 +882,6 @@ + { return loongarch_output_move (operands[0], operands[1]); } + [(set_attr "type" "simd_move,simd_load,simd_store,simd_copy,simd_insert") + (set_attr "mode" "") +- (set_attr "can_delay" "no,yes,yes,yes,yes") + (set_attr "length" "8,4,4,4,4")]) + + +@@ -868,7 +889,7 @@ + [(set (match_operand:LASX 0 "nonimmediate_operand") + (match_operand:LASX 1 "move_operand"))] + "reload_completed && ISA_HAS_LASX +- && loongarch_split_move_insn_p (operands[0], operands[1], insn)" ++ && loongarch_split_move_insn_p (operands[0], operands[1])" + [(const_int 0)] + { + loongarch_split_move_insn (operands[0], operands[1], curr_insn); +@@ -1143,7 +1164,25 @@ + [(set_attr "type" "simd_fmul") + (set_attr "mode" "")]) + +-(define_insn "div3" ++(define_expand "div3" ++ [(set (match_operand:FLASX 0 "register_operand") ++ (div:FLASX (match_operand:FLASX 1 "register_operand") ++ (match_operand:FLASX 2 "register_operand")))] ++ "ISA_HAS_LASX" ++{ ++ if (mode == V8SFmode ++ && TARGET_RECIP_VEC_DIV ++ && optimize_insn_for_speed_p () ++ && flag_finite_math_only && !flag_trapping_math ++ && flag_unsafe_math_optimizations) ++ { ++ loongarch_emit_swdivsf (operands[0], operands[1], ++ operands[2], V8SFmode); ++ DONE; ++ } ++}) ++ ++(define_insn "*div3" + [(set (match_operand:FLASX 0 "register_operand" "=f") + (div:FLASX (match_operand:FLASX 1 "register_operand" "f") + (match_operand:FLASX 2 "register_operand" "f")))] +@@ -1172,7 +1211,23 @@ + [(set_attr "type" "simd_fmadd") + (set_attr "mode" "")]) + +-(define_insn "sqrt2" ++(define_expand "sqrt2" ++ [(set (match_operand:FLASX 0 "register_operand") ++ (sqrt:FLASX (match_operand:FLASX 1 "register_operand")))] ++ "ISA_HAS_LASX" ++{ ++ if (mode == V8SFmode ++ && TARGET_RECIP_VEC_SQRT ++ && flag_unsafe_math_optimizations ++ && optimize_insn_for_speed_p () ++ && flag_finite_math_only && !flag_trapping_math) ++ { ++ loongarch_emit_swrsqrtsf (operands[0], operands[1], V8SFmode, 0); ++ DONE; ++ } ++}) ++ ++(define_insn "*sqrt2" + [(set (match_operand:FLASX 0 "register_operand" "=f") + (sqrt:FLASX (match_operand:FLASX 1 "register_operand" "f")))] + "ISA_HAS_LASX" +@@ -1307,13 +1362,13 @@ + [(set_attr "type" "simd_bit") + (set_attr "mode" "")]) + +-(define_insn "lasx_xvbitsel_" +- [(set (match_operand:ILASX 0 "register_operand" "=f") +- (ior:ILASX (and:ILASX (not:ILASX +- (match_operand:ILASX 3 "register_operand" "f")) +- (match_operand:ILASX 1 "register_operand" "f")) +- (and:ILASX (match_dup 3) +- (match_operand:ILASX 2 "register_operand" "f"))))] ++(define_insn "lasx_xvbitsel_" ++ [(set (match_operand:LASX 0 "register_operand" "=f") ++ (ior:LASX (and:LASX (not:LASX ++ (match_operand:LASX 3 "register_operand" "0")) ++ (match_operand:LASX 1 "register_operand" "f")) ++ (and:LASX (match_dup 3) ++ (match_operand:LASX 2 "register_operand" "f"))))] + "ISA_HAS_LASX" + "xvbitsel.v\t%u0,%u1,%u2,%u3" + [(set_attr "type" "simd_bitmov") +@@ -1363,11 +1418,11 @@ + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +-(define_expand "vec_cmp" +- [(set (match_operand:ILASX 0 "register_operand") +- (match_operator:ILASX 1 "" +- [(match_operand:ILASX 2 "register_operand") +- 
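At the source level, the vec_set and vec_extract expanders added above (together with the new vec_perm expander) are what GCC's generic vector extension maps to for 256-bit modes; a small sketch in plain C (no LASX intrinsics required), with the exact instruction choice left to the backend:

/* Eight-int 256-bit vector via the generic vector_size attribute.  With
   LASX enabled, element insert/extract would typically go through the new
   vec_set/vec_extract expanders (e.g. xvinsgr2vr.w for the insert).  */
typedef int v8si __attribute__ ((vector_size (32)));

int
set_and_pick (v8si *p, int x)
{
  (*p)[3] = x;		/* element insert  */
  return (*p)[5];	/* element extract */
}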
(match_operand:ILASX 3 "register_operand")]))] ++(define_expand "vec_cmp" ++ [(set (match_operand: 0 "register_operand") ++ (match_operator 1 "" ++ [(match_operand:LASX 2 "register_operand") ++ (match_operand:LASX 3 "register_operand")]))] + "ISA_HAS_LASX" + { + bool ok = loongarch_expand_int_vec_cmp (operands); +@@ -1375,11 +1430,11 @@ + DONE; + }) + +-(define_expand "vec_cmp" +- [(set (match_operand:FLASX 0 "register_operand") +- (match_operator:FLASX 1 "" +- [(match_operand:FLASX 2 "register_operand") +- (match_operand:FLASX 3 "register_operand")]))] ++(define_expand "vec_cmpu" ++ [(set (match_operand: 0 "register_operand") ++ (match_operator 1 "" ++ [(match_operand:ILASX 2 "register_operand") ++ (match_operand:ILASX 3 "register_operand")]))] + "ISA_HAS_LASX" + { + bool ok = loongarch_expand_fp_vec_cmp (operands); +@@ -1493,8 +1548,8 @@ + (V2DF "V8SI")]) + + (define_insn "lasx_xvreplgr2vr_" +- [(set (match_operand:LASX 0 "register_operand" "=f,f") +- (vec_duplicate:LASX ++ [(set (match_operand:ILASX 0 "register_operand" "=f,f") ++ (vec_duplicate:ILASX + (match_operand: 1 "reg_or_0_operand" "r,J")))] + "ISA_HAS_LASX" + { +@@ -1508,10 +1563,9 @@ + } + [(set_attr "type" "simd_fill") + (set_attr "mode" "") +- (set_attr "can_delay" "no") + (set_attr "length" "8")]) + +-(define_insn "lasx_xvflogb_" ++(define_insn "logb2" + [(set (match_operand:FLASX 0 "register_operand" "=f") + (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] + UNSPEC_LASX_XVFLOGB))] +@@ -1572,6 +1626,15 @@ + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) + ++(define_insn "lasx_xvfrecipe_" ++ [(set (match_operand:FLASX 0 "register_operand" "=f") ++ (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] ++ UNSPEC_RECIPE))] ++ "ISA_HAS_LASX && flag_unsafe_math_optimizations && TARGET_RECIP_VEC_DIV" ++ "xvfrecipe.\t%u0,%u1" ++ [(set_attr "type" "simd_fdiv") ++ (set_attr "mode" "")]) ++ + (define_insn "lasx_xvfrint_" + [(set (match_operand:FLASX 0 "register_operand" "=f") + (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] +@@ -1590,6 +1653,42 @@ + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) + ++ ++(define_insn "lasx_xvfrsqrte_" ++ [(set (match_operand:FLASX 0 "register_operand" "=f") ++ (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] ++ UNSPEC_RSQRTE))] ++ "ISA_HAS_LASX && flag_unsafe_math_optimizations && TARGET_RECIP_VEC_RSQRT" ++ "xvfrsqrte.\t%u0,%u1" ++ [(set_attr "type" "simd_fdiv") ++ (set_attr "mode" "")]) ++ ++(define_expand "rsqrt2" ++ [(set (match_operand:FLASX 0 "register_operand" "=f") ++ (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] ++ UNSPEC_LASX_XVFRSQRT))] ++ "ISA_HAS_LASX" ++{ ++ if (mode == V8SFmode ++ && TARGET_RECIP_VEC_RSQRT ++ && flag_unsafe_math_optimizations ++ && optimize_insn_for_speed_p () ++ && flag_finite_math_only && !flag_trapping_math) ++ { ++ loongarch_emit_swrsqrtsf (operands[0], operands[1], V8SFmode, 1); ++ DONE; ++ } ++}) ++ ++(define_insn "*rsqrt2" ++ [(set (match_operand:FLASX 0 "register_operand" "=f") ++ (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] ++ UNSPEC_LASX_XVFRSQRT))] ++ "ISA_HAS_LASX" ++ "xvfrsqrt.\t%u0,%u1" ++ [(set_attr "type" "simd_fdiv") ++ (set_attr "mode" "")]) ++ + (define_insn "lasx_xvftint_s__" + [(set (match_operand: 0 "register_operand" "=f") + (unspec: [(match_operand:FLASX 1 "register_operand" "f")] +@@ -2325,6 +2424,35 @@ + [(set_attr "type" "simd_shf") + (set_attr "mode" "")]) + ++(define_insn "lasx_xvshuf4i__1" ++ [(set (match_operand:LASX_W 0 
"register_operand" "=f") ++ (vec_select:LASX_W ++ (match_operand:LASX_W 1 "nonimmediate_operand" "f") ++ (parallel [(match_operand 2 "const_0_to_3_operand") ++ (match_operand 3 "const_0_to_3_operand") ++ (match_operand 4 "const_0_to_3_operand") ++ (match_operand 5 "const_0_to_3_operand") ++ (match_operand 6 "const_4_to_7_operand") ++ (match_operand 7 "const_4_to_7_operand") ++ (match_operand 8 "const_4_to_7_operand") ++ (match_operand 9 "const_4_to_7_operand")])))] ++ "ISA_HAS_LASX ++ && INTVAL (operands[2]) + 4 == INTVAL (operands[6]) ++ && INTVAL (operands[3]) + 4 == INTVAL (operands[7]) ++ && INTVAL (operands[4]) + 4 == INTVAL (operands[8]) ++ && INTVAL (operands[5]) + 4 == INTVAL (operands[9])" ++{ ++ int mask = 0; ++ mask |= INTVAL (operands[2]) << 0; ++ mask |= INTVAL (operands[3]) << 2; ++ mask |= INTVAL (operands[4]) << 4; ++ mask |= INTVAL (operands[5]) << 6; ++ operands[2] = GEN_INT (mask); ++ ++ return "xvshuf4i.w\t%u0,%u1,%2"; ++} ++ [(set_attr "type" "simd_shf") ++ (set_attr "mode" "")]) + + (define_insn "lasx_xvsrar_" + [(set (match_operand:ILASX 0 "register_operand" "=f") +@@ -2386,11 +2514,11 @@ + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +-(define_insn "lasx_xvshuf_" +- [(set (match_operand:ILASX_DWH 0 "register_operand" "=f") +- (unspec:ILASX_DWH [(match_operand: 1 "register_operand" "0") +- (match_operand:ILASX_DWH 2 "register_operand" "f") +- (match_operand:ILASX_DWH 3 "register_operand" "f")] ++(define_insn "lasx_xvshuf_" ++ [(set (match_operand:LASX_DWH 0 "register_operand" "=f") ++ (unspec:LASX_DWH [(match_operand:LASX_DWH 1 "register_operand" "0") ++ (match_operand:LASX_DWH 2 "register_operand" "f") ++ (match_operand:LASX_DWH 3 "register_operand" "f")] + UNSPEC_LASX_XVSHUF))] + "ISA_HAS_LASX" + "xvshuf.\t%u0,%u2,%u3" +@@ -2497,14 +2625,14 @@ + [(set_attr "type" "simd_splat") + (set_attr "mode" "")]) + +- (define_insn "lasx_xvreplve0__scalar" +- [(set (match_operand:FLASX 0 "register_operand" "=f") +- (unspec:FLASX [(match_operand: 1 "register_operand" "f")] +- UNSPEC_LASX_XVREPLVE0))] +- "ISA_HAS_LASX" +- "xvreplve0.\t%u0,%u1" +- [(set_attr "type" "simd_splat") +- (set_attr "mode" "")]) ++(define_insn "lasx_xvreplve0__scalar" ++[(set (match_operand:FLASX 0 "register_operand" "=f") ++ (vec_duplicate:FLASX ++ (match_operand: 1 "register_operand" "f")))] ++ "ISA_HAS_LASX" ++ "xvreplve0.\t%u0,%u1" ++ [(set_attr "type" "simd_splat") ++ (set_attr "mode" "")]) + + (define_insn "lasx_xvreplve0_q" + [(set (match_operand:V32QI 0 "register_operand" "=f") +@@ -2544,7 +2672,6 @@ + "xvfcvt.s.d\t%u0,%u2,%u1\n\txvpermi.d\t%u0,%u0,0xd8" + [(set_attr "type" "simd_fcvt") + (set_attr "mode" "V8SF") +- (set_attr "can_delay" "no") + (set_attr "length" "8")]) + + ;; Define for builtin function. +@@ -2579,7 +2706,6 @@ + "xvpermi.d\t%u0,%u1,0xfa\n\txvfcvtl.d.s\t%u0,%u0" + [(set_attr "type" "simd_fcvt") + (set_attr "mode" "V4DF") +- (set_attr "can_delay" "no") + (set_attr "length" "12")]) + + ;; Define for builtin function. 
+@@ -2614,7 +2740,6 @@ + "xvpermi.d\t%u0,%u1,0x50\n\txvfcvtl.d.s\t%u0,%u0" + [(set_attr "type" "simd_fcvt") + (set_attr "mode" "V4DF") +- (set_attr "can_delay" "no") + (set_attr "length" "8")]) + + (define_code_attr lasxbr +@@ -2653,8 +2778,7 @@ + "xvset.\t%z3%u1\n\tbcnez\t%Z3%0"); + } + [(set_attr "type" "simd_branch") +- (set_attr "mode" "") +- (set_attr "compact_form" "never")]) ++ (set_attr "mode" "")]) + + (define_insn "lasx__v_" + [(set (pc) (if_then_else +@@ -2672,12 +2796,8 @@ + "xvset.v\t%Z3%u1\n\tbcnez\t%Z3%0"); + } + [(set_attr "type" "simd_branch") +- (set_attr "mode" "") +- (set_attr "compact_form" "never")]) +- +- ++ (set_attr "mode" "")]) + +- + ;; loongson-asx. + (define_insn "lasx_vext2xv_h_b" + [(set (match_operand:V16HI 0 "register_operand" "=f") +@@ -3339,8 +3459,8 @@ + (set_attr "mode" "V8SF")]) + + (define_insn "lasx_xvfrintrne_s" +- [(set (match_operand:V8SI 0 "register_operand" "=f") +- (unspec:V8SI [(match_operand:V8SF 1 "register_operand" "f")] ++ [(set (match_operand:V8SF 0 "register_operand" "=f") ++ (unspec:V8SF [(match_operand:V8SF 1 "register_operand" "f")] + UNSPEC_LASX_XVFRINTRNE_S))] + "ISA_HAS_LASX" + "xvfrintrne.s\t%u0,%u1" +@@ -3348,8 +3468,8 @@ + (set_attr "mode" "V8SF")]) + + (define_insn "lasx_xvfrintrne_d" +- [(set (match_operand:V4DI 0 "register_operand" "=f") +- (unspec:V4DI [(match_operand:V4DF 1 "register_operand" "f")] ++ [(set (match_operand:V4DF 0 "register_operand" "=f") ++ (unspec:V4DF [(match_operand:V4DF 1 "register_operand" "f")] + UNSPEC_LASX_XVFRINTRNE_D))] + "ISA_HAS_LASX" + "xvfrintrne.d\t%u0,%u1" +@@ -3357,8 +3477,8 @@ + (set_attr "mode" "V4DF")]) + + (define_insn "lasx_xvfrintrz_s" +- [(set (match_operand:V8SI 0 "register_operand" "=f") +- (unspec:V8SI [(match_operand:V8SF 1 "register_operand" "f")] ++ [(set (match_operand:V8SF 0 "register_operand" "=f") ++ (unspec:V8SF [(match_operand:V8SF 1 "register_operand" "f")] + UNSPEC_LASX_XVFRINTRZ_S))] + "ISA_HAS_LASX" + "xvfrintrz.s\t%u0,%u1" +@@ -3366,8 +3486,8 @@ + (set_attr "mode" "V8SF")]) + + (define_insn "lasx_xvfrintrz_d" +- [(set (match_operand:V4DI 0 "register_operand" "=f") +- (unspec:V4DI [(match_operand:V4DF 1 "register_operand" "f")] ++ [(set (match_operand:V4DF 0 "register_operand" "=f") ++ (unspec:V4DF [(match_operand:V4DF 1 "register_operand" "f")] + UNSPEC_LASX_XVFRINTRZ_D))] + "ISA_HAS_LASX" + "xvfrintrz.d\t%u0,%u1" +@@ -3375,8 +3495,8 @@ + (set_attr "mode" "V4DF")]) + + (define_insn "lasx_xvfrintrp_s" +- [(set (match_operand:V8SI 0 "register_operand" "=f") +- (unspec:V8SI [(match_operand:V8SF 1 "register_operand" "f")] ++ [(set (match_operand:V8SF 0 "register_operand" "=f") ++ (unspec:V8SF [(match_operand:V8SF 1 "register_operand" "f")] + UNSPEC_LASX_XVFRINTRP_S))] + "ISA_HAS_LASX" + "xvfrintrp.s\t%u0,%u1" +@@ -3384,8 +3504,8 @@ + (set_attr "mode" "V8SF")]) + + (define_insn "lasx_xvfrintrp_d" +- [(set (match_operand:V4DI 0 "register_operand" "=f") +- (unspec:V4DI [(match_operand:V4DF 1 "register_operand" "f")] ++ [(set (match_operand:V4DF 0 "register_operand" "=f") ++ (unspec:V4DF [(match_operand:V4DF 1 "register_operand" "f")] + UNSPEC_LASX_XVFRINTRP_D))] + "ISA_HAS_LASX" + "xvfrintrp.d\t%u0,%u1" +@@ -3393,8 +3513,8 @@ + (set_attr "mode" "V4DF")]) + + (define_insn "lasx_xvfrintrm_s" +- [(set (match_operand:V8SI 0 "register_operand" "=f") +- (unspec:V8SI [(match_operand:V8SF 1 "register_operand" "f")] ++ [(set (match_operand:V8SF 0 "register_operand" "=f") ++ (unspec:V8SF [(match_operand:V8SF 1 "register_operand" "f")] + UNSPEC_LASX_XVFRINTRM_S))] + "ISA_HAS_LASX" + 
"xvfrintrm.s\t%u0,%u1" +@@ -3402,14 +3522,44 @@ + (set_attr "mode" "V8SF")]) + + (define_insn "lasx_xvfrintrm_d" +- [(set (match_operand:V4DI 0 "register_operand" "=f") +- (unspec:V4DI [(match_operand:V4DF 1 "register_operand" "f")] ++ [(set (match_operand:V4DF 0 "register_operand" "=f") ++ (unspec:V4DF [(match_operand:V4DF 1 "register_operand" "f")] + UNSPEC_LASX_XVFRINTRM_D))] + "ISA_HAS_LASX" + "xvfrintrm.d\t%u0,%u1" + [(set_attr "type" "simd_shift") + (set_attr "mode" "V4DF")]) + ++;; Vector versions of the floating-point frint patterns. ++;; Expands to btrunc, ceil, floor, rint. ++(define_insn "v8sf2" ++ [(set (match_operand:V8SF 0 "register_operand" "=f") ++ (unspec:V8SF [(match_operand:V8SF 1 "register_operand" "f")] ++ FRINT256_S))] ++ "ISA_HAS_LASX" ++ "xvfrint.s\t%u0,%u1" ++ [(set_attr "type" "simd_shift") ++ (set_attr "mode" "V8SF")]) ++ ++(define_insn "v4df2" ++ [(set (match_operand:V4DF 0 "register_operand" "=f") ++ (unspec:V4DF [(match_operand:V4DF 1 "register_operand" "f")] ++ FRINT256_D))] ++ "ISA_HAS_LASX" ++ "xvfrint.d\t%u0,%u1" ++ [(set_attr "type" "simd_shift") ++ (set_attr "mode" "V4DF")]) ++ ++;; Expands to round. ++(define_insn "round2" ++ [(set (match_operand:FLASX 0 "register_operand" "=f") ++ (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] ++ UNSPEC_LASX_XVFRINT))] ++ "ISA_HAS_LASX" ++ "xvfrint.\t%u0,%u1" ++ [(set_attr "type" "simd_shift") ++ (set_attr "mode" "")]) ++ + ;; Offset load and broadcast + (define_expand "lasx_xvldrepl_" + [(match_operand:LASX 0 "register_operand") +@@ -3435,6 +3585,19 @@ + (set_attr "mode" "") + (set_attr "length" "4")]) + ++;; Offset is "0" ++(define_insn "lasx_xvldrepl__insn_0" ++ [(set (match_operand:LASX 0 "register_operand" "=f") ++ (vec_duplicate:LASX ++ (mem: (match_operand:DI 1 "register_operand" "r"))))] ++ "ISA_HAS_LASX" ++{ ++ return "xvldrepl.\t%u0,%1,0"; ++} ++ [(set_attr "type" "simd_load") ++ (set_attr "mode" "") ++ (set_attr "length" "4")]) ++ + ;;XVADDWEV.H.B XVSUBWEV.H.B XVMULWEV.H.B + ;;XVADDWEV.H.BU XVSUBWEV.H.BU XVMULWEV.H.BU + (define_insn "lasx_xvwev_h_b" +@@ -4666,16 +4829,52 @@ + [(set_attr "type" "simd_shift") + (set_attr "mode" "")]) + +-(define_insn "lasx_xvpermi_w" +- [(set (match_operand:V8SI 0 "register_operand" "=f") +- (unspec:V8SI [(match_operand:V8SI 1 "register_operand" "0") +- (match_operand:V8SI 2 "register_operand" "f") +- (match_operand 3 "const_uimm8_operand" "")] +- UNSPEC_LASX_XVPERMI))] ++(define_mode_attr VDOUBLEMODEW256 ++ [(V8SI "V16SI") ++ (V8SF "V16SF")]) ++ ++(define_insn "lasx_xvpermi_" ++ [(set (match_operand:LASX_W 0 "register_operand" "=f") ++ (unspec:LASX_W [(match_operand:LASX_W 1 "register_operand" "0") ++ (match_operand:LASX_W 2 "register_operand" "f") ++ (match_operand 3 "const_uimm8_operand" "")] ++ UNSPEC_LASX_XVPERMI))] + "ISA_HAS_LASX" + "xvpermi.w\t%u0,%u2,%3" + [(set_attr "type" "simd_bit") +- (set_attr "mode" "V8SI")]) ++ (set_attr "mode" "")]) ++ ++(define_insn "lasx_xvpermi__1" ++ [(set (match_operand:LASX_W 0 "register_operand" "=f") ++ (vec_select:LASX_W ++ (vec_concat: ++ (match_operand:LASX_W 1 "register_operand" "f") ++ (match_operand:LASX_W 2 "register_operand" "0")) ++ (parallel [(match_operand 3 "const_0_to_3_operand") ++ (match_operand 4 "const_0_to_3_operand" ) ++ (match_operand 5 "const_8_to_11_operand" ) ++ (match_operand 6 "const_8_to_11_operand" ) ++ (match_operand 7 "const_4_to_7_operand" ) ++ (match_operand 8 "const_4_to_7_operand" ) ++ (match_operand 9 "const_12_to_15_operand") ++ (match_operand 10 "const_12_to_15_operand")])))] ++ 
"ISA_HAS_LASX ++ && INTVAL (operands[3]) + 4 == INTVAL (operands[7]) ++ && INTVAL (operands[4]) + 4 == INTVAL (operands[8]) ++ && INTVAL (operands[5]) + 4 == INTVAL (operands[9]) ++ && INTVAL (operands[6]) + 4 == INTVAL (operands[10])" ++{ ++ int mask = 0; ++ mask |= INTVAL (operands[3]) << 0; ++ mask |= INTVAL (operands[4]) << 2; ++ mask |= (INTVAL (operands[5]) - 8) << 4; ++ mask |= (INTVAL (operands[6]) - 8) << 6; ++ operands[3] = GEN_INT (mask); ++ ++ return "xvpermi.w\t%u0,%u1,%3"; ++} ++ [(set_attr "type" "simd_bit") ++ (set_attr "mode" "")]) + + (define_expand "lasx_xvld" + [(match_operand:V32QI 0 "register_operand") +@@ -4728,10 +4927,24 @@ + (set_attr "mode" "") + (set_attr "length" "4")]) + +-(define_insn "lasx_xvinsve0_" +- [(set (match_operand:ILASX_DW 0 "register_operand" "=f") +- (unspec:ILASX_DW [(match_operand:ILASX_DW 1 "register_operand" "0") +- (match_operand:ILASX_DW 2 "register_operand" "f") ++;; Offset is "0" ++(define_insn "lasx_xvstelm__insn_0" ++ [(set (mem: (match_operand:DI 0 "register_operand" "r")) ++ (vec_select: ++ (match_operand:LASX_WD 1 "register_operand" "f") ++ (parallel [(match_operand:SI 2 "const__operand")])))] ++ "ISA_HAS_LASX" ++{ ++ return "xvstelm.\t%u1,%0,0,%2"; ++} ++ [(set_attr "type" "simd_store") ++ (set_attr "mode" "") ++ (set_attr "length" "4")]) ++ ++(define_insn "lasx_xvinsve0_" ++ [(set (match_operand:LASX_WD 0 "register_operand" "=f") ++ (unspec:LASX_WD [(match_operand:LASX_WD 1 "register_operand" "0") ++ (match_operand:LASX_WD 2 "register_operand" "f") + (match_operand 3 "const__operand" "")] + UNSPEC_LASX_XVINSVE0))] + "ISA_HAS_LASX" +@@ -4739,6 +4952,18 @@ + [(set_attr "type" "simd_shf") + (set_attr "mode" "")]) + ++(define_insn "lasx_xvinsve0__scalar" ++ [(set (match_operand:FLASX 0 "register_operand" "=f") ++ (vec_merge:FLASX ++ (vec_duplicate:FLASX ++ (match_operand: 1 "register_operand" "f")) ++ (match_operand:FLASX 2 "register_operand" "0") ++ (match_operand 3 "const__operand" "")))] ++ "ISA_HAS_LASX" ++ "xvinsve0.\t%u0,%u1,%y3" ++ [(set_attr "type" "simd_insert") ++ (set_attr "mode" "")]) ++ + (define_insn "lasx_xvpickve_" + [(set (match_operand:LASX_WD 0 "register_operand" "=f") + (unspec:LASX_WD [(match_operand:LASX_WD 1 "register_operand" "f") +@@ -4749,6 +4974,16 @@ + [(set_attr "type" "simd_shf") + (set_attr "mode" "")]) + ++(define_insn "lasx_xvpickve__scalar" ++ [(set (match_operand: 0 "register_operand" "=f") ++ (vec_select: ++ (match_operand:FLASX 1 "register_operand" "f") ++ (parallel [(match_operand 2 "const__operand" "")])))] ++ "ISA_HAS_LASX" ++ "xvpickve.\t%u0,%u1,%2" ++ [(set_attr "type" "simd_shf") ++ (set_attr "mode" "")]) ++ + (define_insn "lasx_xvssrlrn__" + [(set (match_operand: 0 "register_operand" "=f") + (unspec: [(match_operand:ILASX_DWH 1 "register_operand" "f") +@@ -4823,3 +5058,142 @@ + [(set_attr "type" "simd_store") + (set_attr "mode" "DI")]) + ++(define_insn "vec_widen_mult_even_v8si" ++ [(set (match_operand:V4DI 0 "register_operand" "=f") ++ (mult:V4DI ++ (any_extend:V4DI ++ (vec_select:V4SI ++ (match_operand:V8SI 1 "register_operand" "%f") ++ (parallel [(const_int 0) (const_int 2) ++ (const_int 4) (const_int 6)]))) ++ (any_extend:V4DI ++ (vec_select:V4SI ++ (match_operand:V8SI 2 "register_operand" "f") ++ (parallel [(const_int 0) (const_int 2) ++ (const_int 4) (const_int 6)])))))] ++ "ISA_HAS_LASX" ++ "xvmulwev.d.w\t%u0,%u1,%u2" ++ [(set_attr "type" "simd_int_arith") ++ (set_attr "mode" "V4DI")]) ++ ++;; Vector reduction operation ++(define_expand "reduc_plus_scal_v4di" ++ [(match_operand:DI 0 
"register_operand") ++ (match_operand:V4DI 1 "register_operand")] ++ "ISA_HAS_LASX" ++{ ++ rtx tmp = gen_reg_rtx (V4DImode); ++ rtx tmp1 = gen_reg_rtx (V4DImode); ++ rtx vec_res = gen_reg_rtx (V4DImode); ++ emit_insn (gen_lasx_xvhaddw_q_d (tmp, operands[1], operands[1])); ++ emit_insn (gen_lasx_xvpermi_d_v4di (tmp1, tmp, GEN_INT (2))); ++ emit_insn (gen_addv4di3 (vec_res, tmp, tmp1)); ++ emit_insn (gen_vec_extractv4didi (operands[0], vec_res, const0_rtx)); ++ DONE; ++}) ++ ++(define_expand "reduc_plus_scal_v8si" ++ [(match_operand:SI 0 "register_operand") ++ (match_operand:V8SI 1 "register_operand")] ++ "ISA_HAS_LASX" ++{ ++ rtx tmp = gen_reg_rtx (V4DImode); ++ rtx tmp1 = gen_reg_rtx (V4DImode); ++ rtx vec_res = gen_reg_rtx (V4DImode); ++ emit_insn (gen_lasx_xvhaddw_d_w (tmp, operands[1], operands[1])); ++ emit_insn (gen_lasx_xvhaddw_q_d (tmp1, tmp, tmp)); ++ emit_insn (gen_lasx_xvpermi_d_v4di (tmp, tmp1, GEN_INT (2))); ++ emit_insn (gen_addv4di3 (vec_res, tmp, tmp1)); ++ emit_insn (gen_vec_extractv8sisi (operands[0], gen_lowpart(V8SImode,vec_res), const0_rtx)); ++ DONE; ++}) ++ ++(define_expand "reduc_plus_scal_" ++ [(match_operand: 0 "register_operand") ++ (match_operand:FLASX 1 "register_operand")] ++ "ISA_HAS_LASX" ++{ ++ rtx tmp = gen_reg_rtx (mode); ++ loongarch_expand_vector_reduc (gen_add3, tmp, operands[1]); ++ emit_insn (gen_vec_extract (operands[0], tmp, ++ const0_rtx)); ++ DONE; ++}) ++ ++(define_expand "reduc__scal_" ++ [(any_bitwise: ++ (match_operand: 0 "register_operand") ++ (match_operand:ILASX 1 "register_operand"))] ++ "ISA_HAS_LASX" ++{ ++ rtx tmp = gen_reg_rtx (mode); ++ loongarch_expand_vector_reduc (gen_3, tmp, operands[1]); ++ emit_insn (gen_vec_extract (operands[0], tmp, ++ const0_rtx)); ++ DONE; ++}) ++ ++(define_expand "reduc_smax_scal_" ++ [(match_operand: 0 "register_operand") ++ (match_operand:LASX 1 "register_operand")] ++ "ISA_HAS_LASX" ++{ ++ rtx tmp = gen_reg_rtx (mode); ++ loongarch_expand_vector_reduc (gen_smax3, tmp, operands[1]); ++ emit_insn (gen_vec_extract (operands[0], tmp, ++ const0_rtx)); ++ DONE; ++}) ++ ++(define_expand "reduc_smin_scal_" ++ [(match_operand: 0 "register_operand") ++ (match_operand:LASX 1 "register_operand")] ++ "ISA_HAS_LASX" ++{ ++ rtx tmp = gen_reg_rtx (mode); ++ loongarch_expand_vector_reduc (gen_smin3, tmp, operands[1]); ++ emit_insn (gen_vec_extract (operands[0], tmp, ++ const0_rtx)); ++ DONE; ++}) ++ ++(define_expand "reduc_umax_scal_" ++ [(match_operand: 0 "register_operand") ++ (match_operand:ILASX 1 "register_operand")] ++ "ISA_HAS_LASX" ++{ ++ rtx tmp = gen_reg_rtx (mode); ++ loongarch_expand_vector_reduc (gen_umax3, tmp, operands[1]); ++ emit_insn (gen_vec_extract (operands[0], tmp, ++ const0_rtx)); ++ DONE; ++}) ++ ++(define_expand "reduc_umin_scal_" ++ [(match_operand: 0 "register_operand") ++ (match_operand:ILASX 1 "register_operand")] ++ "ISA_HAS_LASX" ++{ ++ rtx tmp = gen_reg_rtx (mode); ++ loongarch_expand_vector_reduc (gen_umin3, tmp, operands[1]); ++ emit_insn (gen_vec_extract (operands[0], tmp, ++ const0_rtx)); ++ DONE; ++}) ++ ++;; merge vec_unpacks_hi_v8sf/vec_unpacks_lo_v8sf ++(define_peephole ++ [(set (match_operand:V4DF 0 "register_operand") ++ (float_extend:V4DF (vec_select:V4SF ++ (match_operand:V8SF 1 "register_operand") ++ (parallel [(const_int 0) (const_int 1) ++ (const_int 2) (const_int 3)])))) ++ (set (match_operand:V4DF 2 "register_operand") ++ (float_extend:V4DF (vec_select:V4SF ++ (match_operand:V8SF 3 "register_operand") ++ (parallel [(const_int 4) (const_int 5) ++ (const_int 6) (const_int 
7)]))))] ++ "ISA_HAS_LASX && rtx_equal_p (operands[1], operands[3])" ++{ ++ return "xvpermi.d\t%u2,%u1,0xd8\n\txvfcvtl.d.s\t%u0,%u2\n\txvfcvth.d.s\t%u2,%u2"; ++}) +diff --git a/gcc/config/loongarch/lasxintrin.h b/gcc/config/loongarch/lasxintrin.h +index 185eee869..58f3047ac 100644 +--- a/gcc/config/loongarch/lasxintrin.h ++++ b/gcc/config/loongarch/lasxintrin.h +@@ -3262,70 +3262,70 @@ __m256i __lasx_xvftintrnel_l_s(__m256 _1) + /* Assembly instruction format: xd, xj. */ + /* Data types in instruction templates: V8SI, V8SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m256i __lasx_xvfrintrne_s(__m256 _1) ++__m256 __lasx_xvfrintrne_s(__m256 _1) + { +- return (__m256i)__builtin_lasx_xvfrintrne_s((v8f32)_1); ++ return (__m256)__builtin_lasx_xvfrintrne_s((v8f32)_1); + } + + /* Assembly instruction format: xd, xj. */ + /* Data types in instruction templates: V4DI, V4DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m256i __lasx_xvfrintrne_d(__m256d _1) ++__m256d __lasx_xvfrintrne_d(__m256d _1) + { +- return (__m256i)__builtin_lasx_xvfrintrne_d((v4f64)_1); ++ return (__m256d)__builtin_lasx_xvfrintrne_d((v4f64)_1); + } + + /* Assembly instruction format: xd, xj. */ + /* Data types in instruction templates: V8SI, V8SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m256i __lasx_xvfrintrz_s(__m256 _1) ++__m256 __lasx_xvfrintrz_s(__m256 _1) + { +- return (__m256i)__builtin_lasx_xvfrintrz_s((v8f32)_1); ++ return (__m256)__builtin_lasx_xvfrintrz_s((v8f32)_1); + } + + /* Assembly instruction format: xd, xj. */ + /* Data types in instruction templates: V4DI, V4DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m256i __lasx_xvfrintrz_d(__m256d _1) ++__m256d __lasx_xvfrintrz_d(__m256d _1) + { +- return (__m256i)__builtin_lasx_xvfrintrz_d((v4f64)_1); ++ return (__m256d)__builtin_lasx_xvfrintrz_d((v4f64)_1); + } + + /* Assembly instruction format: xd, xj. */ + /* Data types in instruction templates: V8SI, V8SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m256i __lasx_xvfrintrp_s(__m256 _1) ++__m256 __lasx_xvfrintrp_s(__m256 _1) + { +- return (__m256i)__builtin_lasx_xvfrintrp_s((v8f32)_1); ++ return (__m256)__builtin_lasx_xvfrintrp_s((v8f32)_1); + } + + /* Assembly instruction format: xd, xj. */ + /* Data types in instruction templates: V4DI, V4DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m256i __lasx_xvfrintrp_d(__m256d _1) ++__m256d __lasx_xvfrintrp_d(__m256d _1) + { +- return (__m256i)__builtin_lasx_xvfrintrp_d((v4f64)_1); ++ return (__m256d)__builtin_lasx_xvfrintrp_d((v4f64)_1); + } + + /* Assembly instruction format: xd, xj. */ + /* Data types in instruction templates: V8SI, V8SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m256i __lasx_xvfrintrm_s(__m256 _1) ++__m256 __lasx_xvfrintrm_s(__m256 _1) + { +- return (__m256i)__builtin_lasx_xvfrintrm_s((v8f32)_1); ++ return (__m256)__builtin_lasx_xvfrintrm_s((v8f32)_1); + } + + /* Assembly instruction format: xd, xj. */ + /* Data types in instruction templates: V4DI, V4DF. 
*/ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m256i __lasx_xvfrintrm_d(__m256d _1) ++__m256d __lasx_xvfrintrm_d(__m256d _1) + { +- return (__m256i)__builtin_lasx_xvfrintrm_d((v4f64)_1); ++ return (__m256d)__builtin_lasx_xvfrintrm_d((v4f64)_1); + } + + /* Assembly instruction format: xd, rj, si12. */ + /* Data types in instruction templates: V32QI, CVPOINTER, SI. */ +-#define __lasx_xvld(/*void **/ _1, /*si12*/ _2) ((__m256i)__builtin_lasx_xvld((void *)(_1), (_2))) ++#define __lasx_xvld(/*void **/ _1, /*si12*/ _2) ((__m256i)__builtin_lasx_xvld((void const *)(_1), (_2))) + + /* Assembly instruction format: xd, rj, si12. */ + /* Data types in instruction templates: VOID, V32QI, CVPOINTER, SI. */ +@@ -3426,9 +3426,9 @@ __m256i __lasx_xvorn_v(__m256i _1, __m256i _2) + /* Assembly instruction format: xd, rj, rk. */ + /* Data types in instruction templates: V32QI, CVPOINTER, DI. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m256i __lasx_xvldx(void * _1, long int _2) ++__m256i __lasx_xvldx(void const * _1, long int _2) + { +- return (__m256i)__builtin_lasx_xvldx((void *)_1, (long int)_2); ++ return (__m256i)__builtin_lasx_xvldx((void const *)_1, (long int)_2); + } + + /* Assembly instruction format: xd, rj, rk. */ +@@ -3609,19 +3609,19 @@ __m256i __lasx_xvperm_w(__m256i _1, __m256i _2) + + /* Assembly instruction format: xd, rj, si12. */ + /* Data types in instruction templates: V32QI, CVPOINTER, SI. */ +-#define __lasx_xvldrepl_b(/*void **/ _1, /*si12*/ _2) ((__m256i)__builtin_lasx_xvldrepl_b((void *)(_1), (_2))) ++#define __lasx_xvldrepl_b(/*void **/ _1, /*si12*/ _2) ((__m256i)__builtin_lasx_xvldrepl_b((void const *)(_1), (_2))) + + /* Assembly instruction format: xd, rj, si11. */ + /* Data types in instruction templates: V16HI, CVPOINTER, SI. */ +-#define __lasx_xvldrepl_h(/*void **/ _1, /*si11*/ _2) ((__m256i)__builtin_lasx_xvldrepl_h((void *)(_1), (_2))) ++#define __lasx_xvldrepl_h(/*void **/ _1, /*si11*/ _2) ((__m256i)__builtin_lasx_xvldrepl_h((void const *)(_1), (_2))) + + /* Assembly instruction format: xd, rj, si10. */ + /* Data types in instruction templates: V8SI, CVPOINTER, SI. */ +-#define __lasx_xvldrepl_w(/*void **/ _1, /*si10*/ _2) ((__m256i)__builtin_lasx_xvldrepl_w((void *)(_1), (_2))) ++#define __lasx_xvldrepl_w(/*void **/ _1, /*si10*/ _2) ((__m256i)__builtin_lasx_xvldrepl_w((void const *)(_1), (_2))) + + /* Assembly instruction format: xd, rj, si9. */ + /* Data types in instruction templates: V4DI, CVPOINTER, SI. */ +-#define __lasx_xvldrepl_d(/*void **/ _1, /*si9*/ _2) ((__m256i)__builtin_lasx_xvldrepl_d((void *)(_1), (_2))) ++#define __lasx_xvldrepl_d(/*void **/ _1, /*si9*/ _2) ((__m256i)__builtin_lasx_xvldrepl_d((void const *)(_1), (_2))) + + /* Assembly instruction format: rd, xj, ui3. */ + /* Data types in instruction templates: SI, V8SI, UQI. */ +diff --git a/gcc/config/loongarch/linux-common.h b/gcc/config/loongarch/linux-common.h +deleted file mode 100644 +index 9e1a1b50f..000000000 +--- a/gcc/config/loongarch/linux-common.h ++++ /dev/null +@@ -1,68 +0,0 @@ +-/* Definitions for LARCH running Linux-based GNU systems with ELF format. +- Copyright (C) 2012-2018 Free Software Foundation, Inc. +- +-This file is part of GCC. +- +-GCC is free software; you can redistribute it and/or modify +-it under the terms of the GNU General Public License as published by +-the Free Software Foundation; either version 3, or (at your option) +-any later version. 
+- +-GCC is distributed in the hope that it will be useful, +-but WITHOUT ANY WARRANTY; without even the implied warranty of +-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-GNU General Public License for more details. +- +-You should have received a copy of the GNU General Public License +-along with GCC; see the file COPYING3. If not see +-. */ +- +-#undef TARGET_OS_CPP_BUILTINS +-#define TARGET_OS_CPP_BUILTINS() \ +- do { \ +- GNU_USER_TARGET_OS_CPP_BUILTINS(); \ +- /* The GNU C++ standard library requires this. */ \ +- if (c_dialect_cxx ()) \ +- builtin_define ("_GNU_SOURCE"); \ +- ANDROID_TARGET_OS_CPP_BUILTINS(); \ +- } while (0) +- +-#define EXTRA_TARGET_D_OS_VERSIONS() \ +- ANDROID_TARGET_D_OS_VERSIONS(); +- +-#undef LINK_SPEC +-#define LINK_SPEC \ +- LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LINK_SPEC, \ +- GNU_USER_TARGET_LINK_SPEC " " ANDROID_LINK_SPEC) +- +-#undef SUBTARGET_CC1_SPEC +-#define SUBTARGET_CC1_SPEC \ +- LINUX_OR_ANDROID_CC (GNU_USER_TARGET_CC1_SPEC, \ +- GNU_USER_TARGET_CC1_SPEC " " ANDROID_CC1_SPEC) +- +-#undef CC1PLUS_SPEC +-#define CC1PLUS_SPEC \ +- LINUX_OR_ANDROID_CC ("", ANDROID_CC1PLUS_SPEC) +- +-#undef LIB_SPEC +-#define LIB_SPEC \ +- LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LIB_SPEC, \ +- GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC " " ANDROID_LIB_SPEC) +- +-#undef STARTFILE_SPEC +-#define STARTFILE_SPEC \ +- LINUX_OR_ANDROID_LD (GNU_USER_TARGET_STARTFILE_SPEC, ANDROID_STARTFILE_SPEC) +- +-#undef ENDFILE_SPEC +-#define ENDFILE_SPEC \ +- LINUX_OR_ANDROID_LD (GNU_USER_TARGET_MATHFILE_SPEC " " \ +- GNU_USER_TARGET_ENDFILE_SPEC, \ +- GNU_USER_TARGET_MATHFILE_SPEC " " \ +- ANDROID_ENDFILE_SPEC) +- +-/* Define this to be nonzero if static stack checking is supported. */ +-#define STACK_CHECK_STATIC_BUILTIN 1 +- +-/* FIXME*/ +-/* The default value isn't sufficient in 64-bit mode. */ +-#define STACK_CHECK_PROTECT (TARGET_64BIT ? 16 * 1024 : 12 * 1024) +diff --git a/gcc/config/loongarch/linux.h b/gcc/config/loongarch/linux.h +index 520a8ef32..59854251f 100644 +--- a/gcc/config/loongarch/linux.h ++++ b/gcc/config/loongarch/linux.h +@@ -1,4 +1,4 @@ +-/* Definitions for LARCH running Linux-based GNU systems with ELF format. ++/* Definitions for Linux-based systems with libraries in ELF format. + Copyright (C) 1998-2018 Free Software Foundation, Inc. + + This file is part of GCC. +@@ -17,17 +17,34 @@ You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +-#define GNU_USER_LINK_EMULATION32 "elf32loongarch" +-#define GNU_USER_LINK_EMULATION64 "elf64loongarch" ++/* Default system library search paths. ++ * This ensures that a compiler configured with --disable-multilib ++ * can work in a multilib environment. 
*/ + +-#define GLIBC_DYNAMIC_LINKERLP32 \ +- "/lib32/ld.so.1" +-#define GLIBC_DYNAMIC_LINKERLP64 \ +- "/lib64/ld.so.1" ++#if defined(LA_DISABLE_MULTILIB) && defined(LA_DISABLE_MULTIARCH) + +-#define GNU_USER_DYNAMIC_LINKERLP32 GLIBC_DYNAMIC_LINKERLP32 +-#define GNU_USER_DYNAMIC_LINKERLP64 GLIBC_DYNAMIC_LINKERLP64 ++ #if DEFAULT_ABI_BASE == ABI_BASE_LP64D ++ #define ABI_LIBDIR "lib64" ++ #elif DEFAULT_ABI_BASE == ABI_BASE_LP64F ++ #define ABI_LIBDIR "lib64/f32" ++ #elif DEFAULT_ABI_BASE == ABI_BASE_LP64S ++ #define ABI_LIBDIR "lib64/sf" ++ #endif + ++#endif ++ ++#ifndef ABI_LIBDIR ++#define ABI_LIBDIR "lib" ++#endif ++ ++#define STANDARD_STARTFILE_PREFIX_1 "/" ABI_LIBDIR "/" ++#define STANDARD_STARTFILE_PREFIX_2 "/usr/" ABI_LIBDIR "/" ++ ++ ++/* Define this to be nonzero if static stack checking is supported. */ ++#define STACK_CHECK_STATIC_BUILTIN 1 ++ ++/* The default value isn't sufficient in 64-bit mode. */ ++#define STACK_CHECK_PROTECT (TARGET_64BIT ? 16 * 1024 : 12 * 1024) + +-#undef TARGET_ASM_FILE_END + #define TARGET_ASM_FILE_END file_end_indicate_exec_stack +diff --git a/gcc/config/loongarch/loongarch-builtins.c b/gcc/config/loongarch/loongarch-builtins.c +index 9fa68b11f..b326ec46c 100644 +--- a/gcc/config/loongarch/loongarch-builtins.c ++++ b/gcc/config/loongarch/loongarch-builtins.c +@@ -1,7 +1,6 @@ +- +-/* Subroutines used for expanding LOONGARCH builtins. +- Copyright (C) 2011-2018 Free Software Foundation, Inc. +- Contributed by Andrew Waterman (andrew@sifive.com). ++/* Subroutines used for expanding LoongArch builtins. ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. ++ Contributed by Loongson Co. Ltd. + + This file is part of GCC. + +@@ -30,50 +29,29 @@ along with GCC; see the file COPYING3. If not see + #include "tree.h" + #include "memmodel.h" + #include "gimple.h" +-#include "cfghooks.h" +-#include "df.h" + #include "tm_p.h" +-#include "stringpool.h" +-#include "attribs.h" + #include "optabs.h" +-#include "regs.h" +-#include "emit-rtl.h" + #include "recog.h" +-#include "cgraph.h" + #include "diagnostic.h" +-#include "insn-attr.h" +-#include "output.h" +-#include "alias.h" + #include "fold-const.h" +-#include "varasm.h" +-#include "stor-layout.h" +-#include "calls.h" +-#include "explow.h" + #include "expr.h" +-#include "libfuncs.h" +-#include "reload.h" +-#include "common/common-target.h" + #include "langhooks.h" +-#include "cfgrtl.h" +-#include "cfganal.h" +-#include "sched-int.h" +-#include "gimplify.h" +-#include "target-globals.h" +-#include "tree-pass.h" +-#include "context.h" ++#include "emit-rtl.h" ++#include "explow.h" + #include "builtins.h" +-#include "rtl-iter.h" ++#include "stringpool.h" ++#include "case-cfn-macros.h" + +-/* This file should be included last. */ +-#include "target-def.h" + /* Macros to create an enumeration identifier for a function prototype. */ + #define LARCH_FTYPE_NAME1(A, B) LARCH_##A##_FTYPE_##B + #define LARCH_FTYPE_NAME2(A, B, C) LARCH_##A##_FTYPE_##B##_##C + #define LARCH_FTYPE_NAME3(A, B, C, D) LARCH_##A##_FTYPE_##B##_##C##_##D +-#define LARCH_FTYPE_NAME4(A, B, C, D, E) LARCH_##A##_FTYPE_##B##_##C##_##D##_##E ++#define LARCH_FTYPE_NAME4(A, B, C, D, E) \ ++ LARCH_##A##_FTYPE_##B##_##C##_##D##_##E + + /* Classifies the prototype of a built-in function. 
*/ +-enum loongarch_function_type { ++enum loongarch_function_type ++{ + #define DEF_LARCH_FTYPE(NARGS, LIST) LARCH_FTYPE_NAME##NARGS LIST, + #include "config/loongarch/loongarch-ftypes.def" + #undef DEF_LARCH_FTYPE +@@ -81,7 +59,8 @@ enum loongarch_function_type { + }; + + /* Specifies how a built-in function should be converted into rtl. */ +-enum loongarch_builtin_type { ++enum loongarch_builtin_type ++{ + /* The function corresponds directly to an .md pattern. The return + value is mapped to operand 0 and the arguments are mapped to + operands 1 and above. */ +@@ -91,23 +70,23 @@ enum loongarch_builtin_type { + value and the arguments are mapped to operands 0 and above. */ + LARCH_BUILTIN_DIRECT_NO_TARGET, + ++ /* For generating LoongArch LSX. */ ++ LARCH_BUILTIN_LSX, ++ + /* The function corresponds to an LSX conditional branch instruction + combined with a compare instruction. */ + LARCH_BUILTIN_LSX_TEST_BRANCH, + +- /* For generating LoongArch LSX. */ +- LARCH_BUILTIN_LSX, +- + /* For generating LoongArch LASX. */ + LARCH_BUILTIN_LASX, + + /* The function corresponds to an LASX conditional branch instruction + combined with a compare instruction. */ +- LARCH_BUILTIN_LASX_TEST_BRANCH, ++ LARCH_BUILTIN_LASX_TEST_BRANCH + + }; + +-/* Invoke MACRO (COND) for each C.cond.fmt condition. */ ++/* Invoke MACRO (COND) for each fcmp.cond.{s/d} condition. */ + #define LARCH_FP_CONDITIONS(MACRO) \ + MACRO (f), \ + MACRO (un), \ +@@ -127,26 +106,27 @@ enum loongarch_builtin_type { + MACRO (ngt) + + /* Enumerates the codes above as LARCH_FP_COND_. */ +-#define DECLARE_LARCH_COND(X) LARCH_FP_COND_ ## X +-enum loongarch_fp_condition { ++#define DECLARE_LARCH_COND(X) LARCH_FP_COND_##X ++enum loongarch_fp_condition ++{ + LARCH_FP_CONDITIONS (DECLARE_LARCH_COND) + }; + #undef DECLARE_LARCH_COND + + /* Index X provides the string representation of LARCH_FP_COND_. */ + #define STRINGIFY(X) #X +-const char *const loongarch_fp_conditions[16] = { +- LARCH_FP_CONDITIONS (STRINGIFY) +-}; ++const char *const ++loongarch_fp_conditions[16] = {LARCH_FP_CONDITIONS (STRINGIFY)}; + #undef STRINGIFY +-/* Declare an availability predicate for built-in functions that require ++ ++/* Declare an availability predicate for built-in functions that require + * COND to be true. NAME is the main part of the predicate's name. */ +-#define AVAIL_ALL(NAME, COND) \ +- static unsigned int \ +- loongarch_builtin_avail_##NAME (void) \ +- { \ +- return (COND) ? 1 : 0; \ +- } ++#define AVAIL_ALL(NAME, COND) \ ++ static unsigned int \ ++ loongarch_builtin_avail_##NAME (void) \ ++ { \ ++ return (COND) ? 1 : 0; \ ++ } + + static unsigned int + loongarch_builtin_avail_default (void) +@@ -154,14 +134,12 @@ loongarch_builtin_avail_default (void) + return 1; + } + /* This structure describes a single built-in function. */ +-struct loongarch_builtin_description { ++struct loongarch_builtin_description ++{ + /* The code of the main .md file instruction. See loongarch_builtin_type + for more information. */ + enum insn_code icode; + +- /* The floating-point comparison code to use with ICODE, if any. */ +- enum loongarch_fp_condition cond; +- + /* The name of the built-in function. */ + const char *name; + +@@ -176,8 +154,8 @@ struct loongarch_builtin_description { + }; + + AVAIL_ALL (hard_float, TARGET_HARD_FLOAT_ABI) +-AVAIL_ALL (lsx, TARGET_LSX) +-AVAIL_ALL (lasx, TARGET_LASX) ++AVAIL_ALL (lsx, ISA_HAS_LSX) ++AVAIL_ALL (lasx, ISA_HAS_LASX) + + /* Construct a loongarch_builtin_description from the given arguments. 
+ +@@ -194,31 +172,32 @@ AVAIL_ALL (lasx, TARGET_LASX) + + AVAIL is the name of the availability predicate, without the leading + loongarch_builtin_avail_. */ +-#define LARCH_BUILTIN(INSN, COND, NAME, BUILTIN_TYPE, \ +- FUNCTION_TYPE, AVAIL) \ +- { CODE_FOR_loongarch_ ## INSN, LARCH_FP_COND_ ## COND, \ +- "__builtin_loongarch_" NAME, BUILTIN_TYPE, FUNCTION_TYPE, \ +- loongarch_builtin_avail_ ## AVAIL } ++#define LARCH_BUILTIN(INSN, NAME, BUILTIN_TYPE, FUNCTION_TYPE, AVAIL) \ ++ { \ ++ CODE_FOR_loongarch_##INSN, "__builtin_loongarch_" NAME, \ ++ BUILTIN_TYPE, FUNCTION_TYPE, \ ++ loongarch_builtin_avail_##AVAIL \ ++ } + + /* Define __builtin_loongarch_, which is a LARCH_BUILTIN_DIRECT function + mapped to instruction CODE_FOR_loongarch_, FUNCTION_TYPE and AVAIL + are as for LARCH_BUILTIN. */ +-#define DIRECT_BUILTIN(INSN, FUNCTION_TYPE, AVAIL) \ +- LARCH_BUILTIN (INSN, f, #INSN, LARCH_BUILTIN_DIRECT, FUNCTION_TYPE, AVAIL) ++#define DIRECT_BUILTIN(INSN, FUNCTION_TYPE, AVAIL) \ ++ LARCH_BUILTIN (INSN, #INSN, LARCH_BUILTIN_DIRECT, FUNCTION_TYPE, AVAIL) + + /* Define __builtin_loongarch_, which is a LARCH_BUILTIN_DIRECT_NO_TARGET + function mapped to instruction CODE_FOR_loongarch_, FUNCTION_TYPE + and AVAIL are as for LARCH_BUILTIN. */ +-#define DIRECT_NO_TARGET_BUILTIN(INSN, FUNCTION_TYPE, AVAIL) \ +- LARCH_BUILTIN (INSN, f, #INSN, LARCH_BUILTIN_DIRECT_NO_TARGET, \ +- FUNCTION_TYPE, AVAIL) ++#define DIRECT_NO_TARGET_BUILTIN(INSN, FUNCTION_TYPE, AVAIL) \ ++ LARCH_BUILTIN (INSN, #INSN, LARCH_BUILTIN_DIRECT_NO_TARGET, \ ++ FUNCTION_TYPE, AVAIL) + + /* Define an LSX LARCH_BUILTIN_DIRECT function __builtin_lsx_ + for instruction CODE_FOR_lsx_. FUNCTION_TYPE is a builtin_description + field. */ + #define LSX_BUILTIN(INSN, FUNCTION_TYPE) \ +- { CODE_FOR_lsx_ ## INSN, LARCH_FP_COND_f, \ +- "__builtin_lsx_" #INSN, LARCH_BUILTIN_DIRECT, \ ++ { CODE_FOR_lsx_ ## INSN, \ ++ "__builtin_lsx_" #INSN, LARCH_BUILTIN_DIRECT, \ + FUNCTION_TYPE, loongarch_builtin_avail_lsx } + + +@@ -226,7 +205,7 @@ AVAIL_ALL (lasx, TARGET_LASX) + for instruction CODE_FOR_lsx_. FUNCTION_TYPE is a builtin_description + field. */ + #define LSX_BUILTIN_TEST_BRANCH(INSN, FUNCTION_TYPE) \ +- { CODE_FOR_lsx_ ## INSN, LARCH_FP_COND_f, \ ++ { CODE_FOR_lsx_ ## INSN, \ + "__builtin_lsx_" #INSN, LARCH_BUILTIN_LSX_TEST_BRANCH, \ + FUNCTION_TYPE, loongarch_builtin_avail_lsx } + +@@ -234,7 +213,7 @@ AVAIL_ALL (lasx, TARGET_LASX) + for instruction CODE_FOR_lsx_. FUNCTION_TYPE is a builtin_description + field. */ + #define LSX_NO_TARGET_BUILTIN(INSN, FUNCTION_TYPE) \ +- { CODE_FOR_lsx_ ## INSN, LARCH_FP_COND_f, \ ++ { CODE_FOR_lsx_ ## INSN, \ + "__builtin_lsx_" #INSN, LARCH_BUILTIN_DIRECT_NO_TARGET, \ + FUNCTION_TYPE, loongarch_builtin_avail_lsx } + +@@ -242,7 +221,7 @@ AVAIL_ALL (lasx, TARGET_LASX) + for instruction CODE_FOR_lasx_. FUNCTION_TYPE is a builtin_description + field. */ + #define LASX_BUILTIN(INSN, FUNCTION_TYPE) \ +- { CODE_FOR_lasx_ ## INSN, LARCH_FP_COND_f, \ ++ { CODE_FOR_lasx_ ## INSN, \ + "__builtin_lasx_" #INSN, LARCH_BUILTIN_LASX, \ + FUNCTION_TYPE, loongarch_builtin_avail_lasx } + +@@ -250,7 +229,7 @@ AVAIL_ALL (lasx, TARGET_LASX) + for instruction CODE_FOR_lasx_. FUNCTION_TYPE is a builtin_description + field. 
*/ + #define LASX_NO_TARGET_BUILTIN(INSN, FUNCTION_TYPE) \ +- { CODE_FOR_lasx_ ## INSN, LARCH_FP_COND_f, \ ++ { CODE_FOR_lasx_ ## INSN, \ + "__builtin_lasx_" #INSN, LARCH_BUILTIN_DIRECT_NO_TARGET, \ + FUNCTION_TYPE, loongarch_builtin_avail_lasx } + +@@ -258,65 +237,10 @@ AVAIL_ALL (lasx, TARGET_LASX) + for instruction CODE_FOR_lasx_. FUNCTION_TYPE is a builtin_description + field. */ + #define LASX_BUILTIN_TEST_BRANCH(INSN, FUNCTION_TYPE) \ +- { CODE_FOR_lasx_ ## INSN, LARCH_FP_COND_f, \ ++ { CODE_FOR_lasx_ ## INSN, \ + "__builtin_lasx_" #INSN, LARCH_BUILTIN_LASX_TEST_BRANCH, \ + FUNCTION_TYPE, loongarch_builtin_avail_lasx } + +-/* LoongArch BASE instructions define CODE_FOR_loongarch_xxx */ +-#define CODE_FOR_loongarch_fmax_sf CODE_FOR_smaxsf3 +-#define CODE_FOR_loongarch_fmax_df CODE_FOR_smaxdf3 +-#define CODE_FOR_loongarch_fmin_sf CODE_FOR_sminsf3 +-#define CODE_FOR_loongarch_fmin_df CODE_FOR_smindf3 +-#define CODE_FOR_loongarch_fmaxa_sf CODE_FOR_smaxasf3 +-#define CODE_FOR_loongarch_fmaxa_df CODE_FOR_smaxadf3 +-#define CODE_FOR_loongarch_fmina_sf CODE_FOR_sminasf3 +-#define CODE_FOR_loongarch_fmina_df CODE_FOR_sminadf3 +-#define CODE_FOR_loongarch_fclass_s CODE_FOR_fclass_s +-#define CODE_FOR_loongarch_fclass_d CODE_FOR_fclass_d +-#define CODE_FOR_loongarch_frint_s CODE_FOR_frint_s +-#define CODE_FOR_loongarch_frint_d CODE_FOR_frint_d +-#define CODE_FOR_loongarch_bytepick_w CODE_FOR_bytepick_w +-#define CODE_FOR_loongarch_bytepick_d CODE_FOR_bytepick_d +-#define CODE_FOR_loongarch_bitrev_4b CODE_FOR_bitrev_4b +-#define CODE_FOR_loongarch_bitrev_8b CODE_FOR_bitrev_8b +- +-/* LoongArch support crc */ +-#define CODE_FOR_loongarch_crc_w_b_w CODE_FOR_crc_w_b_w +-#define CODE_FOR_loongarch_crc_w_h_w CODE_FOR_crc_w_h_w +-#define CODE_FOR_loongarch_crc_w_w_w CODE_FOR_crc_w_w_w +-#define CODE_FOR_loongarch_crc_w_d_w CODE_FOR_crc_w_d_w +-#define CODE_FOR_loongarch_crcc_w_b_w CODE_FOR_crcc_w_b_w +-#define CODE_FOR_loongarch_crcc_w_h_w CODE_FOR_crcc_w_h_w +-#define CODE_FOR_loongarch_crcc_w_w_w CODE_FOR_crcc_w_w_w +-#define CODE_FOR_loongarch_crcc_w_d_w CODE_FOR_crcc_w_d_w +- +-/* Privileged state instruction */ +-#define CODE_FOR_loongarch_cpucfg CODE_FOR_cpucfg +-#define CODE_FOR_loongarch_asrtle_d CODE_FOR_asrtle_d +-#define CODE_FOR_loongarch_asrtgt_d CODE_FOR_asrtgt_d +-#define CODE_FOR_loongarch_csrrd CODE_FOR_csrrd +-#define CODE_FOR_loongarch_dcsrrd CODE_FOR_dcsrrd +-#define CODE_FOR_loongarch_csrwr CODE_FOR_csrwr +-#define CODE_FOR_loongarch_dcsrwr CODE_FOR_dcsrwr +-#define CODE_FOR_loongarch_csrxchg CODE_FOR_csrxchg +-#define CODE_FOR_loongarch_dcsrxchg CODE_FOR_dcsrxchg +-#define CODE_FOR_loongarch_iocsrrd_b CODE_FOR_iocsrrd_b +-#define CODE_FOR_loongarch_iocsrrd_h CODE_FOR_iocsrrd_h +-#define CODE_FOR_loongarch_iocsrrd_w CODE_FOR_iocsrrd_w +-#define CODE_FOR_loongarch_iocsrrd_d CODE_FOR_iocsrrd_d +-#define CODE_FOR_loongarch_iocsrwr_b CODE_FOR_iocsrwr_b +-#define CODE_FOR_loongarch_iocsrwr_h CODE_FOR_iocsrwr_h +-#define CODE_FOR_loongarch_iocsrwr_w CODE_FOR_iocsrwr_w +-#define CODE_FOR_loongarch_iocsrwr_d CODE_FOR_iocsrwr_d +-#define CODE_FOR_loongarch_lddir CODE_FOR_lddir +-#define CODE_FOR_loongarch_dlddir CODE_FOR_dlddir +-#define CODE_FOR_loongarch_ldpte CODE_FOR_ldpte +-#define CODE_FOR_loongarch_dldpte CODE_FOR_dldpte +-#define CODE_FOR_loongarch_cacop CODE_FOR_cacop +-#define CODE_FOR_loongarch_dcacop CODE_FOR_dcacop +-#define CODE_FOR_loongarch_dbar CODE_FOR_dbar +-#define CODE_FOR_loongarch_ibar CODE_FOR_ibar +- + /* LoongArch SX define CODE_FOR_lsx_xxx */ + #define 
CODE_FOR_lsx_vsadd_b CODE_FOR_ssaddv16qi3 + #define CODE_FOR_lsx_vsadd_h CODE_FOR_ssaddv8hi3 +@@ -389,6 +313,8 @@ AVAIL_ALL (lasx, TARGET_LASX) + #define CODE_FOR_lsx_vfmin_d CODE_FOR_sminv2df3 + #define CODE_FOR_lsx_vfsqrt_s CODE_FOR_sqrtv4sf2 + #define CODE_FOR_lsx_vfsqrt_d CODE_FOR_sqrtv2df2 ++#define CODE_FOR_lsx_vflogb_s CODE_FOR_logbv4sf2 ++#define CODE_FOR_lsx_vflogb_d CODE_FOR_logbv2df2 + #define CODE_FOR_lsx_vmax_b CODE_FOR_smaxv16qi3 + #define CODE_FOR_lsx_vmax_h CODE_FOR_smaxv8hi3 + #define CODE_FOR_lsx_vmax_w CODE_FOR_smaxv4si3 +@@ -654,6 +580,8 @@ AVAIL_ALL (lasx, TARGET_LASX) + #define CODE_FOR_lasx_xvfmin_d CODE_FOR_sminv4df3 + #define CODE_FOR_lasx_xvfsqrt_s CODE_FOR_sqrtv8sf2 + #define CODE_FOR_lasx_xvfsqrt_d CODE_FOR_sqrtv4df2 ++#define CODE_FOR_lasx_xvflogb_s CODE_FOR_logbv8sf2 ++#define CODE_FOR_lasx_xvflogb_d CODE_FOR_logbv4df2 + #define CODE_FOR_lasx_xvmax_b CODE_FOR_smaxv32qi3 + #define CODE_FOR_lasx_xvmax_h CODE_FOR_smaxv16hi3 + #define CODE_FOR_lasx_xvmax_w CODE_FOR_smaxv8si3 +@@ -771,6 +699,7 @@ AVAIL_ALL (lasx, TARGET_LASX) + #define CODE_FOR_lasx_xvfnmsub_d CODE_FOR_xvfnmsubv4df4_nmsub4 + + #define CODE_FOR_lasx_xvpermi_q CODE_FOR_lasx_xvpermi_q_v32qi ++#define CODE_FOR_lasx_xvpermi_d CODE_FOR_lasx_xvpermi_d_v4di + #define CODE_FOR_lasx_xbnz_v CODE_FOR_lasx_xbnz_v_b + #define CODE_FOR_lasx_xbz_v CODE_FOR_lasx_xbz_v_b + +@@ -857,36 +786,17 @@ static const struct loongarch_builtin_description loongarch_builtins[] = { + #define LARCH_MOVGR2FCSR 1 + DIRECT_NO_TARGET_BUILTIN (movgr2fcsr, LARCH_VOID_FTYPE_UQI_USI, hard_float), + +- DIRECT_NO_TARGET_BUILTIN (cacop, LARCH_VOID_FTYPE_USI_USI_SI, default), +- DIRECT_NO_TARGET_BUILTIN (dcacop, LARCH_VOID_FTYPE_USI_UDI_SI, default), ++ DIRECT_NO_TARGET_BUILTIN (cacop_w, LARCH_VOID_FTYPE_USI_USI_SI, default), ++ DIRECT_NO_TARGET_BUILTIN (cacop_d, LARCH_VOID_FTYPE_USI_UDI_SI, default), + DIRECT_NO_TARGET_BUILTIN (dbar, LARCH_VOID_FTYPE_USI, default), + DIRECT_NO_TARGET_BUILTIN (ibar, LARCH_VOID_FTYPE_USI, default), + +- DIRECT_BUILTIN (fmax_sf, LARCH_SF_FTYPE_SF_SF, hard_float), +- DIRECT_BUILTIN (fmax_df, LARCH_DF_FTYPE_DF_DF, hard_float), +- DIRECT_BUILTIN (fmin_sf, LARCH_SF_FTYPE_SF_SF, hard_float), +- DIRECT_BUILTIN (fmin_df, LARCH_DF_FTYPE_DF_DF, hard_float), +- DIRECT_BUILTIN (fmaxa_sf, LARCH_SF_FTYPE_SF_SF, hard_float), +- DIRECT_BUILTIN (fmaxa_df, LARCH_DF_FTYPE_DF_DF, hard_float), +- DIRECT_BUILTIN (fmina_sf, LARCH_SF_FTYPE_SF_SF, hard_float), +- DIRECT_BUILTIN (fmina_df, LARCH_DF_FTYPE_DF_DF, hard_float), +- DIRECT_BUILTIN (fclass_s, LARCH_SF_FTYPE_SF, hard_float), +- DIRECT_BUILTIN (fclass_d, LARCH_DF_FTYPE_DF, hard_float), +- DIRECT_BUILTIN (frint_s, LARCH_SF_FTYPE_SF, hard_float), +- DIRECT_BUILTIN (frint_d, LARCH_DF_FTYPE_DF, hard_float), +- DIRECT_BUILTIN (bytepick_w, LARCH_SI_FTYPE_SI_SI_QI, default), +- DIRECT_BUILTIN (bytepick_d, LARCH_DI_FTYPE_DI_DI_QI, default), +- DIRECT_BUILTIN (bitrev_4b, LARCH_SI_FTYPE_SI, default), +- DIRECT_BUILTIN (bitrev_8b, LARCH_DI_FTYPE_DI, default), +- DIRECT_BUILTIN (cpucfg, LARCH_USI_FTYPE_USI, default), +- DIRECT_BUILTIN (asrtle_d, LARCH_VOID_FTYPE_DI_DI, default), +- DIRECT_BUILTIN (asrtgt_d, LARCH_VOID_FTYPE_DI_DI, default), +- DIRECT_BUILTIN (dlddir, LARCH_DI_FTYPE_DI_UQI, default), +- DIRECT_BUILTIN (lddir, LARCH_SI_FTYPE_SI_UQI, default), +- DIRECT_NO_TARGET_BUILTIN (dldpte, LARCH_VOID_FTYPE_DI_UQI, default), +- DIRECT_NO_TARGET_BUILTIN (ldpte, LARCH_VOID_FTYPE_SI_UQI, default), ++ DIRECT_BUILTIN (lddir_d, LARCH_DI_FTYPE_DI_UQI, default), ++ DIRECT_BUILTIN (lddir_w, 
LARCH_SI_FTYPE_SI_UQI, default), ++ DIRECT_NO_TARGET_BUILTIN (ldpte_d, LARCH_VOID_FTYPE_DI_UQI, default), ++ DIRECT_NO_TARGET_BUILTIN (ldpte_w, LARCH_VOID_FTYPE_SI_UQI, default), + +- /* CRC Instrinsic */ ++ /* CRC Instrinsic */ + + DIRECT_BUILTIN (crc_w_b_w, LARCH_SI_FTYPE_QI_SI, default), + DIRECT_BUILTIN (crc_w_h_w, LARCH_SI_FTYPE_HI_SI, default), +@@ -897,12 +807,12 @@ static const struct loongarch_builtin_description loongarch_builtins[] = { + DIRECT_BUILTIN (crcc_w_w_w, LARCH_SI_FTYPE_SI_SI, default), + DIRECT_BUILTIN (crcc_w_d_w, LARCH_SI_FTYPE_DI_SI, default), + +- DIRECT_BUILTIN (csrrd, LARCH_USI_FTYPE_USI, default), +- DIRECT_BUILTIN (dcsrrd, LARCH_UDI_FTYPE_USI, default), +- DIRECT_BUILTIN (csrwr, LARCH_USI_FTYPE_USI_USI, default), +- DIRECT_BUILTIN (dcsrwr, LARCH_UDI_FTYPE_UDI_USI, default), +- DIRECT_BUILTIN (csrxchg, LARCH_USI_FTYPE_USI_USI_USI, default), +- DIRECT_BUILTIN (dcsrxchg, LARCH_UDI_FTYPE_UDI_UDI_USI, default), ++ DIRECT_BUILTIN (csrrd_w, LARCH_USI_FTYPE_USI, default), ++ DIRECT_BUILTIN (csrrd_d, LARCH_UDI_FTYPE_USI, default), ++ DIRECT_BUILTIN (csrwr_w, LARCH_USI_FTYPE_USI_USI, default), ++ DIRECT_BUILTIN (csrwr_d, LARCH_UDI_FTYPE_UDI_USI, default), ++ DIRECT_BUILTIN (csrxchg_w, LARCH_USI_FTYPE_USI_USI_USI, default), ++ DIRECT_BUILTIN (csrxchg_d, LARCH_UDI_FTYPE_UDI_UDI_USI, default), + DIRECT_BUILTIN (iocsrrd_b, LARCH_UQI_FTYPE_USI, default), + DIRECT_BUILTIN (iocsrrd_h, LARCH_UHI_FTYPE_USI, default), + DIRECT_BUILTIN (iocsrrd_w, LARCH_USI_FTYPE_USI, default), +@@ -912,6 +822,12 @@ static const struct loongarch_builtin_description loongarch_builtins[] = { + DIRECT_NO_TARGET_BUILTIN (iocsrwr_w, LARCH_VOID_FTYPE_USI_USI, default), + DIRECT_NO_TARGET_BUILTIN (iocsrwr_d, LARCH_VOID_FTYPE_UDI_USI, default), + ++ DIRECT_BUILTIN (cpucfg, LARCH_USI_FTYPE_USI, default), ++ DIRECT_NO_TARGET_BUILTIN (asrtle_d, LARCH_VOID_FTYPE_DI_DI, default), ++ DIRECT_NO_TARGET_BUILTIN (asrtgt_d, LARCH_VOID_FTYPE_DI_DI, default), ++ DIRECT_NO_TARGET_BUILTIN (syscall, LARCH_VOID_FTYPE_USI, default), ++ DIRECT_NO_TARGET_BUILTIN (break, LARCH_VOID_FTYPE_USI, default), ++ + /* Built-in functions for LSX. 
*/ + LSX_BUILTIN (vsll_b, LARCH_V16QI_FTYPE_V16QI_V16QI), + LSX_BUILTIN (vsll_h, LARCH_V8HI_FTYPE_V8HI_V8HI), +@@ -1439,14 +1355,14 @@ static const struct loongarch_builtin_description loongarch_builtins[] = { + LSX_BUILTIN (vftintrmh_l_s, LARCH_V2DI_FTYPE_V4SF), + LSX_BUILTIN (vftintrnel_l_s, LARCH_V2DI_FTYPE_V4SF), + LSX_BUILTIN (vftintrneh_l_s, LARCH_V2DI_FTYPE_V4SF), +- LSX_BUILTIN (vfrintrne_s, LARCH_V4SI_FTYPE_V4SF), +- LSX_BUILTIN (vfrintrne_d, LARCH_V2DI_FTYPE_V2DF), +- LSX_BUILTIN (vfrintrz_s, LARCH_V4SI_FTYPE_V4SF), +- LSX_BUILTIN (vfrintrz_d, LARCH_V2DI_FTYPE_V2DF), +- LSX_BUILTIN (vfrintrp_s, LARCH_V4SI_FTYPE_V4SF), +- LSX_BUILTIN (vfrintrp_d, LARCH_V2DI_FTYPE_V2DF), +- LSX_BUILTIN (vfrintrm_s, LARCH_V4SI_FTYPE_V4SF), +- LSX_BUILTIN (vfrintrm_d, LARCH_V2DI_FTYPE_V2DF), ++ LSX_BUILTIN (vfrintrne_s, LARCH_V4SF_FTYPE_V4SF), ++ LSX_BUILTIN (vfrintrne_d, LARCH_V2DF_FTYPE_V2DF), ++ LSX_BUILTIN (vfrintrz_s, LARCH_V4SF_FTYPE_V4SF), ++ LSX_BUILTIN (vfrintrz_d, LARCH_V2DF_FTYPE_V2DF), ++ LSX_BUILTIN (vfrintrp_s, LARCH_V4SF_FTYPE_V4SF), ++ LSX_BUILTIN (vfrintrp_d, LARCH_V2DF_FTYPE_V2DF), ++ LSX_BUILTIN (vfrintrm_s, LARCH_V4SF_FTYPE_V4SF), ++ LSX_BUILTIN (vfrintrm_d, LARCH_V2DF_FTYPE_V2DF), + LSX_NO_TARGET_BUILTIN (vstelm_b, LARCH_VOID_FTYPE_V16QI_CVPOINTER_SI_UQI), + LSX_NO_TARGET_BUILTIN (vstelm_h, LARCH_VOID_FTYPE_V8HI_CVPOINTER_SI_UQI), + LSX_NO_TARGET_BUILTIN (vstelm_w, LARCH_VOID_FTYPE_V4SI_CVPOINTER_SI_UQI), +@@ -2152,14 +2068,14 @@ static const struct loongarch_builtin_description loongarch_builtins[] = { + LASX_BUILTIN (xvftintrml_l_s, LARCH_V4DI_FTYPE_V8SF), + LASX_BUILTIN (xvftintrneh_l_s, LARCH_V4DI_FTYPE_V8SF), + LASX_BUILTIN (xvftintrnel_l_s, LARCH_V4DI_FTYPE_V8SF), +- LASX_BUILTIN (xvfrintrne_s, LARCH_V8SI_FTYPE_V8SF), +- LASX_BUILTIN (xvfrintrne_d, LARCH_V4DI_FTYPE_V4DF), +- LASX_BUILTIN (xvfrintrz_s, LARCH_V8SI_FTYPE_V8SF), +- LASX_BUILTIN (xvfrintrz_d, LARCH_V4DI_FTYPE_V4DF), +- LASX_BUILTIN (xvfrintrp_s, LARCH_V8SI_FTYPE_V8SF), +- LASX_BUILTIN (xvfrintrp_d, LARCH_V4DI_FTYPE_V4DF), +- LASX_BUILTIN (xvfrintrm_s, LARCH_V8SI_FTYPE_V8SF), +- LASX_BUILTIN (xvfrintrm_d, LARCH_V4DI_FTYPE_V4DF), ++ LASX_BUILTIN (xvfrintrne_s, LARCH_V8SF_FTYPE_V8SF), ++ LASX_BUILTIN (xvfrintrne_d, LARCH_V4DF_FTYPE_V4DF), ++ LASX_BUILTIN (xvfrintrz_s, LARCH_V8SF_FTYPE_V8SF), ++ LASX_BUILTIN (xvfrintrz_d, LARCH_V4DF_FTYPE_V4DF), ++ LASX_BUILTIN (xvfrintrp_s, LARCH_V8SF_FTYPE_V8SF), ++ LASX_BUILTIN (xvfrintrp_d, LARCH_V4DF_FTYPE_V4DF), ++ LASX_BUILTIN (xvfrintrm_s, LARCH_V8SF_FTYPE_V8SF), ++ LASX_BUILTIN (xvfrintrm_d, LARCH_V4DF_FTYPE_V4DF), + LASX_BUILTIN (xvld, LARCH_V32QI_FTYPE_CVPOINTER_SI), + LASX_NO_TARGET_BUILTIN (xvst, LARCH_VOID_FTYPE_V32QI_CVPOINTER_SI), + LASX_NO_TARGET_BUILTIN (xvstelm_b, LARCH_VOID_FTYPE_V32QI_CVPOINTER_SI_UQI), +@@ -2391,6 +2307,27 @@ static const struct loongarch_builtin_description loongarch_builtins[] = { + LASX_BUILTIN (xvssrarni_du_q, LARCH_UV4DI_FTYPE_UV4DI_V4DI_USI), + }; + ++/* Index I is the function declaration for loongarch_builtins[I], or null if ++ the function isn't defined on this target. */ ++static GTY (()) tree loongarch_builtin_decls[ARRAY_SIZE (loongarch_builtins)]; ++/* Get the index I of the function declaration for loongarch_builtin_decls[I] ++ using the instruction code or return null if not defined for the target. */ ++static GTY (()) int loongarch_get_builtin_decl_index[NUM_INSN_CODES]; ++ ++/* Return a type for 'const volatile void*'. 
*/ ++ ++static tree ++loongarch_build_cvpointer_type (void) ++{ ++ static tree cache; ++ ++ if (cache == NULL_TREE) ++ cache = build_pointer_type (build_qualified_type (void_type_node, ++ TYPE_QUAL_CONST ++ | TYPE_QUAL_VOLATILE)); ++ return cache; ++} ++ + + /* MODE is a vector mode whose elements have type TYPE. Return the type + of the vector itself. */ +@@ -2411,26 +2348,12 @@ loongarch_builtin_vector_type (tree type, machine_mode mode) + return types[mode_index]; + } + +-/* Return a type for 'const volatile void *'. */ +- +-static tree +-loongarch_build_cvpointer_type (void) +-{ +- static tree cache; +- +- if (cache == NULL_TREE) +- cache = build_pointer_type (build_qualified_type +- (void_type_node, +- TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE)); +- return cache; +-} +- + /* Source-level argument types. */ + #define LARCH_ATYPE_VOID void_type_node + #define LARCH_ATYPE_INT integer_type_node + #define LARCH_ATYPE_POINTER ptr_type_node + #define LARCH_ATYPE_CVPOINTER loongarch_build_cvpointer_type () +-#define LARCH_ATYPE_BOOLEAN boolean_type_node ++#define LARCH_ATYPE_BOOLEAN boolean_type_node + /* Standard mode-based argument types. */ + #define LARCH_ATYPE_QI intQI_type_node + #define LARCH_ATYPE_UQI unsigned_intQI_type_node +@@ -2495,8 +2418,7 @@ loongarch_build_cvpointer_type (void) + + /* LARCH_FTYPE_ATYPESN takes N LARCH_FTYPES-like type codes and lists + their associated LARCH_ATYPEs. */ +-#define LARCH_FTYPE_ATYPES1(A, B) \ +- LARCH_ATYPE_##A, LARCH_ATYPE_##B ++#define LARCH_FTYPE_ATYPES1(A, B) LARCH_ATYPE_##A, LARCH_ATYPE_##B + + #define LARCH_FTYPE_ATYPES2(A, B, C) \ + LARCH_ATYPE_##A, LARCH_ATYPE_##B, LARCH_ATYPE_##C +@@ -2508,13 +2430,6 @@ loongarch_build_cvpointer_type (void) + LARCH_ATYPE_##A, LARCH_ATYPE_##B, LARCH_ATYPE_##C, LARCH_ATYPE_##D, \ + LARCH_ATYPE_##E + +-/* Index I is the function declaration for loongarch_builtins[I], or null if the +- function isn't defined on this target. */ +-static GTY(()) tree loongarch_builtin_decls[ARRAY_SIZE (loongarch_builtins)]; +-/* Get the index I of the function declaration for loongarch_builtin_decls[I] +- using the instruction code or return null if not defined for the target. */ +-static GTY(()) int loongarch_get_builtin_decl_index[NUM_INSN_CODES]; +- + /* Return the function type associated with function prototype TYPE. */ + + static tree +@@ -2525,11 +2440,10 @@ loongarch_build_function_type (enum loongarch_function_type type) + if (types[(int) type] == NULL_TREE) + switch (type) + { +-#define DEF_LARCH_FTYPE(NUM, ARGS) \ +- case LARCH_FTYPE_NAME##NUM ARGS: \ +- types[(int) type] \ +- = build_function_type_list (LARCH_FTYPE_ATYPES##NUM ARGS, \ +- NULL_TREE); \ ++#define DEF_LARCH_FTYPE(NUM, ARGS) \ ++ case LARCH_FTYPE_NAME##NUM ARGS: \ ++ types[(int) type] \ ++ = build_function_type_list (LARCH_FTYPE_ATYPES##NUM ARGS, NULL_TREE); \ + break; + #include "config/loongarch/loongarch-ftypes.def" + #undef DEF_LARCH_FTYPE +@@ -2547,6 +2461,7 @@ loongarch_init_builtins (void) + { + const struct loongarch_builtin_description *d; + unsigned int i; ++ tree type; + + /* Iterate through all of the bdesc arrays, initializing all of the + builtin functions. 
*/ +@@ -2555,10 +2470,10 @@ loongarch_init_builtins (void) + d = &loongarch_builtins[i]; + if (d->avail ()) + { ++ type = loongarch_build_function_type (d->function_type); + loongarch_builtin_decls[i] +- = add_builtin_function (d->name, +- loongarch_build_function_type (d->function_type), +- i, BUILT_IN_MD, NULL, NULL); ++ = add_builtin_function (d->name, type, i, BUILT_IN_MD, NULL, ++ NULL); + loongarch_get_builtin_decl_index[d->icode] = i; + } + } +@@ -2574,6 +2489,104 @@ loongarch_builtin_decl (unsigned int code, bool initialize_p ATTRIBUTE_UNUSED) + return loongarch_builtin_decls[code]; + } + ++/* Handler for an SLEEF-style interface to ++ a library with vectorized intrinsics. */ ++static tree ++loongarch_builtin_vectorized_libsleef (combined_fn fn, tree type_out, tree type_in) ++{ ++ char name[20]; ++ tree fntype, new_fndecl; ++ unsigned args = 1; ++ const char *bname; ++ machine_mode el_mode, in_mode; ++ int n, in_n; ++ ++ /* The SLEEF is suitable for unsafe math only. */ ++ if (!flag_unsafe_math_optimizations || !ISA_HAS_LSX) ++ return NULL_TREE; ++ ++ el_mode = TYPE_MODE (TREE_TYPE (type_out)); ++ n = TYPE_VECTOR_SUBPARTS (type_out); ++ in_mode = TYPE_MODE (TREE_TYPE (type_in)); ++ in_n = TYPE_VECTOR_SUBPARTS (type_in); ++ if (el_mode != in_mode ++ || n != in_n) ++ return NULL_TREE; ++ ++ switch (fn) ++ { ++ CASE_CFN_ATAN2: ++ CASE_CFN_POW: ++ args = 2; ++ gcc_fallthrough (); ++ ++ CASE_CFN_EXP: ++ CASE_CFN_LOG: ++ CASE_CFN_LOG1P: ++ CASE_CFN_LOG2: ++ CASE_CFN_LOG10: ++ CASE_CFN_TANH: ++ CASE_CFN_TAN: ++ CASE_CFN_ATAN: ++ CASE_CFN_ATANH: ++ CASE_CFN_CBRT: ++ CASE_CFN_SINH: ++ CASE_CFN_SIN: ++ CASE_CFN_ASINH: ++ CASE_CFN_ASIN: ++ CASE_CFN_COSH: ++ CASE_CFN_COS: ++ CASE_CFN_ACOSH: ++ CASE_CFN_ACOS: ++ break; ++ ++ default: ++ return NULL_TREE; ++ } ++ ++ tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn); ++ bname = IDENTIFIER_POINTER (DECL_NAME (fndecl)); ++ ++ if (args == 1) ++ { ++ if (n == 8 && el_mode == SFmode) ++ sprintf (name, "_ZGVdN8v_%s", bname+10); ++ else if (n == 4 && el_mode == DFmode) ++ sprintf (name, "_ZGVdN4v_%s", bname+10); ++ else if (n == 4 && el_mode == SFmode) ++ sprintf (name, "_ZGVbN4v_%s", bname+10); ++ else ++ sprintf (name, "_ZGVbN2v_%s", bname+10); ++ ++ fntype = build_function_type_list (type_out, type_in, NULL); ++ } ++ else if (args == 2) ++ { ++ if (n == 8 && el_mode == SFmode) ++ sprintf (name, "_ZGVdN8vv_%s", bname+10); ++ else if (n == 4 && el_mode == DFmode) ++ sprintf (name, "_ZGVdN4vv_%s", bname+10); ++ else if (n == 4 && el_mode == SFmode) ++ sprintf (name, "_ZGVbN4vv_%s", bname+10); ++ else ++ sprintf (name, "_ZGVbN2vv_%s", bname+10); ++ ++ fntype = build_function_type_list (type_out, type_in, type_in, NULL); ++ } ++ else ++ gcc_unreachable (); ++ ++ /* Build a function declaration for the vectorized function. */ ++ new_fndecl = build_decl (BUILTINS_LOCATION, ++ FUNCTION_DECL, get_identifier (name), fntype); ++ TREE_PUBLIC (new_fndecl) = 1; ++ DECL_EXTERNAL (new_fndecl) = 1; ++ DECL_IS_NOVOPS (new_fndecl) = 1; ++ TREE_READONLY (new_fndecl) = 1; ++ ++ return new_fndecl; ++} ++ + /* Implement TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION. 
*/ + + tree +@@ -2599,20 +2612,82 @@ loongarch_builtin_vectorized_function (unsigned int fn, tree type_out, tree type + + switch (fn) + { +- case BUILT_IN_SQRT: +- if (out_mode == DFmode && out_n == 2 +- && in_mode == DFmode && in_n == 2) +- return LARCH_GET_BUILTIN (lsx_vfsqrt_d); ++ CASE_CFN_CEIL: ++ if (out_mode == DFmode && in_mode == DFmode) ++ { ++ if (out_n == 2 && in_n == 2) ++ return LARCH_GET_BUILTIN (lsx_vfrintrp_d); ++ if (out_n == 4 && in_n == 4) ++ return LARCH_GET_BUILTIN (lasx_xvfrintrp_d); ++ } ++ if (out_mode == SFmode && in_mode == SFmode) ++ { ++ if (out_n == 4 && in_n == 4) ++ return LARCH_GET_BUILTIN (lsx_vfrintrp_s); ++ if (out_n == 8 && in_n == 8) ++ return LARCH_GET_BUILTIN (lasx_xvfrintrp_s); ++ } + break; +- case BUILT_IN_SQRTF: +- if (out_mode == SFmode && out_n == 4 +- && in_mode == SFmode && in_n == 4) +- return LARCH_GET_BUILTIN (lsx_vfsqrt_s); ++ ++ CASE_CFN_TRUNC: ++ if (out_mode == DFmode && in_mode == DFmode) ++ { ++ if (out_n == 2 && in_n == 2) ++ return LARCH_GET_BUILTIN (lsx_vfrintrz_d); ++ if (out_n == 4 && in_n == 4) ++ return LARCH_GET_BUILTIN (lasx_xvfrintrz_d); ++ } ++ if (out_mode == SFmode && in_mode == SFmode) ++ { ++ if (out_n == 4 && in_n == 4) ++ return LARCH_GET_BUILTIN (lsx_vfrintrz_s); ++ if (out_n == 8 && in_n == 8) ++ return LARCH_GET_BUILTIN (lasx_xvfrintrz_s); ++ } + break; ++ ++ CASE_CFN_RINT: ++ CASE_CFN_ROUND: ++ if (out_mode == DFmode && in_mode == DFmode) ++ { ++ if (out_n == 2 && in_n == 2) ++ return LARCH_GET_BUILTIN (lsx_vfrint_d); ++ if (out_n == 4 && in_n == 4) ++ return LARCH_GET_BUILTIN (lasx_xvfrint_d); ++ } ++ if (out_mode == SFmode && in_mode == SFmode) ++ { ++ if (out_n == 4 && in_n == 4) ++ return LARCH_GET_BUILTIN (lsx_vfrint_s); ++ if (out_n == 8 && in_n == 8) ++ return LARCH_GET_BUILTIN (lasx_xvfrint_s); ++ } ++ break; ++ ++ CASE_CFN_FLOOR: ++ if (out_mode == DFmode && in_mode == DFmode) ++ { ++ if (out_n == 2 && in_n == 2) ++ return LARCH_GET_BUILTIN (lsx_vfrintrm_d); ++ if (out_n == 4 && in_n == 4) ++ return LARCH_GET_BUILTIN (lasx_xvfrintrm_d); ++ } ++ if (out_mode == SFmode && in_mode == SFmode) ++ { ++ if (out_n == 4 && in_n == 4) ++ return LARCH_GET_BUILTIN (lsx_vfrintrm_s); ++ if (out_n == 8 && in_n == 8) ++ return LARCH_GET_BUILTIN (lasx_xvfrintrm_s); ++ } ++ break; ++ + default: + break; + } + ++ /* Dispatch to a handler for a vectorization library. 
*/ ++ if (loongarch_veclibabi_name && strcmp (loongarch_veclibabi_name, "sleef") == 0) ++ return loongarch_builtin_vectorized_libsleef (combined_fn (fn), type_out, type_in); + return NULL_TREE; + } + +@@ -2621,7 +2696,7 @@ loongarch_builtin_vectorized_function (unsigned int fn, tree type_out, tree type + + static void + loongarch_prepare_builtin_arg (struct expand_operand *op, tree exp, +- unsigned int argno) ++ unsigned int argno) + { + tree arg; + rtx value; +@@ -2649,11 +2724,10 @@ loongarch_gen_const_int_vector (machine_mode mode, HOST_WIDE_INT val) + + static rtx + loongarch_expand_builtin_insn (enum insn_code icode, unsigned int nops, +- struct expand_operand *ops, bool has_target_p) ++ struct expand_operand *ops, bool has_target_p) + { + machine_mode imode; + int rangelo = 0, rangehi = 0, error_opno = 0; +- rtx sireg; + + switch (icode) + { +@@ -3002,7 +3076,7 @@ loongarch_expand_builtin_insn (enum insn_code icode, unsigned int nops, + + static rtx + loongarch_expand_builtin_direct (enum insn_code icode, rtx target, tree exp, +- bool has_target_p) ++ bool has_target_p) + { + struct expand_operand ops[MAX_RECOG_OPERANDS]; + int opno, argno; +@@ -3069,7 +3143,8 @@ loongarch_expand_builtin_lsx_test_branch (enum insn_code icode, tree exp) + + rtx + loongarch_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, +- machine_mode mode, int ignore) ++ machine_mode mode ATTRIBUTE_UNUSED, ++ int ignore ATTRIBUTE_UNUSED) + { + tree fndecl; + unsigned int fcode, avail; +@@ -3097,6 +3172,7 @@ loongarch_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, + } + gcc_unreachable (); + } ++ + /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */ + + void +@@ -3112,32 +3188,32 @@ loongarch_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) + tree set_fcsr = loongarch_builtin_decls[LARCH_MOVGR2FCSR]; + tree get_fcsr_hold_call = build_call_expr (get_fcsr, 1, const0); + tree hold_assign_orig = build4 (TARGET_EXPR, LARCH_ATYPE_USI, +- fcsr_orig_var, get_fcsr_hold_call, +- NULL, NULL); ++ fcsr_orig_var, get_fcsr_hold_call, ++ NULL, NULL); + tree hold_mod_val = build2 (BIT_AND_EXPR, LARCH_ATYPE_USI, fcsr_orig_var, + build_int_cst (LARCH_ATYPE_USI, 0xffe0ffe0)); + tree hold_assign_mod = build4 (TARGET_EXPR, LARCH_ATYPE_USI, +- fcsr_mod_var, hold_mod_val, NULL, NULL); +- tree set_fcsr_hold_call = build_call_expr (set_fcsr, 2, const0, fcsr_mod_var); +- tree hold_all = build2 (COMPOUND_EXPR, LARCH_ATYPE_USI, +- hold_assign_orig, hold_assign_mod); +- *hold = build2 (COMPOUND_EXPR, void_type_node, hold_all, +- set_fcsr_hold_call); ++ fcsr_mod_var, hold_mod_val, NULL, NULL); ++ tree set_fcsr_hold_call = build_call_expr (set_fcsr, 2, const0, ++ fcsr_mod_var); ++ tree hold_all = build2 (COMPOUND_EXPR, LARCH_ATYPE_USI, hold_assign_orig, ++ hold_assign_mod); ++ *hold = build2 (COMPOUND_EXPR, void_type_node, hold_all, set_fcsr_hold_call); + + *clear = build_call_expr (set_fcsr, 2, const0, fcsr_mod_var); + + tree get_fcsr_update_call = build_call_expr (get_fcsr, 1, const0); + *update = build4 (TARGET_EXPR, LARCH_ATYPE_USI, exceptions_var, +- get_fcsr_update_call, NULL, NULL); +- tree set_fcsr_update_call = build_call_expr (set_fcsr, 2, const0, fcsr_orig_var); ++ get_fcsr_update_call, NULL, NULL); ++ tree set_fcsr_update_call = build_call_expr (set_fcsr, 2, const0, ++ fcsr_orig_var); + *update = build2 (COMPOUND_EXPR, void_type_node, *update, + set_fcsr_update_call); + tree atomic_feraiseexcept + = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT); +- tree 
int_exceptions_var = fold_convert (integer_type_node, +- exceptions_var); +- tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept, +- 1, int_exceptions_var); ++ tree int_exceptions_var = fold_convert (integer_type_node, exceptions_var); ++ tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept, 1, ++ int_exceptions_var); + *update = build2 (COMPOUND_EXPR, void_type_node, *update, + atomic_feraiseexcept_call); + } +@@ -3149,4 +3225,3 @@ loongarch_build_builtin_va_list (void) + { + return ptr_type_node; + } +- +diff --git a/gcc/config/loongarch/loongarch-c.c b/gcc/config/loongarch/loongarch-c.c +index 6eac43bdf..f8583f7aa 100644 +--- a/gcc/config/loongarch/loongarch-c.c ++++ b/gcc/config/loongarch/loongarch-c.c +@@ -1,22 +1,22 @@ + /* LoongArch-specific code for C family languages. +- Copyright (C) 2020-2021 Free Software Foundation, Inc. +- Contributed by Andrew Waterman (zhouyingkun@mail.loongson.cn). ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. ++ Contributed by Loongson Ltd. + +- This file is part of GCC. ++This file is part of GCC. + +- GCC is free software; you can redistribute it and/or modify +- it under the terms of the GNU General Public License as published by +- the Free Software Foundation; either version 3, or (at your option) +- any later version. ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. + +- GCC is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- GNU General Public License for more details. ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. + +- You should have received a copy of the GNU General Public License +- along with GCC; see the file COPYING3. If not see +- . */ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ + + #define IN_TARGET_CODE 1 + +@@ -31,7 +31,28 @@ + #define builtin_define(TXT) cpp_define (pfile, TXT) + #define builtin_assert(TXT) cpp_assert (pfile, TXT) + +-/* TODO: what is the pfile technique ??? !!! */ ++/* Define preprocessor macros for the -march and -mtune options. ++ PREFIX is either _LOONGARCH_ARCH or _LOONGARCH_TUNE, INFO is ++ the selected processor. If INFO's canonical name is "foo", ++ define PREFIX to be "foo", and define an additional macro ++ PREFIX_FOO. 
*/ ++#define LARCH_CPP_SET_PROCESSOR(PREFIX, CPU_TYPE) \ ++ do \ ++ { \ ++ char *macro, *p; \ ++ int cpu_type = (CPU_TYPE); \ ++ \ ++ macro = concat ((PREFIX), "_", \ ++ loongarch_cpu_strings[cpu_type], NULL); \ ++ for (p = macro; *p != 0; p++) \ ++ *p = TOUPPER (*p); \ ++ \ ++ builtin_define (macro); \ ++ builtin_define_with_value ((PREFIX), \ ++ loongarch_cpu_strings[cpu_type], 1); \ ++ free (macro); \ ++ } \ ++ while (0) + + void + loongarch_cpu_cpp_builtins (cpp_reader *pfile) +@@ -40,10 +61,43 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile) + builtin_assert ("cpu=loongarch"); + builtin_define ("__loongarch__"); + +- if (TARGET_FLOAT64) +- builtin_define ("__loongarch_fpr=64"); ++ LARCH_CPP_SET_PROCESSOR ("_LOONGARCH_ARCH", la_target.cpu_arch); ++ LARCH_CPP_SET_PROCESSOR ("_LOONGARCH_TUNE", la_target.cpu_tune); ++ ++ /* Base architecture / ABI. */ ++ if (TARGET_64BIT) ++ { ++ builtin_define ("__loongarch_grlen=64"); ++ builtin_define ("__loongarch64"); ++ } ++ ++ if (TARGET_ABI_LP64) ++ { ++ builtin_define ("_ABILP64=3"); ++ builtin_define ("_LOONGARCH_SIM=_ABILP64"); ++ builtin_define ("__loongarch_lp64"); ++ } ++ ++ /* These defines reflect the ABI in use, not whether the ++ FPU is directly accessible. */ ++ if (TARGET_DOUBLE_FLOAT_ABI) ++ builtin_define ("__loongarch_double_float=1"); ++ else if (TARGET_SINGLE_FLOAT_ABI) ++ builtin_define ("__loongarch_single_float=1"); ++ ++ if (TARGET_DOUBLE_FLOAT_ABI || TARGET_SINGLE_FLOAT_ABI) ++ builtin_define ("__loongarch_hard_float=1"); + else +- builtin_define ("__loongarch_fpr=32"); ++ builtin_define ("__loongarch_soft_float=1"); ++ ++ ++ /* ISA Extensions. */ ++ if (TARGET_DOUBLE_FLOAT) ++ builtin_define ("__loongarch_frlen=64"); ++ else if (TARGET_SINGLE_FLOAT) ++ builtin_define ("__loongarch_frlen=32"); ++ else ++ builtin_define ("__loongarch_frlen=0"); + + if (ISA_HAS_LSX) + { +@@ -62,74 +116,12 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile) + builtin_define ("__loongarch_simd_width=256"); + } + +- LARCH_CPP_SET_PROCESSOR ("_LOONGARCH_ARCH", loongarch_arch_info); +- LARCH_CPP_SET_PROCESSOR ("_LOONGARCH_TUNE", loongarch_tune_info); +- +- +- switch (loongarch_abi) +- { +- case ABILP32: +- builtin_define ("_ABILP32=1"); +- builtin_define ("_LOONGARCH_SIM=_ABILP32"); +- builtin_define ("__loongarch32"); +- break; +- +- case ABILPX32: +- builtin_define ("_ABILPX32=2"); +- builtin_define ("_LOONGARCH_SIM=_ABILPX32"); +- break; +- +- case ABILP64: +- builtin_define ("_ABILP64=3"); +- builtin_define ("_LOONGARCH_SIM=_ABILP64"); +- builtin_define ("__loongarch64"); +- break; +- } + ++ /* Native Data Sizes. */ + builtin_define_with_int_value ("_LOONGARCH_SZINT", INT_TYPE_SIZE); + builtin_define_with_int_value ("_LOONGARCH_SZLONG", LONG_TYPE_SIZE); + builtin_define_with_int_value ("_LOONGARCH_SZPTR", POINTER_SIZE); +- builtin_define_with_int_value ("_LOONGARCH_FPSET", +- 32 / MAX_FPRS_PER_FMT); +- builtin_define_with_int_value ("_LOONGARCH_SPFPSET", +- 32); +- +- /* These defines reflect the ABI in use, not whether the +- FPU is directly accessible. */ +- if (TARGET_NO_FLOAT) +- builtin_define ("__loongarch_no_float"); +- else if (TARGET_HARD_FLOAT_ABI) +- builtin_define ("__loongarch_hard_float"); +- else +- builtin_define ("__loongarch_soft_float"); ++ builtin_define_with_int_value ("_LOONGARCH_FPSET", 32); ++ builtin_define_with_int_value ("_LOONGARCH_SPFPSET", 32); + +- if (TARGET_SINGLE_FLOAT) +- builtin_define ("__loongarch_single_float"); +- +- /* Macros dependent on the C dialect. 
*/ +- if (preprocessing_asm_p ()) +- { +- builtin_define_std ("LANGUAGE_ASSEMBLY"); +- builtin_define ("_LANGUAGE_ASSEMBLY"); +- } +- else if (c_dialect_cxx ()) +- { +- builtin_define ("_LANGUAGE_C_PLUS_PLUS"); +- builtin_define ("__LANGUAGE_C_PLUS_PLUS"); +- builtin_define ("__LANGUAGE_C_PLUS_PLUS__"); +- } +- else +- { +- builtin_define_std ("LANGUAGE_C"); +- builtin_define ("_LANGUAGE_C"); +- } +- +- if (c_dialect_objc ()) +- { +- builtin_define ("_LANGUAGE_OBJECTIVE_C"); +- builtin_define ("__LANGUAGE_OBJECTIVE_C"); +- /* Bizarre, but retained for backwards compatibility. */ +- builtin_define_std ("LANGUAGE_C"); +- builtin_define ("_LANGUAGE_C"); +- } + } +diff --git a/gcc/config/loongarch/loongarch-cpu.c b/gcc/config/loongarch/loongarch-cpu.c +new file mode 100644 +index 000000000..ce2e649c8 +--- /dev/null ++++ b/gcc/config/loongarch/loongarch-cpu.c +@@ -0,0 +1,291 @@ ++/* Definitions for LoongArch CPU properties. ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. ++ Contributed by Loongson Ltd. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#define IN_TARGET_CODE 1 ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++#include "diagnostic-core.h" ++ ++#include "loongarch-def.h" ++#include "loongarch-opts.h" ++#include "loongarch-cpu.h" ++#include "loongarch-str.h" ++ ++/* Native CPU detection with "cpucfg" */ ++#define N_CPUCFG_WORDS 0x15 ++static uint32_t cpucfg_cache[N_CPUCFG_WORDS] = { 0 }; ++static const int cpucfg_useful_idx[] = {0, 1, 2, 16, 17, 18, 19}; ++ ++static uint32_t ++read_cpucfg_word (int wordno) ++{ ++ /* To make cross-compiler shut up. */ ++ (void) wordno; ++ uint32_t ret = 0; ++ ++ #ifdef __loongarch__ ++ __asm__ ("cpucfg %0,%1\n\t" :"=r"(ret) :"r"(wordno)); ++ #endif ++ ++ return ret; ++} ++ ++void ++cache_cpucfg (void) ++{ ++ for (unsigned int i = 0; i < sizeof (cpucfg_useful_idx) / sizeof (int); i++) ++ { ++ cpucfg_cache[cpucfg_useful_idx[i]] ++ = read_cpucfg_word (cpucfg_useful_idx[i]); ++ } ++} ++ ++uint32_t ++get_native_prid (void) ++{ ++ /* Fill loongarch_cpu_default_config[CPU_NATIVE] with cpucfg data, ++ see "Loongson Architecture Reference Manual" ++ (Volume 1, Section 2.2.10.5) */ ++ return cpucfg_cache[0]; ++} ++ ++const char* ++get_native_prid_str (void) ++{ ++ static char prid_str[9]; ++ sprintf (prid_str, "%08x", cpucfg_cache[0]); ++ return (const char*) prid_str; ++} ++ ++ ++/* Fill property tables for CPU_NATIVE. */ ++void ++fill_native_cpu_config (struct loongarch_target *tgt) ++{ ++ int arch_native_p = tgt->cpu_arch == CPU_NATIVE; ++ int tune_native_p = tgt->cpu_tune == CPU_NATIVE; ++ int native_cpu_type = CPU_NATIVE; ++ ++ /* Nothing needs to be done unless "-march/tune=native" ++ is given or implied. */ ++ if (!arch_native_p && !tune_native_p) ++ return; ++ ++ /* Fill cpucfg_cache with the "cpucfg" instruction. 
*/ ++ cache_cpucfg (); ++ ++ /* Fill: tgt->cpu_arch | tgt->cpu_tune ++ With: processor ID (PRID) ++ At: cpucfg_words[0][31:0] */ ++ ++ switch (cpucfg_cache[0] & 0x00ffff00) ++ { ++ case 0x0014d000: /* LA664 */ ++ native_cpu_type = CPU_LA664; ++ break; ++ ++ case 0x0014c000: /* LA464 */ ++ native_cpu_type = CPU_LA464; ++ break; ++ ++ case 0x0014b000: /* LA364 */ ++ native_cpu_type = CPU_LA364; ++ break; ++ ++ case 0x0014a000: /* LA264 */ ++ native_cpu_type = CPU_LA264; ++ break; ++ ++ default: ++ /* Unknown PRID. */ ++ if (tune_native_p) ++ inform (UNKNOWN_LOCATION, "unknown processor ID %<0x%x%>, " ++ "some tuning parameters will fall back to default", ++ cpucfg_cache[0]); ++ break; ++ } ++ ++ /* if -march=native */ ++ if (arch_native_p) ++ { ++ int tmp; ++ tgt->cpu_arch = native_cpu_type; ++ ++ /* Fill: loongarch_cpu_default_isa[tgt->cpu_arch].base ++ With: base architecture (ARCH) ++ At: cpucfg_words[1][1:0] */ ++ ++ #define PRESET_ARCH (loongarch_cpu_default_isa[tgt->cpu_arch].base) ++ switch (cpucfg_cache[1] & 0x3) ++ { ++ case 0x02: ++ tmp = ISA_BASE_LA64V100; ++ break; ++ ++ default: ++ fatal_error (UNKNOWN_LOCATION, ++ "unknown native base architecture %<0x%x%>, %qs failed", ++ (unsigned int) (cpucfg_cache[1] & 0x3), ++ "-m" OPTSTR_ARCH "=" STR_CPU_NATIVE); ++ } ++ ++ /* Check consistency with PRID presets. */ ++ if (native_cpu_type != CPU_NATIVE && tmp != PRESET_ARCH) ++ warning (0, "base architecture %qs differs from PRID preset %qs", ++ loongarch_isa_base_strings[tmp], ++ loongarch_isa_base_strings[PRESET_ARCH]); ++ ++ /* Use the native value anyways. */ ++ PRESET_ARCH = tmp; ++ ++ /* Fill: loongarch_cpu_default_isa[tgt->cpu_arch].fpu ++ With: FPU type (FP, FP_SP, FP_DP) ++ At: cpucfg_words[2][2:0] */ ++ ++ #define PRESET_FPU (loongarch_cpu_default_isa[tgt->cpu_arch].fpu) ++ switch (cpucfg_cache[2] & 0x7) ++ { ++ case 0x07: ++ tmp = ISA_EXT_FPU64; ++ break; ++ ++ case 0x03: ++ tmp = ISA_EXT_FPU32; ++ break; ++ ++ case 0x00: ++ tmp = ISA_EXT_NONE; ++ break; ++ ++ default: ++ fatal_error (UNKNOWN_LOCATION, ++ "unknown native FPU type %<0x%x%>, %qs failed", ++ (unsigned int) (cpucfg_cache[2] & 0x7), ++ "-m" OPTSTR_ARCH "=" STR_CPU_NATIVE); ++ } ++ ++ /* Check consistency with PRID presets. */ ++ if (native_cpu_type != CPU_NATIVE && tmp != PRESET_FPU) ++ warning (0, "floating-point unit %qs differs from PRID preset %qs", ++ loongarch_isa_ext_strings[tmp], ++ loongarch_isa_ext_strings[PRESET_FPU]); ++ ++ /* Use the native value anyways. */ ++ PRESET_FPU = tmp; ++ ++ ++ /* Fill: loongarch_cpu_default_isa[CPU_NATIVE].simd ++ With: SIMD extension type (LSX, LASX) ++ At: cpucfg_words[2][7:6] */ ++ ++ #define PRESET_SIMD (loongarch_cpu_default_isa[tgt->cpu_arch].simd) ++ switch (cpucfg_cache[2] & 0xc0) ++ { ++ case 0xc0: ++ tmp = ISA_EXT_SIMD_LASX; ++ break; ++ ++ case 0x40: ++ tmp = ISA_EXT_SIMD_LSX; ++ break; ++ ++ case 0x80: ++ warning (0, "unknown SIMD extension " ++ "(%qs disabled while %qs is enabled), disabling SIMD", ++ loongarch_isa_ext_strings[ISA_EXT_SIMD_LSX], ++ loongarch_isa_ext_strings[ISA_EXT_SIMD_LASX]); ++ ++ case 0x00: ++ tmp = 0; ++ break; ++ } ++ ++ /* Check consistency with PRID presets. */ ++ /* ++ if (native_cpu_type != CPU_NATIVE && tmp != PRESET_SIMD) ++ warning (0, "SIMD extension %qs differs from PRID preset %qs", ++ loongarch_isa_ext_strings[tmp], ++ loongarch_isa_ext_strings[PRESET_SIMD]); ++ */ ++ ++ /* Use the native value anyways. 
*/ ++ PRESET_SIMD = tmp; ++ } ++ ++ if (tune_native_p) ++ { ++ tgt->cpu_tune = native_cpu_type; ++ ++ /* Fill: loongarch_cpu_cache[tgt->cpu_tune] ++ With: cache size info ++ At: cpucfg_words[16:20][31:0] */ ++ ++ #define PRESET_CACHE (loongarch_cpu_cache[tgt->cpu_tune]) ++ struct loongarch_cache native_cache; ++ int l1d_present = 0, l1u_present = 0; ++ int l2d_present = 0; ++ uint32_t l1_szword, l2_szword; ++ ++ l1u_present |= cpucfg_cache[16] & 3; /* bit[1:0]: unified l1 */ ++ l1d_present |= cpucfg_cache[16] & 4; /* bit[2:2]: l1d */ ++ l1_szword = l1d_present ? 18 : (l1u_present ? 17 : 0); ++ l1_szword = l1_szword ? cpucfg_cache[l1_szword]: 0; ++ ++ l2d_present |= cpucfg_cache[16] & 24; /* bit[4:3]: unified l2 */ ++ l2d_present |= cpucfg_cache[16] & 128; /* bit[7:7]: l2d */ ++ l2_szword = l2d_present ? cpucfg_cache[19]: 0; ++ ++ native_cache.l1d_line_size ++ = 1 << ((l1_szword & 0x7f000000) >> 24); /* bit[30:24]: log2(line) */ ++ ++ native_cache.l1d_size ++ = (1 << ((l1_szword & 0x00ff0000) >> 16)) /* bit[23:16]: log2(idx) */ ++ * ((l1_szword & 0x0000ffff) + 1) /* bit[15:0]: sets - 1 */ ++ * (1 << ((l1_szword & 0x7f000000) >> 24)) /* bit[30:24]: log2(line) */ ++ >> 10; /* in kibibytes */ ++ ++ native_cache.l2d_size ++ = (1 << ((l2_szword & 0x00ff0000) >> 16)) /* bit[23:16]: log2(idx) */ ++ * ((l2_szword & 0x0000ffff) + 1) /* bit[15:0]: sets - 1 */ ++ * (1 << ((l2_szword & 0x7f000000) >> 24)) /* bit[30:24]: log2(linesz) */ ++ >> 10; /* in kibibytes */ ++ ++ /* ++ if (native_cpu_type != CPU_NATIVE && ( ++ native_cache.l1d_line_size != PRESET_CACHE.l1d_line_size || ++ native_cache.l1d_size != PRESET_CACHE.l1d_size || ++ native_cache.l2d_size != PRESET_CACHE.l2d_size)) ++ warning (0, "native cache info (%) " ++ "differs from PRID preset (%)", ++ native_cache.l1d_size, native_cache.l2d_size, ++ native_cache.l1d_line_size, ++ PRESET_CACHE.l1d_size, PRESET_CACHE.l2d_size, ++ PRESET_CACHE.l1d_line_size); ++ */ ++ ++ /* Use the native value anyways. */ ++ PRESET_CACHE.l1d_line_size = native_cache.l1d_line_size; ++ PRESET_CACHE.l1d_size = native_cache.l1d_size; ++ PRESET_CACHE.l2d_size = native_cache.l2d_size; ++ } ++} +diff --git a/gcc/config/loongarch/loongarch-d.c b/gcc/config/loongarch/loongarch-cpu.h +similarity index 59% +rename from gcc/config/loongarch/loongarch-d.c +rename to gcc/config/loongarch/loongarch-cpu.h +index 971e5d33e..08d018372 100644 +--- a/gcc/config/loongarch/loongarch-d.c ++++ b/gcc/config/loongarch/loongarch-cpu.h +@@ -1,5 +1,7 @@ +-/* Subroutines for the D front end on the LARCH architecture. +- Copyright (C) 2017 Free Software Foundation, Inc. ++/* Definitions for loongarch native cpu property detection routines. ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. ++ ++This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by +@@ -15,17 +17,15 @@ You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +-#include "config.h" ++#ifndef LOONGARCH_CPU_H ++#define LOONGARCH_CPU_H ++ + #include "system.h" +-#include "coretypes.h" +-#include "tm.h" +-#include "d/d-target.h" +-#include "d/d-target-def.h" +- +-/* Implement TARGET_D_CPU_VERSIONS for LARCH targets. */ +- +-void +-loongarch_d_target_versions (void) +-{ +- // need to be improved !! 
+-} ++#include "loongarch-def.h" ++ ++void cache_cpucfg (void); ++void fill_native_cpu_config (struct loongarch_target *tgt); ++uint32_t get_native_prid (void); ++const char* get_native_prid_str (void); ++ ++#endif /* LOONGARCH_CPU_H */ +diff --git a/gcc/config/loongarch/loongarch-cpus.def b/gcc/config/loongarch/loongarch-cpus.def +deleted file mode 100644 +index 7ce2508e3..000000000 +--- a/gcc/config/loongarch/loongarch-cpus.def ++++ /dev/null +@@ -1,38 +0,0 @@ +-/* LARCH CPU names. +- Copyright (C) 1989-2018 Free Software Foundation, Inc. +- +-This file is part of GCC. +- +-GCC is free software; you can redistribute it and/or modify +-it under the terms of the GNU General Public License as published by +-the Free Software Foundation; either version 3, or (at your option) +-any later version. +- +-GCC is distributed in the hope that it will be useful, +-but WITHOUT ANY WARRANTY; without even the implied warranty of +-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-GNU General Public License for more details. +- +-You should have received a copy of the GNU General Public License +-along with GCC; see the file COPYING3. If not see +-. */ +- +-/* A table describing all the processors GCC knows about. The first +- mention of an ISA level is taken as the canonical name for that +- ISA. +- +- To ease comparison, please keep this table in the same order +- as GAS's loongarch_cpu_info_table. Please also make sure that +- LARCH_ISA_LEVEL_SPEC and LARCH_ARCH_FLOAT_SPEC handle all -march +- options correctly. +- +- Before including this file, define a macro: +- +- LARCH_CPU (NAME, CPU, ISA, FLAGS) +- +- where the arguments are the fields of struct loongarch_cpu_info. */ +- +-/* Entries for generic ISAs. */ +-LARCH_CPU ("loongarch64", PROCESSOR_LOONGARCH64, 0, 0) +-LARCH_CPU ("la464", PROCESSOR_LA464, 0, 0) +- +diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c +new file mode 100644 +index 000000000..dde7a5dba +--- /dev/null ++++ b/gcc/config/loongarch/loongarch-def.c +@@ -0,0 +1,232 @@ ++/* LoongArch static properties. ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. ++ Contributed by Loongson Ltd. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#include "loongarch-def.h" ++#include "loongarch-str.h" ++ ++/* CPU property tables. 
*/ ++const char* ++loongarch_cpu_strings[N_TUNE_TYPES] = { ++ [CPU_NATIVE] = STR_CPU_NATIVE, ++ [CPU_ABI_DEFAULT] = STR_CPU_ABI_DEFAULT, ++ [CPU_LOONGARCH64] = STR_CPU_LOONGARCH64, ++ [CPU_LA464] = STR_CPU_LA464, ++ [CPU_LA364] = STR_CPU_LA364, ++ [CPU_LA264] = STR_CPU_LA264, ++ [CPU_LA664] = STR_CPU_LA664, ++}; ++ ++struct loongarch_isa ++loongarch_cpu_default_isa[N_ARCH_TYPES] = { ++ [CPU_LOONGARCH64] = { ++ .base = ISA_BASE_LA64V100, ++ .fpu = ISA_EXT_FPU64, ++ .simd = 0, ++ }, ++ [CPU_LA464] = { ++ .base = ISA_BASE_LA64V100, ++ .fpu = ISA_EXT_FPU64, ++ .simd = ISA_EXT_SIMD_LASX, ++ }, ++ [CPU_LA364] = { ++ .base = ISA_BASE_LA64V100, ++ .fpu = ISA_EXT_FPU64, ++ .simd = ISA_EXT_SIMD_LSX, ++ }, ++ [CPU_LA264] = { ++ .base = ISA_BASE_LA64V100, ++ .fpu = ISA_EXT_FPU64, ++ .simd = ISA_EXT_SIMD_LSX, ++ }, ++ [CPU_LA664] = { ++ .base = ISA_BASE_LA64V100, ++ .fpu = ISA_EXT_FPU64, ++ .simd = ISA_EXT_SIMD_LASX, ++ }, ++}; ++ ++struct loongarch_cache ++loongarch_cpu_cache[N_TUNE_TYPES] = { ++ [CPU_LOONGARCH64] = { ++ .l1d_line_size = 64, ++ .l1d_size = 64, ++ .l2d_size = 256, ++ .simultaneous_prefetches = 4, ++ }, ++ [CPU_LA464] = { ++ .l1d_line_size = 64, ++ .l1d_size = 64, ++ .l2d_size = 256, ++ .simultaneous_prefetches = 4, ++ }, ++ [CPU_LA364] = { ++ .l1d_line_size = 64, ++ .l1d_size = 64, ++ .l2d_size = 0, ++ .simultaneous_prefetches = 4, ++ }, ++ [CPU_LA264] = { ++ .l1d_line_size = 64, ++ .l1d_size = 32, ++ .l2d_size = 0, ++ .simultaneous_prefetches = 4, ++ }, ++ [CPU_LA664] = { ++ .l1d_line_size = 64, ++ .l1d_size = 64, ++ .l2d_size = 256, ++ .simultaneous_prefetches = 4, ++ }, ++}; ++ ++/* RTX costs */ ++/* Default RTX cost initializer. */ ++#define COSTS_N_INSNS(N) ((N) * 4) ++#define DEFAULT_COSTS \ ++ .fp_add = COSTS_N_INSNS (1), \ ++ .fp_mult_sf = COSTS_N_INSNS (2), \ ++ .fp_mult_df = COSTS_N_INSNS (4), \ ++ .fp_div_sf = COSTS_N_INSNS (6), \ ++ .fp_div_df = COSTS_N_INSNS (8), \ ++ .int_mult_si = COSTS_N_INSNS (1), \ ++ .int_mult_di = COSTS_N_INSNS (1), \ ++ .int_div_si = COSTS_N_INSNS (4), \ ++ .int_div_di = COSTS_N_INSNS (6), \ ++ .branch_cost = 6, \ ++ .memory_latency = 4 ++ ++/* The following properties cannot be looked up directly using "cpucfg". ++ So it is necessary to provide a default value for "unknown native" ++ tune targets (i.e. -mtune=native while PRID does not correspond to ++ any known "-mtune" type). */ ++ ++struct loongarch_rtx_cost_data ++loongarch_cpu_rtx_cost_data[N_TUNE_TYPES] = { ++ [CPU_NATIVE] = { ++ DEFAULT_COSTS ++ }, ++ [CPU_LOONGARCH64] = { ++ DEFAULT_COSTS ++ }, ++ [CPU_LA464] = { ++ DEFAULT_COSTS ++ }, ++ [CPU_LA364] = { ++ DEFAULT_COSTS ++ }, ++ [CPU_LA264] = { ++ DEFAULT_COSTS ++ }, ++ [CPU_LA664] = { ++ DEFAULT_COSTS ++ }, ++}; ++ ++/* RTX costs to use when optimizing for size. 
*/ ++const struct loongarch_rtx_cost_data ++loongarch_rtx_cost_optimize_size = { ++ .fp_add = 4, ++ .fp_mult_sf = 4, ++ .fp_mult_df = 4, ++ .fp_div_sf = 4, ++ .fp_div_df = 4, ++ .int_mult_si = 4, ++ .int_mult_di = 4, ++ .int_div_si = 4, ++ .int_div_di = 4, ++ .branch_cost = 2, ++ .memory_latency = 4, ++}; ++ ++int ++loongarch_cpu_issue_rate[N_TUNE_TYPES] = { ++ [CPU_NATIVE] = 4, ++ [CPU_LOONGARCH64] = 4, ++ [CPU_LA464] = 4, ++ [CPU_LA364] = 3, ++ [CPU_LA264] = 2, ++ [CPU_LA664] = 6, ++}; ++ ++int ++loongarch_cpu_multipass_dfa_lookahead[N_TUNE_TYPES] = { ++ [CPU_NATIVE] = 4, ++ [CPU_LOONGARCH64] = 4, ++ [CPU_LA464] = 4, ++ [CPU_LA364] = 4, ++ [CPU_LA264] = 4, ++ [CPU_LA664] = 4, ++}; ++ ++/* Wiring string definitions from loongarch-str.h to global arrays ++ with standard index values from loongarch-opts.h, so we can ++ print config-related messages and do ABI self-spec filtering ++ from the driver in a self-consistent manner. */ ++ ++const char* ++loongarch_isa_base_strings[N_ISA_BASE_TYPES] = { ++ [ISA_BASE_LA64V100] = STR_ISA_BASE_LA64V100, ++}; ++ ++const char* ++loongarch_isa_ext_strings[N_ISA_EXT_TYPES] = { ++ [ISA_EXT_NONE] = STR_NONE, ++ [ISA_EXT_FPU32] = STR_ISA_EXT_FPU32, ++ [ISA_EXT_FPU64] = STR_ISA_EXT_FPU64, ++ [ISA_EXT_SIMD_LSX] = STR_ISA_EXT_LSX, ++ [ISA_EXT_SIMD_LASX] = STR_ISA_EXT_LASX, ++}; ++ ++const char* ++loongarch_abi_base_strings[N_ABI_BASE_OPTS] = { ++ [ABI_BASE_LP64D] = STR_ABI_BASE_LP64D, ++ [ABI_BASE_LP64F] = STR_ABI_BASE_LP64F, ++ [ABI_BASE_LP64S] = STR_ABI_BASE_LP64S, ++ [ABI_BASE_LP64] = STR_ABI_BASE_LP64, ++}; ++ ++const char* ++loongarch_abi_ext_strings[N_ABI_EXT_TYPES] = { ++ [ABI_EXT_BASE] = STR_ABI_EXT_BASE, ++}; ++ ++const char* ++loongarch_cmodel_strings[] = { ++ [CMODEL_NORMAL] = STR_CMODEL_NORMAL, ++ [CMODEL_TINY] = STR_CMODEL_TINY, ++ [CMODEL_TINY_STATIC] = STR_CMODEL_TS, ++ [CMODEL_LARGE] = STR_CMODEL_LARGE, ++ [CMODEL_EXTREME] = STR_CMODEL_EXTREME, ++}; ++ ++ ++/* ABI-related definitions. */ ++const struct loongarch_isa ++abi_minimal_isa[N_ABI_BASE_TYPES][N_ABI_EXT_TYPES] = { ++ [ABI_BASE_LP64D] = { ++ [ABI_EXT_BASE] = {.base = ISA_BASE_LA64V100, .fpu = ISA_EXT_FPU64, .simd = 0}, ++ }, ++ [ABI_BASE_LP64F] = { ++ [ABI_EXT_BASE] = {.base = ISA_BASE_LA64V100, .fpu = ISA_EXT_FPU32, .simd = 0}, ++ }, ++ [ABI_BASE_LP64S] = { ++ [ABI_EXT_BASE] = {.base = ISA_BASE_LA64V100, .fpu = ISA_EXT_NONE, .simd = 0}, ++ }, ++}; +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +new file mode 100644 +index 000000000..45d9ac16c +--- /dev/null ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -0,0 +1,161 @@ ++/* LoongArch definitions. ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. ++ Contributed by Loongson Ltd. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++/* Definition of standard codes for: ++ - base architecture types (isa_base), ++ - ISA extensions (isa_ext), ++ - base ABI types (abi_base), ++ - ABI extension types (abi_ext). 
++ ++ - code models (cmodel) ++ - other command-line switches (switch) ++ ++ These values are primarily used for implementing option handling ++ logic in "loongarch.opt", "loongarch-driver.c" and "loongarch-opt.c". ++ ++ As for the result of this option handling process, the following ++ scheme is adopted to represent the final configuration: ++ ++ - The target ABI is encoded with a tuple (abi_base, abi_ext) ++ using the code defined below. ++ ++ - The target ISA is encoded with a "struct loongarch_isa" defined ++ in loongarch-cpu.h. ++ ++ - The target microarchitecture is represented with a cpu model ++ index defined in loongarch-cpu.h. ++*/ ++ ++#ifndef LOONGARCH_DEF_H ++#define LOONGARCH_DEF_H ++ ++#include "loongarch-tune.h" ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* enum isa_base */ ++extern const char* loongarch_isa_base_strings[]; ++#define ISA_BASE_LA64V100 0 ++#define N_ISA_BASE_TYPES 1 ++ ++/* enum isa_ext_* */ ++extern const char* loongarch_isa_ext_strings[]; ++#define ISA_EXT_NONE 0 ++#define ISA_EXT_FPU32 1 ++#define ISA_EXT_FPU64 2 ++#define N_ISA_EXT_FPU_TYPES 3 ++#define ISA_EXT_SIMD_LSX 3 ++#define ISA_EXT_SIMD_LASX 4 ++#define N_ISA_EXT_TYPES 5 ++ ++/* enum abi_base */ ++extern const char* loongarch_abi_base_strings[]; ++#define ABI_BASE_LP64D 0 ++#define ABI_BASE_LP64F 1 ++#define ABI_BASE_LP64S 2 ++#define N_ABI_BASE_TYPES 3 ++#define ABI_BASE_LP64 3 ++#define N_ABI_BASE_OPTS 4 ++ ++#define IS_LP64_ABI_BASE(C) \ ++ (C == ABI_BASE_LP64D || C == ABI_BASE_LP64F || C == ABI_BASE_LP64S) ++ ++#define TO_LP64_ABI_BASE(C) (C) ++ ++#define ABI_FPU_64(abi_base) \ ++ (abi_base == ABI_BASE_LP64D) ++#define ABI_FPU_32(abi_base) \ ++ (abi_base == ABI_BASE_LP64F) ++#define ABI_FPU_NONE(abi_base) \ ++ (abi_base == ABI_BASE_LP64S) ++ ++ ++/* enum abi_ext */ ++extern const char* loongarch_abi_ext_strings[]; ++#define ABI_EXT_BASE 0 ++#define N_ABI_EXT_TYPES 1 ++ ++/* enum cmodel */ ++extern const char* loongarch_cmodel_strings[]; ++#define CMODEL_NORMAL 0 ++#define CMODEL_TINY 1 ++#define CMODEL_TINY_STATIC 2 ++#define CMODEL_LARGE 3 ++#define CMODEL_EXTREME 4 ++#define N_CMODEL_TYPES 5 ++ ++/* The common default value for variables whose assignments ++ are triggered by command-line options. */ ++ ++#define M_OPT_UNSET -1 ++#define M_OPT_ABSENT(opt_enum) ((opt_enum) == M_OPT_UNSET) ++ ++ ++/* Internal representation of the target. */ ++struct loongarch_isa ++{ ++ int base; /* ISA_BASE_ */ ++ int fpu; /* ISA_EXT_FPU_ */ ++ int simd; /* ISA_EXT_SIMD_ */ ++}; ++ ++struct loongarch_abi ++{ ++ int base; /* ABI_BASE_ */ ++ int ext; /* ABI_EXT_ */ ++}; ++ ++struct loongarch_target ++{ ++ struct loongarch_isa isa; ++ struct loongarch_abi abi; ++ int cpu_arch; /* CPU_ */ ++ int cpu_tune; /* same */ ++ int cmodel; /* CMODEL_ */ ++}; ++ ++/* CPU properties. 
*/ ++/* index */ ++#define CPU_NATIVE 0 ++#define CPU_ABI_DEFAULT 1 ++#define CPU_LOONGARCH64 2 ++#define CPU_LA464 3 ++#define CPU_LA364 4 ++#define CPU_LA264 5 ++#define CPU_LA664 6 ++#define N_ARCH_TYPES 7 ++#define N_TUNE_TYPES 7 ++#define CPU_NONE 8 ++ ++/* parallel tables */ ++extern const char* loongarch_cpu_strings[]; ++extern struct loongarch_isa loongarch_cpu_default_isa[]; ++extern int loongarch_cpu_issue_rate[]; ++extern int loongarch_cpu_multipass_dfa_lookahead[]; ++ ++extern struct loongarch_cache loongarch_cpu_cache[]; ++extern struct loongarch_rtx_cost_data loongarch_cpu_rtx_cost_data[]; ++ ++#ifdef __cplusplus ++} ++#endif ++#endif /* LOONGARCH_DEF_H */ +diff --git a/gcc/config/loongarch/loongarch-driver.c b/gcc/config/loongarch/loongarch-driver.c +new file mode 100644 +index 000000000..1f56df84f +--- /dev/null ++++ b/gcc/config/loongarch/loongarch-driver.c +@@ -0,0 +1,206 @@ ++/* Subroutines for the gcc driver. ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. ++ Contributed by Loongson Ltd. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#define IN_TARGET_CODE 1 ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++#include "obstack.h" ++#include "diagnostic-core.h" ++#include "opts.h" ++ ++#include "loongarch-opts.h" ++#include "loongarch-driver.h" ++ ++/* This flag is set to 1 if we believe that the user might be avoiding ++ linking (implicitly) against something from the startfile search paths. */ ++static int no_link = 0; ++ ++/* Use the public obstack from the gcc driver (defined in gcc.c). ++ This is for allocating space for the returned string. 
*/ ++extern struct obstack opts_obstack; ++ ++const char* ++la_driver_init (int argc ATTRIBUTE_UNUSED, const char **argv ATTRIBUTE_UNUSED) ++{ ++ /* Initialize all fields of la_target to -1 */ ++ loongarch_init_target (&la_target, M_OPT_UNSET, M_OPT_UNSET, M_OPT_UNSET, ++ M_OPT_UNSET, M_OPT_UNSET, M_OPT_UNSET, M_OPT_UNSET); ++ return ""; ++} ++ ++const char* ++driver_set_no_link (int argc, const char **argv) ++{ ++ no_link = 1; ++ return ""; ++} ++ ++const char* ++driver_set_m_parm (int argc, const char **argv) ++{ ++ gcc_assert (argc == 2); ++ ++#define LARCH_DRIVER_PARSE_PARM(OPT_IDX, NAME, OPTSTR_LIST, \ ++ OPT_IDX_LO, OPT_IDX_HI) \ ++ if (strcmp (argv[0], OPTSTR_##NAME) == 0) \ ++ for (int i = (OPT_IDX_LO); i < (OPT_IDX_HI); i++) \ ++ { \ ++ if ((OPTSTR_LIST)[i] != 0) \ ++ if (strcmp (argv[1], (OPTSTR_LIST)[i]) == 0) \ ++ { \ ++ (OPT_IDX) = i; \ ++ return 0; \ ++ } \ ++ } ++ ++ LARCH_DRIVER_PARSE_PARM (la_target.abi.base, ABI_BASE, \ ++ loongarch_abi_base_strings, 0, N_ABI_BASE_OPTS) ++ ++ LARCH_DRIVER_PARSE_PARM (la_target.isa.fpu, ISA_EXT_FPU, \ ++ loongarch_isa_ext_strings, 0, N_ISA_EXT_FPU_TYPES) ++ ++ LARCH_DRIVER_PARSE_PARM (la_target.isa.simd, ISA_EXT_SIMD, \ ++ loongarch_isa_ext_strings, 0, N_ISA_EXT_TYPES) ++ ++ LARCH_DRIVER_PARSE_PARM (la_target.cpu_arch, ARCH, \ ++ loongarch_cpu_strings, 0, N_ARCH_TYPES) ++ ++ LARCH_DRIVER_PARSE_PARM (la_target.cpu_tune, TUNE, \ ++ loongarch_cpu_strings, 0, N_TUNE_TYPES) ++ ++ LARCH_DRIVER_PARSE_PARM (la_target.cmodel, CMODEL, \ ++ loongarch_cmodel_strings, 0, N_CMODEL_TYPES) ++ ++ gcc_unreachable (); ++} ++ ++static void ++driver_record_deferred_opts (struct loongarch_flags *flags) ++{ ++ unsigned int i; ++ cl_deferred_option *opt; ++ vec *v = (vec *) la_deferred_options; ++ ++ gcc_assert (flags); ++ ++ /* Initialize flags */ ++ flags->flt = M_OPT_UNSET; ++ flags->flt_str = NULL; ++ flags->sx[0] = flags->sx[1] = 0; ++ ++ int sx_flag_idx = 0; ++ ++ if (v) ++ FOR_EACH_VEC_ELT (*v, i, opt) ++ { ++ switch (opt->opt_index) ++ { ++ case OPT_mlsx: ++ flags->sx[sx_flag_idx++] = ISA_EXT_SIMD_LSX * (opt->value ? 1 : -1); ++ break; ++ ++ case OPT_mlasx: ++ flags->sx[sx_flag_idx++] = ISA_EXT_SIMD_LASX * (opt->value ? 1 : -1); ++ break; ++ ++ case OPT_msoft_float: ++ flags->flt = ISA_EXT_NONE; ++ flags->flt_str = OPTSTR_SOFT_FLOAT; ++ break; ++ ++ case OPT_msingle_float: ++ flags->flt = ISA_EXT_FPU32; ++ flags->flt_str = OPTSTR_SINGLE_FLOAT; ++ break; ++ ++ case OPT_mdouble_float: ++ flags->flt = ISA_EXT_FPU64; ++ flags->flt_str = OPTSTR_DOUBLE_FLOAT; ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ gcc_assert (sx_flag_idx <= 2); ++ } ++} ++ ++const char* ++driver_get_normalized_m_opts (int argc, const char **argv ATTRIBUTE_UNUSED) ++{ ++ if (argc != 0) ++ return " %eget_normalized_m_opts requires no argument.\n"; ++ ++ struct loongarch_flags flags; ++ driver_record_deferred_opts (&flags); ++ loongarch_config_target (&la_target, &flags, !no_link /* follow_multilib_list */); ++ ++ /* Output normalized option strings. */ ++ obstack_blank (&opts_obstack, 0); ++ ++#undef APPEND_LTR ++#define APPEND_LTR(S) \ ++ obstack_grow (&opts_obstack, (const void*) (S), \ ++ sizeof ((S)) / sizeof (char) -1) ++ ++#undef APPEND_VAL ++#define APPEND_VAL(S) \ ++ obstack_grow (&opts_obstack, (const void*) (S), strlen ((S))) ++ ++#undef APPEND_OPT ++#define APPEND_OPT(NAME) \ ++ APPEND_LTR (" %. 
*/ ++ ++#ifndef LOONGARCH_DRIVER_H ++#define LOONGARCH_DRIVER_H ++ ++#include "loongarch-str.h" ++ ++extern const char* ++la_driver_init (int argc, const char **argv); ++ ++extern const char* ++driver_set_m_parm (int argc, const char **argv); ++ ++extern const char* ++driver_set_no_link (int argc, const char **argv); ++ ++extern const char* ++driver_get_normalized_m_opts (int argc, const char **argv); ++ ++#define EXTRA_SPEC_FUNCTIONS \ ++ { "driver_init", la_driver_init }, \ ++ { "set_m_parm", driver_set_m_parm }, \ ++ { "set_no_link", driver_set_no_link }, \ ++ { "get_normalized_m_opts", driver_get_normalized_m_opts }, ++ ++/* Pre-process ABI-related options. */ ++#define LA_SET_PARM_SPEC(NAME) \ ++ " %{m" OPTSTR_##NAME "=*: %:set_m_parm(" OPTSTR_##NAME " %*)}" \ ++ ++#define DRIVER_HANDLE_MACHINE_OPTIONS \ ++ " %:driver_init()" \ ++ " %{c|S|E|nostdlib: %:set_no_link()}" \ ++ " %{nostartfiles: %{nodefaultlibs: %:set_no_link()}}" \ ++ LA_SET_PARM_SPEC (ABI_BASE) \ ++ LA_SET_PARM_SPEC (ARCH) \ ++ LA_SET_PARM_SPEC (TUNE) \ ++ LA_SET_PARM_SPEC (ISA_EXT_FPU) \ ++ LA_SET_PARM_SPEC (ISA_EXT_SIMD) \ ++ LA_SET_PARM_SPEC (CMODEL) \ ++ " %:get_normalized_m_opts()" ++ ++#define DRIVER_SELF_SPECS \ ++ DRIVER_HANDLE_MACHINE_OPTIONS ++ ++/* ABI spec strings. */ ++#define ABI_GRLEN_SPEC \ ++ "%{mabi=lp64*:64}" \ ++ ++#define ABI_SPEC \ ++ "%{mabi=lp64d:lp64d}" \ ++ "%{mabi=lp64f:lp64f}" \ ++ "%{mabi=lp64s:lp64s}" \ ++ ++#endif /* LOONGARCH_DRIVER_H */ +diff --git a/gcc/config/loongarch/loongarch-ftypes.def b/gcc/config/loongarch/loongarch-ftypes.def +index a10a025ba..1ef4e2dc8 100644 +--- a/gcc/config/loongarch/loongarch-ftypes.def ++++ b/gcc/config/loongarch/loongarch-ftypes.def +@@ -1,5 +1,7 @@ +-/* Definitions of prototypes for LARCH built-in functions. -*- C -*- +- Copyright (C) 2007-2018 Free Software Foundation, Inc. ++/* Definitions of prototypes for LoongArch built-in functions. ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. ++ Contributed by Loongson Co. Ltd. ++ Based on MIPS target for GNU compiler. + + This file is part of GCC. + +@@ -18,11 +20,11 @@ along with GCC; see the file COPYING3. If not see + . */ + + /* Invoke DEF_LARCH_FTYPE (NARGS, LIST) for each prototype used by +- LARCH built-in functions, where: ++ LoongArch built-in functions, where: + + NARGS is the number of arguments. + LIST contains the return-type code followed by the codes for each +- argument type. ++ argument type. + + Argument- and return-type codes are either modes or one of the following: + +@@ -30,65 +32,55 @@ along with GCC; see the file COPYING3. If not see + INT for integer_type_node + POINTER for ptr_type_node + +- (we don't use PTR because that's a ANSI-compatibillity macro). ++ (we don't use PTR because that's a ANSI-compatibility macro). + + Please keep this list lexicographically sorted by the LIST argument. */ +-DEF_LARCH_FTYPE (1, (DF, DF)) +-DEF_LARCH_FTYPE (2, (DF, DF, DF)) +-DEF_LARCH_FTYPE (1, (DF, V2DF)) +-DEF_LARCH_FTYPE (1, (DF, V4DF)) + +-DEF_LARCH_FTYPE (1, (DI, DI)) +-DEF_LARCH_FTYPE (1, (DI, SI)) +-DEF_LARCH_FTYPE (1, (DI, UQI)) +-DEF_LARCH_FTYPE (1, (UDI, USI)) ++/* Non-vector builtin types. 
*/ ++ + DEF_LARCH_FTYPE (1, (UQI, USI)) +-DEF_LARCH_FTYPE (1, (USI, UQI)) + DEF_LARCH_FTYPE (1, (UHI, USI)) +-DEF_LARCH_FTYPE (2, (DI, DI, DI)) +-DEF_LARCH_FTYPE (2, (DI, DI, SI)) +-DEF_LARCH_FTYPE (2, (DI, DI, UQI)) ++DEF_LARCH_FTYPE (1, (USI, USI)) ++DEF_LARCH_FTYPE (1, (UDI, USI)) ++DEF_LARCH_FTYPE (1, (USI, UQI)) ++DEF_LARCH_FTYPE (1, (VOID, USI)) ++ ++DEF_LARCH_FTYPE (2, (VOID, UQI, USI)) ++DEF_LARCH_FTYPE (2, (VOID, UHI, USI)) ++DEF_LARCH_FTYPE (2, (VOID, USI, USI)) ++DEF_LARCH_FTYPE (2, (VOID, UDI, USI)) + DEF_LARCH_FTYPE (2, (VOID, DI, UQI)) + DEF_LARCH_FTYPE (2, (VOID, SI, UQI)) ++DEF_LARCH_FTYPE (2, (VOID, DI, DI)) ++DEF_LARCH_FTYPE (2, (SI, SI, UQI)) ++DEF_LARCH_FTYPE (2, (DI, DI, UQI)) ++DEF_LARCH_FTYPE (2, (SI, QI, SI)) ++DEF_LARCH_FTYPE (2, (SI, HI, SI)) ++DEF_LARCH_FTYPE (2, (SI, SI, SI)) ++DEF_LARCH_FTYPE (2, (SI, DI, SI)) ++DEF_LARCH_FTYPE (2, (USI, USI, USI)) + DEF_LARCH_FTYPE (2, (UDI, UDI, USI)) +-DEF_LARCH_FTYPE (3, (DI, DI, SI, SI)) +-DEF_LARCH_FTYPE (3, (DI, DI, USI, USI)) +-DEF_LARCH_FTYPE (3, (DI, DI, DI, QI)) ++ ++DEF_LARCH_FTYPE (3, (VOID, USI, USI, SI)) ++DEF_LARCH_FTYPE (3, (VOID, USI, UDI, SI)) ++DEF_LARCH_FTYPE (3, (USI, USI, USI, USI)) + DEF_LARCH_FTYPE (3, (UDI, UDI, UDI, USI)) ++ ++/* Vector builtin types. */ ++ ++DEF_LARCH_FTYPE (1, (DF, V2DF)) ++DEF_LARCH_FTYPE (1, (DF, V4DF)) + DEF_LARCH_FTYPE (3, (DI, DI, V2HI, V2HI)) + DEF_LARCH_FTYPE (3, (DI, DI, V4QI, V4QI)) +-DEF_LARCH_FTYPE (2, (DI, POINTER, SI)) +-DEF_LARCH_FTYPE (2, (DI, SI, SI)) +-DEF_LARCH_FTYPE (2, (DI, USI, USI)) + DEF_LARCH_FTYPE (2, (DI, V2DI, UQI)) + DEF_LARCH_FTYPE (2, (DI, V4DI, UQI)) + +-DEF_LARCH_FTYPE (2, (INT, DF, DF)) +-DEF_LARCH_FTYPE (2, (INT, SF, SF)) + DEF_LARCH_FTYPE (2, (INT, V2SF, V2SF)) + DEF_LARCH_FTYPE (4, (INT, V2SF, V2SF, V2SF, V2SF)) + +-DEF_LARCH_FTYPE (1, (SF, SF)) +-DEF_LARCH_FTYPE (2, (SF, SF, SF)) + DEF_LARCH_FTYPE (1, (SF, V2SF)) + DEF_LARCH_FTYPE (1, (SF, V4SF)) + +-DEF_LARCH_FTYPE (2, (SI, DI, SI)) +-DEF_LARCH_FTYPE (2, (SI, POINTER, SI)) +-DEF_LARCH_FTYPE (1, (SI, SI)) +-DEF_LARCH_FTYPE (1, (USI, USI)) +-DEF_LARCH_FTYPE (1, (SI, UDI)) +-DEF_LARCH_FTYPE (2, (QI, QI, QI)) +-DEF_LARCH_FTYPE (2, (HI, HI, HI)) +-DEF_LARCH_FTYPE (2, (SI, QI, SI)) +-DEF_LARCH_FTYPE (2, (SI, HI, SI)) +-DEF_LARCH_FTYPE (2, (SI, SI, SI)) +-DEF_LARCH_FTYPE (2, (SI, SI, UQI)) +-DEF_LARCH_FTYPE (2, (USI, USI, USI)) +-DEF_LARCH_FTYPE (3, (SI, SI, SI, SI)) +-DEF_LARCH_FTYPE (3, (SI, SI, SI, QI)) +-DEF_LARCH_FTYPE (3, (USI, USI, USI, USI)) +-DEF_LARCH_FTYPE (1, (SI, UQI)) + DEF_LARCH_FTYPE (1, (SI, UV16QI)) + DEF_LARCH_FTYPE (1, (SI, UV32QI)) + DEF_LARCH_FTYPE (1, (SI, UV2DI)) +@@ -106,9 +98,7 @@ DEF_LARCH_FTYPE (2, (SI, V4QI, V4QI)) + DEF_LARCH_FTYPE (2, (SI, V4SI, UQI)) + DEF_LARCH_FTYPE (2, (SI, V8SI, UQI)) + DEF_LARCH_FTYPE (2, (SI, V8HI, UQI)) +-DEF_LARCH_FTYPE (1, (SI, VOID)) + +-DEF_LARCH_FTYPE (2, (UDI, UDI, UDI)) + DEF_LARCH_FTYPE (2, (USI, V32QI, UQI)) + DEF_LARCH_FTYPE (2, (UDI, UV2SI, UV2SI)) + DEF_LARCH_FTYPE (2, (USI, V8SI, UQI)) +@@ -119,8 +109,6 @@ DEF_LARCH_FTYPE (2, (UDI, V4DI, UQI)) + DEF_LARCH_FTYPE (2, (USI, V16QI, UQI)) + DEF_LARCH_FTYPE (2, (USI, V4SI, UQI)) + DEF_LARCH_FTYPE (2, (USI, V8HI, UQI)) +-DEF_LARCH_FTYPE (1, (USI, VOID)) +- + DEF_LARCH_FTYPE (2, (UV16QI, UV16QI, UQI)) + DEF_LARCH_FTYPE (2, (UV16QI, UV16QI, USI)) + DEF_LARCH_FTYPE (2, (UV16QI, UV16QI, UV16QI)) +@@ -476,19 +464,6 @@ DEF_LARCH_FTYPE (2, (V8QI, V4HI, V4HI)) + DEF_LARCH_FTYPE (1, (V8QI, V8QI)) + DEF_LARCH_FTYPE (2, (V8QI, V8QI, V8QI)) + +-DEF_LARCH_FTYPE (2, (VOID, SI, CVPOINTER)) 
+-DEF_LARCH_FTYPE (2, (VOID, SI, SI)) +-DEF_LARCH_FTYPE (2, (VOID, DI, DI)) +-DEF_LARCH_FTYPE (2, (VOID, UQI, SI)) +-DEF_LARCH_FTYPE (1, (VOID, USI)) +-DEF_LARCH_FTYPE (2, (VOID, USI, UQI)) +-DEF_LARCH_FTYPE (1, (VOID, UHI)) +-DEF_LARCH_FTYPE (2, (VOID, UQI, USI)) +-DEF_LARCH_FTYPE (2, (VOID, UHI, USI)) +-DEF_LARCH_FTYPE (2, (VOID, USI, USI)) +-DEF_LARCH_FTYPE (2, (VOID, UDI, USI)) +-DEF_LARCH_FTYPE (3, (VOID, USI, USI, SI)) +-DEF_LARCH_FTYPE (3, (VOID, USI, UDI, SI)) + DEF_LARCH_FTYPE (3, (VOID, V16QI, CVPOINTER, SI)) + DEF_LARCH_FTYPE (3, (VOID, V16QI, CVPOINTER, DI)) + DEF_LARCH_FTYPE (3, (VOID, V32QI, CVPOINTER, SI)) +@@ -648,36 +623,36 @@ DEF_LARCH_FTYPE (3, (V4SI, V4SI, UV16QI, V16QI)) + DEF_LARCH_FTYPE (3, (UV4SI, UV4SI, UV16QI, UV16QI)) + + +-DEF_LARCH_FTYPE(2,(V4DI,V16HI,V16HI)) +-DEF_LARCH_FTYPE(2,(V4DI,UV4SI,V4SI)) +-DEF_LARCH_FTYPE(2,(V8SI,UV16HI,V16HI)) +-DEF_LARCH_FTYPE(2,(V16HI,UV32QI,V32QI)) +-DEF_LARCH_FTYPE(2,(V4DI,UV8SI,V8SI)) +-DEF_LARCH_FTYPE(3,(V4DI,V4DI,V16HI,V16HI)) +-DEF_LARCH_FTYPE(2,(UV32QI,V32QI,UV32QI)) +-DEF_LARCH_FTYPE(2,(UV16HI,V16HI,UV16HI)) +-DEF_LARCH_FTYPE(2,(UV8SI,V8SI,UV8SI)) +-DEF_LARCH_FTYPE(2,(UV4DI,V4DI,UV4DI)) +-DEF_LARCH_FTYPE(3,(V4DI,V4DI,UV4DI,V4DI)) +-DEF_LARCH_FTYPE(3,(V4DI,V4DI,UV8SI,V8SI)) +-DEF_LARCH_FTYPE(3,(V8SI,V8SI,UV16HI,V16HI)) +-DEF_LARCH_FTYPE(3,(V16HI,V16HI,UV32QI,V32QI)) +-DEF_LARCH_FTYPE(2,(V4DI,UV4DI,V4DI)) +-DEF_LARCH_FTYPE(2,(V8SI,V32QI,V32QI)) +-DEF_LARCH_FTYPE(2,(UV4DI,UV16HI,UV16HI)) +-DEF_LARCH_FTYPE(2,(V4DI,UV16HI,V16HI)) +-DEF_LARCH_FTYPE(3,(V8SI,V8SI,V32QI,V32QI)) +-DEF_LARCH_FTYPE(3,(UV8SI,UV8SI,UV32QI,UV32QI)) +-DEF_LARCH_FTYPE(3,(UV4DI,UV4DI,UV16HI,UV16HI)) +-DEF_LARCH_FTYPE(3,(V8SI,V8SI,UV32QI,V32QI)) +-DEF_LARCH_FTYPE(3,(V4DI,V4DI,UV16HI,V16HI)) +-DEF_LARCH_FTYPE(2,(UV8SI,UV32QI,UV32QI)) +-DEF_LARCH_FTYPE(2,(V8SI,UV32QI,V32QI)) +- +-DEF_LARCH_FTYPE(4,(VOID,V16QI,CVPOINTER,SI,UQI)) +-DEF_LARCH_FTYPE(4,(VOID,V8HI,CVPOINTER,SI,UQI)) +-DEF_LARCH_FTYPE(4,(VOID,V4SI,CVPOINTER,SI,UQI)) +-DEF_LARCH_FTYPE(4,(VOID,V2DI,CVPOINTER,SI,UQI)) ++DEF_LARCH_FTYPE (2, (V4DI, V16HI, V16HI)) ++DEF_LARCH_FTYPE (2, (V4DI, UV4SI, V4SI)) ++DEF_LARCH_FTYPE (2, (V8SI, UV16HI, V16HI)) ++DEF_LARCH_FTYPE (2, (V16HI, UV32QI, V32QI)) ++DEF_LARCH_FTYPE (2, (V4DI, UV8SI, V8SI)) ++DEF_LARCH_FTYPE (3, (V4DI, V4DI, V16HI, V16HI)) ++DEF_LARCH_FTYPE (2, (UV32QI, V32QI, UV32QI)) ++DEF_LARCH_FTYPE (2, (UV16HI, V16HI, UV16HI)) ++DEF_LARCH_FTYPE (2, (UV8SI, V8SI, UV8SI)) ++DEF_LARCH_FTYPE (2, (UV4DI, V4DI, UV4DI)) ++DEF_LARCH_FTYPE (3, (V4DI, V4DI, UV4DI, V4DI)) ++DEF_LARCH_FTYPE (3, (V4DI, V4DI, UV8SI, V8SI)) ++DEF_LARCH_FTYPE (3, (V8SI, V8SI, UV16HI, V16HI)) ++DEF_LARCH_FTYPE (3, (V16HI, V16HI, UV32QI, V32QI)) ++DEF_LARCH_FTYPE (2, (V4DI, UV4DI, V4DI)) ++DEF_LARCH_FTYPE (2, (V8SI, V32QI, V32QI)) ++DEF_LARCH_FTYPE (2, (UV4DI, UV16HI, UV16HI)) ++DEF_LARCH_FTYPE (2, (V4DI, UV16HI, V16HI)) ++DEF_LARCH_FTYPE (3, (V8SI, V8SI, V32QI, V32QI)) ++DEF_LARCH_FTYPE (3, (UV8SI, UV8SI, UV32QI, UV32QI)) ++DEF_LARCH_FTYPE (3, (UV4DI, UV4DI, UV16HI, UV16HI)) ++DEF_LARCH_FTYPE (3, (V8SI, V8SI, UV32QI, V32QI)) ++DEF_LARCH_FTYPE (3, (V4DI, V4DI, UV16HI, V16HI)) ++DEF_LARCH_FTYPE (2, (UV8SI, UV32QI, UV32QI)) ++DEF_LARCH_FTYPE (2, (V8SI, UV32QI, V32QI)) ++ ++DEF_LARCH_FTYPE (4, (VOID, V16QI, CVPOINTER, SI, UQI)) ++DEF_LARCH_FTYPE (4, (VOID, V8HI, CVPOINTER, SI, UQI)) ++DEF_LARCH_FTYPE (4, (VOID, V4SI, CVPOINTER, SI, UQI)) ++DEF_LARCH_FTYPE (4, (VOID, V2DI, CVPOINTER, SI, UQI)) + + DEF_LARCH_FTYPE (2, (DI, V16QI, UQI)) + DEF_LARCH_FTYPE (2, (DI, V8HI, UQI)) +@@ -699,16 +674,16 
@@ DEF_LARCH_FTYPE (3, (UV16HI, UV16HI, V16HI, USI)) + DEF_LARCH_FTYPE (3, (UV8SI, UV8SI, V8SI, USI)) + DEF_LARCH_FTYPE (3, (UV4DI, UV4DI, V4DI, USI)) + +-DEF_LARCH_FTYPE(4,(VOID,V32QI,CVPOINTER,SI,UQI)) +-DEF_LARCH_FTYPE(4,(VOID,V16HI,CVPOINTER,SI,UQI)) +-DEF_LARCH_FTYPE(4,(VOID,V8SI,CVPOINTER,SI,UQI)) +-DEF_LARCH_FTYPE(4,(VOID,V4DI,CVPOINTER,SI,UQI)) ++DEF_LARCH_FTYPE (4, (VOID, V32QI, CVPOINTER, SI, UQI)) ++DEF_LARCH_FTYPE (4, (VOID, V16HI, CVPOINTER, SI, UQI)) ++DEF_LARCH_FTYPE (4, (VOID, V8SI, CVPOINTER, SI, UQI)) ++DEF_LARCH_FTYPE (4, (VOID, V4DI, CVPOINTER, SI, UQI)) + +-DEF_LARCH_FTYPE (1, (BOOLEAN,V16QI)) +-DEF_LARCH_FTYPE(2,(V16QI,CVPOINTER,CVPOINTER)) +-DEF_LARCH_FTYPE(3,(VOID,V16QI,CVPOINTER,CVPOINTER)) +-DEF_LARCH_FTYPE(2,(V32QI,CVPOINTER,CVPOINTER)) +-DEF_LARCH_FTYPE(3,(VOID,V32QI,CVPOINTER,CVPOINTER)) ++DEF_LARCH_FTYPE (1, (BOOLEAN, V16QI)) ++DEF_LARCH_FTYPE (2, (V16QI, CVPOINTER, CVPOINTER)) ++DEF_LARCH_FTYPE (3, (VOID, V16QI, CVPOINTER, CVPOINTER)) ++DEF_LARCH_FTYPE (2, (V32QI, CVPOINTER, CVPOINTER)) ++DEF_LARCH_FTYPE (3, (VOID, V32QI, CVPOINTER, CVPOINTER)) + + DEF_LARCH_FTYPE (3, (V16QI, V16QI, SI, UQI)) + DEF_LARCH_FTYPE (3, (V2DI, V2DI, SI, UQI)) +diff --git a/gcc/config/loongarch/loongarch-modes.def b/gcc/config/loongarch/loongarch-modes.def +index fe5bc38d9..53392b484 100644 +--- a/gcc/config/loongarch/loongarch-modes.def ++++ b/gcc/config/loongarch/loongarch-modes.def +@@ -1,5 +1,7 @@ +-/* LARCH extra machine modes. +- Copyright (C) 2003-2018 Free Software Foundation, Inc. ++/* LoongArch extra machine modes. ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. ++ Contributed by Loongson Co. Ltd. ++ Based on MIPS target for GNU compiler. + + This file is part of GCC. + +diff --git a/gcc/config/loongarch/loongarch-opts.c b/gcc/config/loongarch/loongarch-opts.c +new file mode 100644 +index 000000000..cf11f67d1 +--- /dev/null ++++ b/gcc/config/loongarch/loongarch-opts.c +@@ -0,0 +1,725 @@ ++#define IN_TARGET_CODE 1 ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++#include "obstack.h" ++#include "diagnostic-core.h" ++ ++#include "loongarch-cpu.h" ++#include "loongarch-opts.h" ++#include "loongarch-str.h" ++#include "loongarch-def.h" ++ ++struct loongarch_target la_target; ++ ++/* ABI-related configuration. */ ++#define ABI_COUNT (sizeof(abi_priority_list)/sizeof(struct loongarch_abi)) ++static const struct loongarch_abi ++abi_priority_list[] = { ++ {ABI_BASE_LP64D, ABI_EXT_BASE}, ++ {ABI_BASE_LP64F, ABI_EXT_BASE}, ++ {ABI_BASE_LP64S, ABI_EXT_BASE}, ++}; ++ ++/* Initialize enabled_abi_types from TM_MULTILIB_LIST. 
*/ ++#ifdef LA_DISABLE_MULTILIB ++#define MULTILIB_LIST_LEN 1 ++#else ++#define MULTILIB_LIST_LEN (sizeof (tm_multilib_list) / sizeof (int) / 2) ++static const int tm_multilib_list[] = { TM_MULTILIB_LIST }; ++#endif ++static int enabled_abi_types[N_ABI_BASE_TYPES][N_ABI_EXT_TYPES] = { 0 }; ++ ++#define isa_required(ABI) (abi_minimal_isa[(ABI).base][(ABI).ext]) ++extern "C" const struct loongarch_isa ++abi_minimal_isa[N_ABI_BASE_TYPES][N_ABI_EXT_TYPES]; ++ ++static inline int ++is_multilib_enabled (struct loongarch_abi abi) ++{ ++ return enabled_abi_types[abi.base][abi.ext]; ++} ++ ++static void ++init_enabled_abi_types () ++{ ++#ifdef LA_DISABLE_MULTILIB ++ enabled_abi_types[DEFAULT_ABI_BASE][DEFAULT_ABI_EXT] = 1; ++#else ++ int abi_base, abi_ext; ++ for (unsigned int i = 0; i < MULTILIB_LIST_LEN; i++) ++ { ++ abi_base = tm_multilib_list[i << 1]; ++ abi_ext = tm_multilib_list[(i << 1) + 1]; ++ enabled_abi_types[abi_base][abi_ext] = 1; ++ } ++#endif ++} ++ ++/* String processing. */ ++static struct obstack msg_obstack; ++#define APPEND_STRING(STR) obstack_grow (&msg_obstack, STR, strlen(STR)); ++#define APPEND1(CH) obstack_1grow(&msg_obstack, CH); ++ ++static const char* abi_str (struct loongarch_abi abi); ++static const char* isa_str (const struct loongarch_isa *isa, char separator); ++static const char* arch_str (const struct loongarch_target *target); ++static const char* multilib_enabled_abi_list (); /* Misc */ ++static struct loongarch_abi isa_default_abi (const struct loongarch_isa *isa); ++static int isa_base_compat_p (const struct loongarch_isa *set1, ++ const struct loongarch_isa *set2); ++static int isa_fpu_compat_p (const struct loongarch_isa *set1, ++ const struct loongarch_isa *set2); ++static int abi_compat_p (const struct loongarch_isa *isa, ++ struct loongarch_abi abi); ++static int abi_default_cpu_arch (struct loongarch_abi abi, struct loongarch_isa *isa); ++ ++/* Mandatory configure-time defaults. */ ++#ifndef DEFAULT_ABI_BASE ++#error missing definition of DEFAULT_ABI_BASE in ${tm_defines}. ++#endif ++ ++#ifndef DEFAULT_ABI_EXT ++#error missing definition of DEFAULT_ABI_EXT in ${tm_defines}. ++#endif ++ ++#ifndef DEFAULT_CPU_ARCH ++#error missing definition of DEFAULT_CPU_ARCH in ${tm_defines}. ++#endif ++ ++/* Optional configure-time defaults. */ ++#ifdef DEFAULT_CPU_TUNE ++static int with_default_tune = 1; ++#else ++#define DEFAULT_CPU_TUNE -1 ++static int with_default_tune = 0; ++#endif ++ ++#ifdef DEFAULT_ISA_EXT_FPU ++static int with_default_fpu = 1; ++#else ++#define DEFAULT_ISA_EXT_FPU -1 ++static int with_default_fpu = 0; ++#endif ++ ++#ifdef DEFAULT_ISA_EXT_SIMD ++static int with_default_simd = 1; ++#else ++#define DEFAULT_ISA_EXT_SIMD -1 ++static int with_default_simd = 0; ++#endif ++ ++ ++/* Initialize loongarch_target from separate option variables. */ ++ ++void ++loongarch_init_target (struct loongarch_target *target, ++ int cpu_arch, int cpu_tune, int fpu, int simd, ++ int abi_base, int abi_ext, int cmodel) ++{ ++ if (!target) ++ return; ++ target->cpu_arch = cpu_arch; ++ target->cpu_tune = cpu_tune; ++ target->isa.fpu = fpu; ++ target->isa.simd = simd; ++ target->abi.base = abi_base; ++ target->abi.ext = abi_ext; ++ target->cmodel = cmodel; ++} ++ ++ ++/* Handle combinations of -m parameters ++ (see loongarch.opt and loongarch-opts.h). 
*/ ++ ++void ++loongarch_config_target (struct loongarch_target *target, ++ struct loongarch_flags *flags, ++ int follow_multilib_list_p) ++{ ++ struct loongarch_target t; ++ if (!target) ++ return; ++ ++ /* Initialization */ ++ init_enabled_abi_types (); ++ obstack_init (&msg_obstack); ++ ++ struct { ++ int arch, tune, fpu, simd, abi_base, abi_ext, cmodel, abi_flt; ++ } constrained = { ++ M_OPT_ABSENT (target->cpu_arch) ? 0 : 1, ++ M_OPT_ABSENT (target->cpu_tune) ? 0 : 1, ++ M_OPT_ABSENT (target->isa.fpu) ? 0 : 1, ++ M_OPT_ABSENT (target->isa.simd) ? 0 : 1, ++ M_OPT_ABSENT (target->abi.base) ? 0 : 1, ++ M_OPT_ABSENT (target->abi.ext) ? 0 : 1, ++ M_OPT_ABSENT (target->cmodel) ? 0 : 1, ++ M_OPT_ABSENT (target->abi.base) ? 0 : 1, ++ }; ++ ++ /* 1. Target ABI */ ++ if (constrained.abi_base && target->abi.base >= N_ABI_BASE_TYPES) ++ /* Special treatments for legacy options ("-mabi=lp64") ++ in GCC driver. */ ++ switch (target->abi.base) ++ { ++ case ABI_BASE_LP64: ++ t.abi.base = TO_LP64_ABI_BASE (DEFAULT_ABI_BASE); ++ constrained.abi_flt = 0; ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ else if (constrained.abi_base) ++ t.abi.base = target->abi.base; ++ else ++ t.abi.base = DEFAULT_ABI_BASE; ++ ++ t.abi.ext = constrained.abi_ext ? target->abi.ext : DEFAULT_ABI_EXT; ++ ++ /* Process -m*-float flags */ ++ if (flags && !M_OPT_ABSENT (flags->flt)) ++ { ++ /* Modifying the original "target" here makes it easier to write the ++ t.isa.fpu assignment below, because otherwise there would be three ++ levels of precedence (-m*-float / -mfpu / -march) to be handled ++ (now the first two are merged). */ ++ ++ target->isa.fpu = flags->flt; ++ constrained.fpu = 1; ++ ++ /* The target ISA is not ready yet, but (isa_required (t.abi) ++ + forced fpu) is enough for computing the forced base ABI. */ ++ ++ struct loongarch_isa force_isa = isa_required (t.abi); ++ force_isa.fpu = flags->flt; ++ ++ struct loongarch_abi force_abi; ++ force_abi.base = isa_default_abi (&force_isa).base; ++ ++ if (constrained.abi_base && constrained.abi_flt ++ && (t.abi.base != force_abi.base)) ++ { ++ force_abi.ext = t.abi.ext; ++ inform (UNKNOWN_LOCATION, ++ "%<-m%s%> overrides %<-m%s=%s%>, adjusting ABI to %qs", ++ flags->flt_str, OPTSTR_ABI_BASE, ++ loongarch_abi_base_strings[t.abi.base], ++ abi_str (force_abi)); ++ } ++ ++ t.abi.base = force_abi.base; ++ constrained.abi_flt = 1; ++ } ++ ++#ifdef LA_DISABLE_MULTILIB ++ if (follow_multilib_list_p) ++ if (t.abi.base != DEFAULT_ABI_BASE || t.abi.ext != DEFAULT_ABI_EXT) ++ { ++ static const struct loongarch_abi default_abi ++ = {DEFAULT_ABI_BASE, DEFAULT_ABI_EXT}; ++ ++ warning (0, "ABI changed (%qs to %qs) while multilib is disabled", ++ abi_str (default_abi), abi_str (t.abi)); ++ } ++#endif ++ ++ /* 2. Target CPU */ ++ t.cpu_arch = constrained.arch ? target->cpu_arch : DEFAULT_CPU_ARCH; ++ ++ /* If cpu_tune is not set using neither -mtune nor --with-tune, ++ the current cpu_arch is used as its default. */ ++ t.cpu_tune = constrained.tune ? target->cpu_tune ++ : (constrained.arch ? target->cpu_arch : ++ (with_default_tune ? DEFAULT_CPU_TUNE : DEFAULT_CPU_ARCH)); ++ ++ ++ /* Handle -march/tune=native */ ++#ifdef __loongarch__ ++ /* For native compilers, gather local CPU information ++ and fill the "CPU_NATIVE" index of arrays defined in ++ loongarch-cpu.c. 
*/ ++ ++ fill_native_cpu_config (&t); ++ ++#else ++ if (t.cpu_arch == CPU_NATIVE) ++ fatal_error (UNKNOWN_LOCATION, ++ "%qs does not work on a cross compiler", ++ "-m" OPTSTR_ARCH "=" STR_CPU_NATIVE); ++ ++ else if (t.cpu_tune == CPU_NATIVE) ++ fatal_error (UNKNOWN_LOCATION, ++ "%qs does not work on a cross compiler", ++ "-m" OPTSTR_TUNE "=" STR_CPU_NATIVE); ++#endif ++ ++ /* Handle -march/tune=abi-default */ ++ if (t.cpu_tune == CPU_ABI_DEFAULT) ++ t.cpu_tune = abi_default_cpu_arch (t.abi, NULL); ++ ++ if (t.cpu_arch == CPU_ABI_DEFAULT) ++ { ++ t.cpu_arch = abi_default_cpu_arch (t.abi, &(t.isa)); ++ loongarch_cpu_default_isa[t.cpu_arch] = t.isa; ++ } ++ ++ /* 3. Target base ISA */ ++config_target_isa: ++ ++ /* Get default ISA from "-march" or its default value. */ ++ t.isa = loongarch_cpu_default_isa[t.cpu_arch]; ++ ++ /* Apply incremental changes. */ ++ /* "-march=native" overrides the default FPU type. */ ++ ++ t.isa.fpu = constrained.fpu ? target->isa.fpu : ++ (constrained.arch ? t.isa.fpu : ++ (with_default_fpu ? DEFAULT_ISA_EXT_FPU : t.isa.fpu)); ++ ++ t.isa.simd = constrained.simd ? target->isa.simd : ++ (constrained.arch ? t.isa.simd : ++ (with_default_simd ? DEFAULT_ISA_EXT_SIMD : t.isa.simd)); ++ ++ /* apply -m[no-]lsx and -m[no-]lasx flags */ ++ if (flags) ++ for (int i = 0; i < 2; i++) ++ { ++ switch (SX_FLAG_TYPE (flags->sx[i])) ++ { ++ case ISA_EXT_SIMD_LSX: ++ constrained.simd = 1; ++ if (flags->sx[i] > 0 && t.isa.simd != ISA_EXT_SIMD_LASX) ++ t.isa.simd = ISA_EXT_SIMD_LSX; ++ else if (flags->sx[i] < 0) ++ t.isa.simd = ISA_EXT_NONE; ++ break; ++ ++ case ISA_EXT_SIMD_LASX: ++ constrained.simd = 1; ++ if (flags->sx[i] < 0 && t.isa.simd == ISA_EXT_SIMD_LASX) ++ t.isa.simd = ISA_EXT_SIMD_LSX; ++ else if (flags->sx[i] > 0) ++ t.isa.simd = ISA_EXT_SIMD_LASX; ++ break; ++ ++ case 0: ++ break; ++ ++ default: ++ gcc_unreachable(); ++ } ++ } ++ ++ /* All SIMD extensions imply a 64-bit FPU: ++ - silently adjust t.isa.fpu to "fpu64" if it is unconstrained. ++ - warn if -msingle-float / -msoft-float is on, ++ then disable SIMD extensions (done in driver) ++ - abort if -mfpu=0 / -mfpu=32 is forced. */ ++ ++ if (t.isa.simd != ISA_EXT_NONE && t.isa.fpu != ISA_EXT_FPU64) ++ { ++ if (!constrained.fpu) ++ { ++ /* As long as the arch-default "t.isa.simd" is set to non-zero ++ for an element "t" in loongarch_cpu_default_isa, "t.isa.fpu" ++ should be set to "ISA_EXT_FPU64" accordingly. Thus reaching ++ here must be the result of forcing -mlsx/-mlasx explicitly. */ ++ gcc_assert (constrained.simd); ++ ++ inform (UNKNOWN_LOCATION, ++ "enabing %qs promotes %<%s%s%> to %<%s%s%>", ++ loongarch_isa_ext_strings[t.isa.simd], ++ OPTSTR_ISA_EXT_FPU, loongarch_isa_ext_strings[t.isa.fpu], ++ OPTSTR_ISA_EXT_FPU, loongarch_isa_ext_strings[ISA_EXT_FPU64]); ++ ++ t.isa.fpu = ISA_EXT_FPU64; ++ } ++ else if (flags && (flags->flt == ISA_EXT_NONE || flags->flt == ISA_EXT_FPU32)) ++ { ++ if (constrained.simd) ++ inform (UNKNOWN_LOCATION, ++ "%qs is disabled by %<-m%s%>, because it requires %<%s%s%>", ++ loongarch_isa_ext_strings[t.isa.simd], flags->flt_str, ++ OPTSTR_ISA_EXT_FPU, loongarch_isa_ext_strings[ISA_EXT_FPU64]); ++ ++ t.isa.simd = ISA_EXT_NONE; ++ } ++ else ++ { ++ /* -mfpu=0 / -mfpu=32 is set. */ ++ if (constrained.simd) ++ fatal_error (UNKNOWN_LOCATION, ++ "%<-m%s=%s%> conflicts with %qs, which requires %<%s%s%>", ++ OPTSTR_ISA_EXT_FPU, loongarch_isa_ext_strings[t.isa.fpu], ++ loongarch_isa_ext_strings[t.isa.simd], ++ OPTSTR_ISA_EXT_FPU, loongarch_isa_ext_strings[ISA_EXT_FPU64]); ++ ++ /* Same as above. 
*/ ++ t.isa.simd = ISA_EXT_NONE; ++ } ++ } ++ ++ ++ /* 4. ABI-ISA compatibility */ ++ /* Note: ++ - There IS a unique default -march value for each ABI type ++ (config.gcc: triplet -> abi -> default arch). ++ ++ - If the base ABI is incompatible with the default arch, ++ try using the default -march it implies (and mark it ++ as "constrained" this time), then re-apply step 3. */ ++ ++ struct loongarch_abi abi_tmp; ++ const struct loongarch_isa* isa_min; ++ ++ abi_tmp = t.abi; ++ isa_min = &isa_required (abi_tmp); ++ ++ if (isa_base_compat_p (&t.isa, isa_min)); /* OK */ ++ else if (!constrained.arch) ++ { ++ /* Base architecture can only be implied by -march, ++ so we adjust that first if it is not constrained. */ ++ int fallback_arch = abi_default_cpu_arch (t.abi, NULL); ++ ++ if (t.cpu_arch == CPU_NATIVE) ++ warning (0, "your native CPU architecture (%qs) " ++ "does not support %qs ABI, falling back to %<-m%s=%s%>", ++ arch_str (&t), abi_str (t.abi), OPTSTR_ARCH, ++ loongarch_cpu_strings[fallback_arch]); ++ else ++ warning (0, "default CPU architecture (%qs) " ++ "does not support %qs ABI, falling back to %<-m%s=%s%>", ++ arch_str (&t), abi_str (t.abi), OPTSTR_ARCH, ++ loongarch_cpu_strings[fallback_arch]); ++ ++ t.cpu_arch = fallback_arch; ++ constrained.arch = 1; ++ goto config_target_isa; ++ } ++ else if (!constrained.abi_base) ++ { ++ /* If -march is given while -mabi is not, ++ try selecting another base ABI type. */ ++ abi_tmp.base = isa_default_abi (&t.isa).base; ++ } ++ else ++ goto fatal; ++ ++ if (isa_fpu_compat_p (&t.isa, isa_min)); /* OK */ ++ else if (!constrained.fpu) ++ t.isa.fpu = isa_min->fpu; ++ else if (!constrained.abi_base) ++ /* If -march is compatible with the default ABI ++ while -mfpu is not. */ ++ abi_tmp.base = isa_default_abi (&t.isa).base; ++ else ++ goto fatal; ++ ++ if (0) ++fatal: ++ fatal_error (UNKNOWN_LOCATION, ++ "unable to implement ABI %qs with instruction set %qs", ++ abi_str (t.abi), isa_str (&t.isa, '/')); ++ ++ ++ /* Using the fallback ABI. */ ++ if (abi_tmp.base != t.abi.base || abi_tmp.ext != t.abi.ext) ++ { ++ /* This flag is only set in the GCC driver. */ ++ if (follow_multilib_list_p) ++ { ++ ++ /* Continue falling back until we find a feasible ABI type ++ enabled by TM_MULTILIB_LIST. */ ++ if (!is_multilib_enabled (abi_tmp)) ++ { ++ for (unsigned int i = 0; i < ABI_COUNT; i++) ++ { ++ if (is_multilib_enabled (abi_priority_list[i]) ++ && abi_compat_p (&t.isa, abi_priority_list[i])) ++ { ++ abi_tmp = abi_priority_list[i]; ++ ++ warning (0, "ABI %qs cannot be implemented due to " ++ "limited instruction set %qs, " ++ "falling back to %qs", abi_str (t.abi), ++ isa_str (&t.isa, '/'), abi_str (abi_tmp)); ++ ++ goto fallback; ++ } ++ } ++ ++ /* Otherwise, keep using abi_tmp with a warning. 
*/ ++#ifdef LA_DISABLE_MULTILIB ++ warning (0, "instruction set %qs cannot implement " ++ "default ABI %qs, falling back to %qs", ++ isa_str (&t.isa, '/'), abi_str (t.abi), ++ abi_str (abi_tmp)); ++#else ++ warning (0, "no multilib-enabled ABI (%qs) can be implemented " ++ "with instruction set %qs, falling back to %qs", ++ multilib_enabled_abi_list (), ++ isa_str (&t.isa, '/'), abi_str (abi_tmp)); ++#endif ++ } ++ } ++ ++fallback: ++ t.abi = abi_tmp; ++ } ++ else if (follow_multilib_list_p) ++ { ++ if (!is_multilib_enabled (t.abi)) ++ { ++ inform (UNKNOWN_LOCATION, ++ "ABI %qs is not enabled at configure-time, " ++ "the linker might report an error", abi_str (t.abi)); ++ ++ inform (UNKNOWN_LOCATION, "ABI with startfiles: %s", ++ multilib_enabled_abi_list ()); ++ } ++ } ++ ++ ++ /* 5. Target code model */ ++ t.cmodel = constrained.cmodel ? target->cmodel : CMODEL_NORMAL; ++ ++ /* Cleanup and return. */ ++ obstack_free (&msg_obstack, NULL); ++ *target = t; ++} ++ ++/* Returns the default ABI for the given instruction set. */ ++static inline struct loongarch_abi ++isa_default_abi (const struct loongarch_isa *isa) ++{ ++ struct loongarch_abi abi; ++ ++ switch (isa->fpu) ++ { ++ case ISA_EXT_FPU64: ++ if (isa->base == ISA_BASE_LA64V100) ++ abi.base = ABI_BASE_LP64D; ++ break; ++ ++ case ISA_EXT_FPU32: ++ if (isa->base == ISA_BASE_LA64V100) ++ abi.base = ABI_BASE_LP64F; ++ break; ++ ++ case ISA_EXT_NONE: ++ if (isa->base == ISA_BASE_LA64V100) ++ abi.base = ABI_BASE_LP64S; ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ abi.ext = ABI_EXT_BASE; ++ return abi; ++} ++ ++/* Check if set2 is a subset of set1. */ ++static inline int ++isa_base_compat_p (const struct loongarch_isa *set1, ++ const struct loongarch_isa *set2) ++{ ++ switch (set2->base) ++ { ++ case ISA_BASE_LA64V100: ++ return (set1->base == ISA_BASE_LA64V100); ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ ++static inline int ++isa_fpu_compat_p (const struct loongarch_isa *set1, ++ const struct loongarch_isa *set2) ++{ ++ switch (set2->fpu) ++ { ++ case ISA_EXT_FPU64: ++ return set1->fpu == ISA_EXT_FPU64; ++ ++ case ISA_EXT_FPU32: ++ return set1->fpu == ISA_EXT_FPU32 || set1->fpu == ISA_EXT_FPU64; ++ ++ case ISA_EXT_NONE: ++ return 1; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++} ++ ++static inline int ++abi_compat_p (const struct loongarch_isa *isa, struct loongarch_abi abi) ++{ ++ int compatible = 1; ++ const struct loongarch_isa *isa2 = &isa_required (abi); ++ ++ /* Append conditionals for new ISA components below. */ ++ compatible = compatible && isa_base_compat_p (isa, isa2); ++ compatible = compatible && isa_fpu_compat_p (isa, isa2); ++ return compatible; ++} ++ ++/* The behavior of this function should be consistent ++ with config.gcc. */ ++static int ++abi_default_cpu_arch (struct loongarch_abi abi, ++ struct loongarch_isa *isa) ++{ ++ static struct loongarch_isa tmp; ++ if (!isa) ++ isa = &tmp; ++ ++ if (abi.ext == ABI_EXT_BASE) ++ switch (abi.base) ++ { ++ case ABI_BASE_LP64D: ++ case ABI_BASE_LP64F: ++ case ABI_BASE_LP64S: ++ *isa = isa_required (abi); ++ return CPU_LOONGARCH64; ++ } ++ gcc_unreachable (); ++} ++ ++static const char* ++abi_str (struct loongarch_abi abi) ++{ ++ /* "/base" can be omitted. 
*/ ++ if (abi.ext == ABI_EXT_BASE) ++ return (const char*) ++ obstack_copy0 (&msg_obstack, loongarch_abi_base_strings[abi.base], ++ strlen (loongarch_abi_base_strings[abi.base])); ++ else ++ { ++ APPEND_STRING (loongarch_abi_base_strings[abi.base]) ++ APPEND1 ('/') ++ APPEND_STRING (loongarch_abi_ext_strings[abi.ext]) ++ APPEND1 ('\0') ++ ++ return XOBFINISH (&msg_obstack, const char *); ++ } ++} ++ ++static const char* ++isa_str (const struct loongarch_isa *isa, char separator) ++{ ++ APPEND_STRING (loongarch_isa_base_strings[isa->base]) ++ APPEND1 (separator) ++ ++ if (isa->fpu == ISA_EXT_NONE) ++ { ++ APPEND_STRING ("no" OPTSTR_ISA_EXT_FPU) ++ } ++ else ++ { ++ APPEND_STRING (OPTSTR_ISA_EXT_FPU) ++ APPEND_STRING (loongarch_isa_ext_strings[isa->fpu]) ++ } ++ ++ switch (isa->simd) ++ { ++ case ISA_EXT_SIMD_LSX: ++ case ISA_EXT_SIMD_LASX: ++ APPEND1 (separator); ++ APPEND_STRING (loongarch_isa_ext_strings[isa->simd]); ++ break; ++ ++ default: ++ gcc_assert (isa->simd == 0); ++ } ++ APPEND1 ('\0') ++ ++ /* Add more here. */ ++ ++ return XOBFINISH (&msg_obstack, const char *); ++} ++ ++static const char* ++arch_str (const struct loongarch_target *target) ++{ ++ if (target->cpu_arch == CPU_NATIVE) ++ { ++ /* Describe a native CPU with unknown PRID. */ ++ const char* isa_string = isa_str (&target->isa, ','); ++ APPEND_STRING ("PRID: 0x") ++ APPEND_STRING (get_native_prid_str ()) ++ APPEND_STRING (", ISA features: ") ++ APPEND_STRING (isa_string) ++ } ++ else ++ APPEND_STRING (loongarch_cpu_strings[target->cpu_arch]); ++ ++ APPEND1 ('\0') ++ return XOBFINISH (&msg_obstack, const char *); ++} ++ ++static const char* ++multilib_enabled_abi_list () ++{ ++ int enabled_abi_idx[MULTILIB_LIST_LEN] = { 0 }; ++ const char* enabled_abi_str[MULTILIB_LIST_LEN] = { NULL }; ++ unsigned int j = 0; ++ ++ for (unsigned int i = 0; i < ABI_COUNT && j < MULTILIB_LIST_LEN; i++) ++ { ++ if (enabled_abi_types[abi_priority_list[i].base] ++ [abi_priority_list[i].ext]) ++ { ++ enabled_abi_idx[j++] = i; ++ } ++ } ++ ++ for (unsigned int k = 0; k < j; k++) ++ { ++ enabled_abi_str[k] = abi_str (abi_priority_list[enabled_abi_idx[k]]); ++ } ++ ++ for (unsigned int k = 0; k < j - 1; k++) ++ { ++ APPEND_STRING (enabled_abi_str[k]) ++ APPEND1 (',') ++ APPEND1 (' ') ++ } ++ APPEND_STRING (enabled_abi_str[j - 1]) ++ APPEND1 ('\0') ++ ++ return XOBFINISH (&msg_obstack, const char *); ++} ++ ++/* option status feedback for "gcc --help=target -Q" */ ++void ++loongarch_update_gcc_opt_status (struct loongarch_target *target, ++ struct gcc_options *opts, ++ struct gcc_options *opts_set) ++{ ++ (void) opts_set; ++ ++ /* status of -mabi */ ++ opts->x_la_opt_abi_base = target->abi.base; ++ ++ opts->x_target_flags |= ++ IS_LP64_ABI_BASE (target->abi.base) ? MASK_LP64 : 0; ++ ++ /* status of -march and -mtune */ ++ opts->x_la_opt_cpu_arch = target->cpu_arch; ++ opts->x_la_opt_cpu_tune = target->cpu_tune; ++ ++ /* status of -mfpu and -msimd */ ++ opts->x_la_opt_fpu = target->isa.fpu; ++ opts->x_la_opt_simd = target->isa.simd; ++} +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index 21639fa74..33eb8b2da 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -1,5 +1,6 @@ +-/* Definitions for option handling for LARCH. +- Copyright (C) 1989-2018 Free Software Foundation, Inc. ++/* Definitions for loongarch-specific option handling. ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. ++ Contributed by Loongson Ltd. + + This file is part of GCC. 
+ +@@ -17,18 +18,81 @@ You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +-#ifndef LARCH_OPTS_H +-#define LARCH_OPTS_H ++#ifndef LOONGARCH_OPTS_H ++#define LOONGARCH_OPTS_H + +-#define LARCH_ARCH_OPTION_NATIVE -1 ++#include "loongarch-def.h" + ++/* Target configuration */ ++extern struct loongarch_target la_target; + +-enum loongarch_code_model { +- LARCH_CMODEL_NORMAL, +- LARCH_CMODEL_TINY, +- LARCH_CMODEL_TINY_STATIC, +- LARCH_CMODEL_LARGE, +- LARCH_CMODEL_EXTREME ++/* Flag status */ ++struct loongarch_flags { ++ int flt; const char* flt_str; ++#define SX_FLAG_TYPE(x) ((x) < 0 ? -(x) : (x)) ++ int sx[2]; + }; + ++#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) ++ ++/* Initialize loongarch_target from separate option variables. */ ++void ++loongarch_init_target (struct loongarch_target *target, ++ int cpu_arch, int cpu_tune, int fpu, int simd, ++ int abi_base, int abi_ext, int cmodel); ++ ++ ++/* Handler for "-m" option combinations, ++ shared by the driver and the compiler proper. */ ++void ++loongarch_config_target (struct loongarch_target *target, ++ struct loongarch_flags *flags, ++ int follow_multilib_list_p); ++ ++/* option status feedback for "gcc --help=target -Q" */ ++void ++loongarch_update_gcc_opt_status (struct loongarch_target *target, ++ struct gcc_options *opts, ++ struct gcc_options *opts_set); + #endif ++ ++ ++/* Macros for common conditional expressions used in loongarch.{c,h,md} */ ++#define TARGET_CMODEL_NORMAL (la_target.cmodel == CMODEL_NORMAL) ++#define TARGET_CMODEL_TINY (la_target.cmodel == CMODEL_TINY) ++#define TARGET_CMODEL_TINY_STATIC (la_target.cmodel == CMODEL_TINY_STATIC) ++#define TARGET_CMODEL_LARGE (la_target.cmodel == CMODEL_LARGE) ++#define TARGET_CMODEL_EXTREME (la_target.cmodel == CMODEL_EXTREME) ++ ++#define TARGET_HARD_FLOAT (la_target.isa.fpu != ISA_EXT_NONE) ++#define TARGET_HARD_FLOAT_ABI (la_target.abi.base == ABI_BASE_LP64D \ ++ || la_target.abi.base == ABI_BASE_LP64F) ++ ++#define TARGET_SOFT_FLOAT (la_target.isa.fpu == ISA_EXT_NONE) ++#define TARGET_SOFT_FLOAT_ABI (la_target.abi.base == ABI_BASE_LP64S) ++#define TARGET_SINGLE_FLOAT (la_target.isa.fpu == ISA_EXT_FPU32) ++#define TARGET_SINGLE_FLOAT_ABI (la_target.abi.base == ABI_BASE_LP64F) ++#define TARGET_DOUBLE_FLOAT (la_target.isa.fpu == ISA_EXT_FPU64) ++#define TARGET_DOUBLE_FLOAT_ABI (la_target.abi.base == ABI_BASE_LP64D) ++ ++#define TARGET_64BIT (la_target.isa.base == ISA_BASE_LA64V100) ++#define TARGET_ABI_LP64 (la_target.abi.base == ABI_BASE_LP64D \ ++ || la_target.abi.base == ABI_BASE_LP64F \ ++ || la_target.abi.base == ABI_BASE_LP64S) ++ ++#define ISA_HAS_LSX (la_target.isa.simd == ISA_EXT_SIMD_LSX \ ++ || la_target.isa.simd == ISA_EXT_SIMD_LASX) ++#define ISA_HAS_LASX (la_target.isa.simd == ISA_EXT_SIMD_LASX) ++ ++ ++/* TARGET_ macros for use in *.md template conditionals */ ++#define TARGET_uARCH_LA464 (la_target.cpu_tune == CPU_LA464) ++#define TARGET_uARCH_LA364 (la_target.cpu_tune == CPU_LA364) ++#define TARGET_uARCH_LA264 (la_target.cpu_tune == CPU_LA264) ++#define TARGET_uARCH_LA664 (la_target.cpu_tune == CPU_LA664) ++ ++/* Note: optimize_size may vary across functions, ++ while -m[no]-memcpy imposes a global constraint. 
*/ ++#define TARGET_DO_OPTIMIZE_BLOCK_MOVE_P loongarch_do_optimize_block_move_p() ++ ++#endif /* LOONGARCH_OPTS_H */ +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index c36fdd37d..498d80514 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -1,9 +1,7 @@ +-/* Prototypes of target machine for GNU compiler. LARCH version. ++/* Prototypes of target machine for GNU compiler. LoongArch version. + Copyright (C) 1989-2018 Free Software Foundation, Inc. +- Contributed by A. Lichnewsky (lich@inria.inria.fr). +- Changed by Michael Meissner (meissner@osf.org). +- 64-bit r4000 support by Ian Lance Taylor (ian@cygnus.com) and +- Brendan Eich (brendan@microunity.com). ++ Contributed by Loongson Ltd. ++ Based on MIPS target for GNU compiler. + + This file is part of GCC. + +@@ -21,24 +19,8 @@ You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +-#ifndef GCC_LARCH_PROTOS_H +-#define GCC_LARCH_PROTOS_H +- +-/* Describes how a symbol is used. +- +- SYMBOL_CONTEXT_CALL +- The symbol is used as the target of a call instruction. +- +- SYMBOL_CONTEXT_LEA +- The symbol is used in a load-address operation. +- +- SYMBOL_CONTEXT_MEM +- The symbol is used as the address in a MEM. */ +-enum loongarch_symbol_context { +- SYMBOL_CONTEXT_CALL, +- SYMBOL_CONTEXT_LEA, +- SYMBOL_CONTEXT_MEM +-}; ++#ifndef GCC_LOONGARCH_PROTOS_H ++#define GCC_LOONGARCH_PROTOS_H + + /* Classifies a SYMBOL_REF, LABEL_REF or UNSPEC address. + +@@ -57,67 +39,30 @@ enum loongarch_symbol_type { + SYMBOL_GOT_DISP, + SYMBOL_TLS, + SYMBOL_TLSGD, +- SYMBOL_TLSLDM, ++ SYMBOL_TLSLDM + }; + #define NUM_SYMBOL_TYPES (SYMBOL_TLSLDM + 1) + +-/* Classifies a type of call. +- +- LARCH_CALL_NORMAL +- A normal call or call_value pattern. +- +- LARCH_CALL_SIBCALL +- A sibcall or sibcall_value pattern. +- +- LARCH_CALL_EPILOGUE +- A call inserted in the epilogue. */ +-enum loongarch_call_type { +- LARCH_CALL_NORMAL, +- LARCH_CALL_SIBCALL, +- LARCH_CALL_EPILOGUE +-}; +- +-/* Controls the conditions under which certain instructions are split. +- +- SPLIT_IF_NECESSARY +- Only perform splits that are necessary for correctness +- (because no unsplit version exists). +- +- SPLIT_FOR_SPEED +- Perform splits that are necessary for correctness or +- beneficial for code speed. +- +- SPLIT_FOR_SIZE +- Perform splits that are necessary for correctness or +- beneficial for code size. */ +-enum loongarch_split_type { +- SPLIT_IF_NECESSARY, +- SPLIT_FOR_SPEED, +- SPLIT_FOR_SIZE +-}; + extern const char *const loongarch_fp_conditions[16]; + +-extern const char *loongarch_output_gpr_save (unsigned); ++/* Routines implemented in loongarch.c. 
*/ ++extern rtx loongarch_emit_move (rtx, rtx); + extern HOST_WIDE_INT loongarch_initial_elimination_offset (int, int); + extern void loongarch_expand_prologue (void); + extern void loongarch_expand_epilogue (bool); + extern bool loongarch_can_use_return_insn (void); +-extern rtx loongarch_function_value (const_tree, const_tree, enum machine_mode); +-extern bool loongarch_symbolic_constant_p (rtx, enum loongarch_symbol_context, +- enum loongarch_symbol_type *); ++ ++extern bool loongarch_symbolic_constant_p (rtx, enum loongarch_symbol_type *); + extern int loongarch_regno_mode_ok_for_base_p (int, machine_mode, bool); +-extern bool loongarch_stack_address_p (rtx, machine_mode); + extern int loongarch_address_insns (rtx, machine_mode, bool); + extern int loongarch_const_insns (rtx); + extern int loongarch_split_const_insns (rtx); + extern int loongarch_split_128bit_const_insns (rtx); + extern int loongarch_load_store_insns (rtx, rtx_insn *); + extern int loongarch_idiv_insns (machine_mode); +-extern rtx loongarch_emit_move (rtx, rtx); + #ifdef RTX_CODE + extern void loongarch_emit_binary (enum rtx_code, rtx, rtx, rtx); + #endif +-extern rtx loongarch_pic_base_register (rtx); + extern bool loongarch_split_symbol (rtx, rtx, machine_mode, rtx *); + extern rtx loongarch_unspec_address (rtx, enum loongarch_symbol_type); + extern rtx loongarch_strip_unspec_address (rtx); +@@ -126,9 +71,9 @@ extern bool loongarch_legitimize_move (machine_mode, rtx, rtx); + extern rtx loongarch_legitimize_call_address (rtx); + + extern rtx loongarch_subword (rtx, bool); +-extern bool loongarch_split_move_p (rtx, rtx, enum loongarch_split_type); +-extern void loongarch_split_move (rtx, rtx, enum loongarch_split_type, rtx); +-extern bool loongarch_split_move_insn_p (rtx, rtx, rtx); ++extern bool loongarch_split_move_p (rtx, rtx); ++extern void loongarch_split_move (rtx, rtx, rtx); ++extern bool loongarch_split_move_insn_p (rtx, rtx); + extern void loongarch_split_move_insn (rtx, rtx, rtx); + extern void loongarch_split_128bit_move (rtx, rtx); + extern bool loongarch_split_128bit_move_p (rtx, rtx); +@@ -139,50 +84,29 @@ extern void loongarch_split_lsx_insert_d (rtx, rtx, rtx, rtx); + extern void loongarch_split_lsx_fill_d (rtx, rtx); + extern const char *loongarch_output_move (rtx, rtx); + extern bool loongarch_cfun_has_cprestore_slot_p (void); +-extern bool loongarch_cprestore_address_p (rtx, bool); + #ifdef RTX_CODE + extern void loongarch_expand_scc (rtx *); + extern bool loongarch_expand_int_vec_cmp (rtx *); + extern bool loongarch_expand_fp_vec_cmp (rtx *); + extern void loongarch_expand_conditional_branch (rtx *); +-extern void loongarch_expand_conditional_move (rtx *); ++extern bool loongarch_expand_conditional_move_la464 (rtx *); + extern void loongarch_expand_conditional_trap (rtx); + #endif +-extern bool loongarch_get_pic_call_symbol (rtx *, int); + extern void loongarch_set_return_address (rtx, rtx); + extern bool loongarch_move_by_pieces_p (unsigned HOST_WIDE_INT, unsigned int); +-extern bool loongarch_store_by_pieces_p (unsigned HOST_WIDE_INT, unsigned int); + extern bool loongarch_expand_block_move (rtx, rtx, rtx); + +-extern void loongarch_init_cumulative_args (CUMULATIVE_ARGS *, tree); + extern bool loongarch_expand_ext_as_unaligned_load (rtx, rtx, HOST_WIDE_INT, +- HOST_WIDE_INT, bool); ++ HOST_WIDE_INT, bool); + extern bool loongarch_expand_ins_as_unaligned_store (rtx, rtx, HOST_WIDE_INT, +- HOST_WIDE_INT); +-extern bool loongarch_mem_fits_mode_p (machine_mode mode, rtx x); ++ HOST_WIDE_INT); + extern 
HOST_WIDE_INT loongarch_debugger_offset (rtx, HOST_WIDE_INT); + +-extern void loongarch_push_asm_switch (struct loongarch_asm_switch *); +-extern void loongarch_pop_asm_switch (struct loongarch_asm_switch *); + extern void loongarch_output_external (FILE *, tree, const char *); + extern void loongarch_output_ascii (FILE *, const char *, size_t); +-extern void loongarch_output_aligned_decl_common (FILE *, tree, const char *, +- unsigned HOST_WIDE_INT, +- unsigned int); +-extern void loongarch_declare_common_object (FILE *, const char *, +- const char *, unsigned HOST_WIDE_INT, +- unsigned int, bool); +-extern void loongarch_declare_object (FILE *, const char *, const char *, +- const char *, ...) ATTRIBUTE_PRINTF_4; +-extern void loongarch_declare_object_name (FILE *, const char *, tree); +-extern void loongarch_finish_declare_object (FILE *, tree, int, int); +-extern void loongarch_set_text_contents_type (FILE *, const char *, +- unsigned long, bool); +- + extern bool loongarch_small_data_pattern_p (rtx); + extern rtx loongarch_rewrite_small_data (rtx); + extern rtx loongarch_return_addr (int, rtx); +-extern bool loongarch_must_initialize_gp_p (void); + + extern bool loongarch_const_vector_same_val_p (rtx, machine_mode); + extern bool loongarch_const_vector_same_bytes_p (rtx, machine_mode); +@@ -194,26 +118,27 @@ extern bool loongarch_const_vector_bitimm_clr_p (rtx, machine_mode); + extern rtx loongarch_lsx_vec_parallel_const_half (machine_mode, bool); + extern rtx loongarch_gen_const_int_vector (machine_mode, HOST_WIDE_INT); + extern enum reg_class loongarch_secondary_reload_class (enum reg_class, +- machine_mode, +- rtx, bool); ++ machine_mode, ++ rtx, bool); + extern int loongarch_class_max_nregs (enum reg_class, machine_mode); + + extern machine_mode loongarch_hard_regno_caller_save_mode (unsigned int, +- unsigned int, +- machine_mode); ++ unsigned int, ++ machine_mode); + extern int loongarch_adjust_insn_length (rtx_insn *, int); + extern const char *loongarch_output_conditional_branch (rtx_insn *, rtx *, +- const char *, const char *); +-extern const char *loongarch_output_order_conditional_branch (rtx_insn *, rtx *, +- bool); +-extern const char *loongarch_output_equal_conditional_branch (rtx_insn *, rtx *, +- bool); ++ const char *, ++ const char *); ++extern const char *loongarch_output_order_conditional_branch (rtx_insn *, ++ rtx *, ++ bool); ++extern const char *loongarch_output_equal_conditional_branch (rtx_insn *, ++ rtx *, ++ bool); + extern const char *loongarch_output_division (const char *, rtx *); + extern const char *loongarch_lsx_output_division (const char *, rtx *); + extern const char *loongarch_output_probe_stack_range (rtx, rtx, rtx); + extern bool loongarch_hard_regno_rename_ok (unsigned int, unsigned int); +-extern bool loongarch_linked_madd_p (rtx_insn *, rtx_insn *); +-extern bool loongarch_store_data_bypass_p (rtx_insn *, rtx_insn *); + extern int loongarch_dspalu_bypass_p (rtx, rtx); + extern rtx loongarch_prefetch_cookie (rtx, rtx); + +@@ -226,9 +151,6 @@ extern const char *current_section_name (void); + extern unsigned int current_section_flags (void); + extern bool loongarch_use_ins_ext_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT); + +-extern bool and_operands_ok (machine_mode, rtx, rtx); +-extern bool loongarch_fmadd_bypass (rtx_insn *, rtx_insn *); +- + union loongarch_gen_fn_ptrs + { + rtx (*fn_8) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx); +@@ -239,25 +161,26 @@ union loongarch_gen_fn_ptrs + }; + + extern void loongarch_expand_atomic_qihi (union 
loongarch_gen_fn_ptrs, +- rtx, rtx, rtx, rtx, rtx); ++ rtx, rtx, rtx, rtx, rtx); + + extern void loongarch_expand_vector_init (rtx, rtx); + extern void loongarch_expand_vec_unpack (rtx op[2], bool, bool); ++extern void loongarch_expand_vec_perm (rtx, rtx, rtx, rtx); ++extern void loongarch_expand_vec_perm_1 (rtx[]); ++extern void loongarch_expand_vector_extract (rtx, rtx, int); ++extern void loongarch_expand_vector_reduc (rtx (*)(rtx, rtx, rtx), rtx, rtx); + + extern int loongarch_ldst_scaled_shift (machine_mode); + extern bool loongarch_signed_immediate_p (unsigned HOST_WIDE_INT, int, int); + extern bool loongarch_unsigned_immediate_p (unsigned HOST_WIDE_INT, int, int); +-extern bool loongarch_load_store_pair_p (bool, rtx *); +-extern bool loongarch_movep_target_p (rtx, rtx); + extern bool loongarch_12bit_offset_address_p (rtx, machine_mode); + extern bool loongarch_14bit_shifted_offset_address_p (rtx, machine_mode); ++extern bool loongarch_base_index_address_p (rtx, machine_mode); + extern bool loongarch_9bit_offset_address_p (rtx, machine_mode); +-extern bool lwsp_swsp_address_p (rtx, machine_mode); + extern rtx loongarch_expand_thread_pointer (rtx); + + extern bool loongarch_eh_uses (unsigned int); + extern bool loongarch_epilogue_uses (unsigned int); +-extern int loongarch_trampoline_code_size (void); + extern bool loongarch_load_store_bonding_p (rtx *, machine_mode, bool); + extern bool loongarch_la464_128_store_p (rtx[]); + extern bool loongarch_la464_128_load_p (rtx[]); +@@ -270,10 +193,6 @@ typedef rtx (*mulsidi3_gen_fn) (rtx, rtx, rtx); + extern void loongarch_register_frame_header_opt (void); + extern void loongarch_expand_vec_cond_expr (machine_mode, machine_mode, rtx *); + +-extern void loongarch_declare_function_name(FILE *, const char *, tree); +-/* Routines implemented in loongarch-d.c */ +-extern void loongarch_d_target_versions (void); +- + /* Routines implemented in loongarch-c.c. */ + void loongarch_cpu_cpp_builtins (cpp_reader *); + +@@ -281,10 +200,12 @@ extern void loongarch_init_builtins (void); + extern void loongarch_atomic_assign_expand_fenv (tree *, tree *, tree *); + extern tree loongarch_builtin_decl (unsigned int, bool); + extern rtx loongarch_expand_builtin (tree, rtx, rtx subtarget ATTRIBUTE_UNUSED, +- machine_mode, int); ++ machine_mode, int); + extern tree loongarch_builtin_vectorized_function (unsigned int, tree, tree); + extern rtx loongarch_gen_const_int_vector_shuffle (machine_mode, int); + extern tree loongarch_build_builtin_va_list (void); +- + extern rtx loongarch_build_signbit_mask (machine_mode, bool, bool); ++extern void loongarch_emit_swrsqrtsf (rtx, rtx, machine_mode, bool); ++extern void loongarch_emit_swdivsf (rtx, rtx, rtx, machine_mode); ++extern rtx loongarch_prefetch_cookie (rtx, rtx); + #endif /* ! GCC_LARCH_PROTOS_H */ +diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h +new file mode 100644 +index 000000000..aca3d667b +--- /dev/null ++++ b/gcc/config/loongarch/loongarch-str.h +@@ -0,0 +1,68 @@ ++/* Generated automatically by "genstr" from "loongarch-strings". ++ Please do not edit this file directly. ++ ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. ++ Contributed by Loongson Ltd. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. 
++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#ifndef LOONGARCH_STR_H ++#define LOONGARCH_STR_H ++ ++#define OPTSTR_ARCH "arch" ++#define OPTSTR_TUNE "tune" ++ ++#define STR_CPU_NATIVE "native" ++#define STR_CPU_ABI_DEFAULT "abi-default" ++#define STR_CPU_LOONGARCH64 "loongarch64" ++#define STR_CPU_LA464 "la464" ++#define STR_CPU_LA364 "la364" ++#define STR_CPU_LA264 "la264" ++#define STR_CPU_LA664 "la664" ++ ++#define STR_ISA_BASE_LA64V100 "la64" ++ ++#define OPTSTR_ISA_EXT_FPU "fpu" ++#define STR_NONE "none" ++#define STR_ISA_EXT_FPU0 "0" ++#define STR_ISA_EXT_FPU32 "32" ++#define STR_ISA_EXT_FPU64 "64" ++ ++#define OPTSTR_SOFT_FLOAT "soft-float" ++#define OPTSTR_SINGLE_FLOAT "single-float" ++#define OPTSTR_DOUBLE_FLOAT "double-float" ++ ++#define OPTSTR_ISA_EXT_SIMD "simd" ++#define STR_ISA_EXT_LSX "lsx" ++#define STR_ISA_EXT_LASX "lasx" ++ ++#define OPTSTR_ABI_BASE "abi" ++#define STR_ABI_BASE_LP64D "lp64d" ++#define STR_ABI_BASE_LP64F "lp64f" ++#define STR_ABI_BASE_LP64S "lp64s" ++#define STR_ABI_BASE_LP64 "lp64" ++ ++#define STR_ABI_EXT_BASE "base" ++ ++#define OPTSTR_CMODEL "cmodel" ++#define STR_CMODEL_NORMAL "normal" ++#define STR_CMODEL_TINY "tiny" ++#define STR_CMODEL_TS "tiny-static" ++#define STR_CMODEL_LARGE "large" ++#define STR_CMODEL_EXTREME "extreme" ++ ++#endif /* LOONGARCH_STR_H */ +diff --git a/gcc/config/loongarch/loongarch-tables.opt b/gcc/config/loongarch/loongarch-tables.opt +deleted file mode 100644 +index 80794b564..000000000 +--- a/gcc/config/loongarch/loongarch-tables.opt ++++ /dev/null +@@ -1,34 +0,0 @@ +-; -*- buffer-read-only: t -*- +-; Generated automatically by genopt.sh from loongarch-cpus.def. +- +-; Copyright (C) 2011-2018 Free Software Foundation, Inc. +-; +-; This file is part of GCC. +-; +-; GCC is free software; you can redistribute it and/or modify it under +-; the terms of the GNU General Public License as published by the Free +-; Software Foundation; either version 3, or (at your option) any later +-; version. +-; +-; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +-; WARRANTY; without even the implied warranty of MERCHANTABILITY or +-; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +-; for more details. +-; +-; You should have received a copy of the GNU General Public License +-; along with GCC; see the file COPYING3. If not see +-; . +- +-Enum +-Name(loongarch_arch_opt_value) Type(int) +-Known LARCH CPUs (for use with the -march= and -mtune= options): +- +-EnumValue +-Enum(loongarch_arch_opt_value) String(native) Value(LARCH_ARCH_OPTION_NATIVE) DriverOnly +- +-EnumValue +-Enum(loongarch_arch_opt_value) String(loongarch64) Value(0) Canonical +- +-EnumValue +-Enum(loongarch_arch_opt_value) String(la464) Value(1) Canonical +- +diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h +new file mode 100644 +index 000000000..bb01f2d98 +--- /dev/null ++++ b/gcc/config/loongarch/loongarch-tune.h +@@ -0,0 +1,51 @@ ++/* Definitions for microarchitecture-related data structures. ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. ++ Contributed by Loongson Ltd. ++ ++This file is part of GCC. 
++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#ifndef LOONGARCH_TUNE_H ++#define LOONGARCH_TUNE_H ++ ++/* RTX costs of various operations on the different architectures. */ ++struct loongarch_rtx_cost_data ++{ ++ unsigned short fp_add; ++ unsigned short fp_mult_sf; ++ unsigned short fp_mult_df; ++ unsigned short fp_div_sf; ++ unsigned short fp_div_df; ++ unsigned short int_mult_si; ++ unsigned short int_mult_di; ++ unsigned short int_div_si; ++ unsigned short int_div_di; ++ unsigned short branch_cost; ++ unsigned short memory_latency; ++}; ++ ++/* Costs to use when optimizing for size. */ ++extern const struct loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size; ++ ++/* Cache size record of known processor models. */ ++struct loongarch_cache { ++ int l1d_line_size; /* bytes */ ++ int l1d_size; /* KiB */ ++ int l2d_size; /* kiB */ ++ int simultaneous_prefetches; /* number of parallel prefetch */ ++}; ++ ++#endif /* LOONGARCH_TUNE_H */ +diff --git a/gcc/config/loongarch/loongarch.c b/gcc/config/loongarch/loongarch.c +index e556f81e4..a1dde5a0f 100644 +--- a/gcc/config/loongarch/loongarch.c ++++ b/gcc/config/loongarch/loongarch.c +@@ -1,9 +1,7 @@ +-/* Subroutines used for LARCH code generation. +- Copyright (C) 1989-2018 Free Software Foundation, Inc. +- Contributed by A. Lichnewsky, lich@inria.inria.fr. +- Changes by Michael Meissner, meissner@osf.org. +- 64-bit r4000 support by Ian Lance Taylor, ian@cygnus.com, and +- Brendan Eich, brendan@microunity.com. ++/* Subroutines used for LoongArch code generation. ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. ++ Contributed by Loongson Technology Co. Ltd.. ++ Based on MIPS and RISC-V target for GNU compiler. + + This file is part of GCC. + +@@ -63,8 +61,14 @@ along with GCC; see the file COPYING3. If not see + #include "target-globals.h" + #include "tree-pass.h" + #include "context.h" ++#include "shrink-wrap.h" + #include "builtins.h" + #include "rtl-iter.h" ++#include "cfgloop.h" ++#include "gimple-iterator.h" ++#include "tree-vectorizer.h" ++#include "params.h" ++#include "opts.h" + + /* This file should be included last. */ + #include "target-def.h" +@@ -76,48 +80,20 @@ along with GCC; see the file COPYING3. If not see + && XINT (X, 1) < UNSPEC_ADDRESS_FIRST + NUM_SYMBOL_TYPES) + + /* Extract the symbol or label from UNSPEC wrapper X. */ +-#define UNSPEC_ADDRESS(X) \ +- XVECEXP (X, 0, 0) ++#define UNSPEC_ADDRESS(X) XVECEXP (X, 0, 0) + + /* Extract the symbol type from UNSPEC wrapper X. */ + #define UNSPEC_ADDRESS_TYPE(X) \ + ((enum loongarch_symbol_type) (XINT (X, 1) - UNSPEC_ADDRESS_FIRST)) + +-/* The maximum distance between the top of the stack frame and the +- value $sp has when we save and restore registers. +-*/ +-#define LARCH_MAX_FIRST_STACK_STEP 0x7f0 +- + /* True if INSN is a loongarch.md pattern or asm statement. */ + /* ??? This test exists through the compiler, perhaps it should be +- moved to rtl.h. */ ++ moved to rtl.h. 
*/ + #define USEFUL_INSN_P(INSN) \ + (NONDEBUG_INSN_P (INSN) \ + && GET_CODE (PATTERN (INSN)) != USE \ + && GET_CODE (PATTERN (INSN)) != CLOBBER) + +-/* If INSN is a delayed branch sequence, return the first instruction +- in the sequence, otherwise return INSN itself. */ +-#define SEQ_BEGIN(INSN) \ +- (INSN_P (INSN) && GET_CODE (PATTERN (INSN)) == SEQUENCE \ +- ? as_a (XVECEXP (PATTERN (INSN), 0, 0)) \ +- : (INSN)) +- +-/* Likewise for the last instruction in a delayed branch sequence. */ +-#define SEQ_END(INSN) \ +- (INSN_P (INSN) && GET_CODE (PATTERN (INSN)) == SEQUENCE \ +- ? as_a (XVECEXP (PATTERN (INSN), \ +- 0, \ +- XVECLEN (PATTERN (INSN), 0) - 1)) \ +- : (INSN)) +- +-/* Execute the following loop body with SUBINSN set to each instruction +- between SEQ_BEGIN (INSN) and SEQ_END (INSN) inclusive. */ +-#define FOR_EACH_SUBINSN(SUBINSN, INSN) \ +- for ((SUBINSN) = SEQ_BEGIN (INSN); \ +- (SUBINSN) != NEXT_INSN (SEQ_END (INSN)); \ +- (SUBINSN) = NEXT_INSN (SUBINSN)) +- + /* True if bit BIT is set in VALUE. */ + #define BITSET_P(VALUE, BIT) (((VALUE) & (1 << (BIT))) != 0) + +@@ -127,54 +103,25 @@ along with GCC; see the file COPYING3. If not see + A natural register + offset address. The register satisfies + loongarch_valid_base_register_p and the offset is a const_arith_operand. + ++ ADDRESS_REG_REG ++ A base register indexed by (optionally scaled) register. ++ + ADDRESS_CONST_INT + A signed 16-bit constant address. + + ADDRESS_SYMBOLIC: + A constant symbolic address. */ +-enum loongarch_address_type { ++enum loongarch_address_type ++{ + ADDRESS_REG, ++ ADDRESS_REG_REG, + ADDRESS_CONST_INT, + ADDRESS_SYMBOLIC + }; + + +-/* A class used to control a comdat-style stub that we output in each +- translation unit that needs it. */ +-class loongarch_one_only_stub { +-public: +- virtual ~loongarch_one_only_stub () {} +- +- /* Return the name of the stub. */ +- virtual const char *get_name () = 0; +- +- /* Output the body of the function to asm_out_file. */ +- virtual void output_body () = 0; +-}; +- +-/* Tuning information that is automatically derived from other sources +- (such as the scheduler). */ +-static struct { +- /* The architecture and tuning settings that this structure describes. */ +- enum processor arch; +- enum processor tune; +- +- /* True if the structure has been initialized. */ +- bool initialized_p; +- +-} loongarch_tuning_info; +- +-/* Information about an address described by loongarch_address_type. +- +- ADDRESS_CONST_INT +- No fields are used. +- +- ADDRESS_REG +- REG is the base register and OFFSET is the constant offset. +- +- ADDRESS_SYMBOLIC +- SYMBOL_TYPE is the type of symbol that the address references. */ +-struct loongarch_address_info { ++struct loongarch_address_info ++{ + enum loongarch_address_type type; + rtx reg; + rtx offset; +@@ -184,224 +131,82 @@ struct loongarch_address_info { + /* Method to load immediate number fields. + + METHOD_NORMAL: +- load immediate number 0-31 bit ++ Load bit 0-31 of the immediate number. + + METHOD_LU32I: +- load imm 32-51 bit ++ Load bit 32-51 of the immediate number. + + METHOD_LU52I: +- load imm 52-63 bit ++ load bit 52-63 of the immediate number. + + METHOD_INSV: +- imm 0xfff00000fffffxxx ++ immediates like 0xfff00000fffffxxx + */ +-enum loongarch_load_imm_method { ++enum loongarch_load_imm_method ++{ + METHOD_NORMAL, + METHOD_LU32I, + METHOD_LU52I, + METHOD_INSV + }; + +-/* One stage in a constant building sequence. 
These sequences have +- the form: +- +- A = VALUE[0] +- A = A CODE[1] VALUE[1] +- A = A CODE[2] VALUE[2] +- ... +- +- where A is an accumulator, each CODE[i] is a binary rtl operation +- and each VALUE[i] is a constant integer. CODE[0] is undefined. */ +-struct loongarch_integer_op { ++struct loongarch_integer_op ++{ + enum rtx_code code; +- unsigned HOST_WIDE_INT value; ++ HOST_WIDE_INT value; + enum loongarch_load_imm_method method; + }; + + /* The largest number of operations needed to load an integer constant. +- The worst accepted case for 64-bit constants is LUI,ORI,SLL,ORI,SLL,ORI. +- When the lowest bit is clear, we can try, but reject a sequence with +- an extra SLL at the end. */ +-#define LARCH_MAX_INTEGER_OPS 9 +- +-/* Costs of various operations on the different architectures. */ +- +-struct loongarch_rtx_cost_data +-{ +- unsigned short fp_add; +- unsigned short fp_mult_sf; +- unsigned short fp_mult_df; +- unsigned short fp_div_sf; +- unsigned short fp_div_df; +- unsigned short int_mult_si; +- unsigned short int_mult_di; +- unsigned short int_div_si; +- unsigned short int_div_di; +- unsigned short branch_cost; +- unsigned short memory_latency; +-}; +- +-/* Global variables for machine-dependent things. */ +- +-/* The -G setting, or the configuration's default small-data limit if +- no -G option is given. */ +-static unsigned int loongarch_small_data_threshold; +- +-/* The number of file directives written by loongarch_output_filename. */ +-int num_source_filenames; +- +-/* The name that appeared in the last .file directive written by +- loongarch_output_filename, or "" if loongarch_output_filename hasn't +- written anything yet. */ +-const char *current_function_file = ""; ++ The worst accepted case for 64-bit constants is LU12I.W,LU32I.D,LU52I.D,ORI ++ or LU12I.W,LU32I.D,LU52I.D,ADDI.D DECL_ASSEMBLER_NAME. */ ++#define LARCH_MAX_INTEGER_OPS 4 + + /* Arrays that map GCC register numbers to debugger register numbers. */ +-int loongarch_dbx_regno[FIRST_PSEUDO_REGISTER]; + int loongarch_dwarf_regno[FIRST_PSEUDO_REGISTER]; + +-/* The current instruction-set architecture. */ +-enum processor loongarch_arch; +-const struct loongarch_cpu_info *loongarch_arch_info; +- +-/* The processor that we should tune the code for. */ +-enum processor loongarch_tune; +-const struct loongarch_cpu_info *loongarch_tune_info; +- +-/* The ISA level associated with loongarch_arch. */ +-int loongarch_isa; +- +-/* The ISA revision level. */ +-int loongarch_isa_rev; +- +-/* Which cost information to use. */ +-static const struct loongarch_rtx_cost_data *loongarch_cost; +- + /* Index [M][R] is true if register R is allowed to hold a value of mode M. */ +-static bool loongarch_hard_regno_mode_ok_p[MAX_MACHINE_MODE][FIRST_PSEUDO_REGISTER]; ++static bool loongarch_hard_regno_mode_ok_p[MAX_MACHINE_MODE] ++ [FIRST_PSEUDO_REGISTER]; + + /* Index C is true if character C is a valid PRINT_OPERAND punctation + character. */ + static bool loongarch_print_operand_punct[256]; + +-static GTY (()) int loongarch_output_filename_first_time = 1; +- +-/* loongarch_use_pcrel_pool_p[X] is true if symbols of type X should be +- forced into a PC-relative constant pool. */ +-bool loongarch_use_pcrel_pool_p[NUM_SYMBOL_TYPES]; +- +-/* Cached value of can_issue_more. This is cached in loongarch_variable_issue hook +- and returned from loongarch_sched_reorder2. */ ++/* Cached value of can_issue_more. This is cached in loongarch_variable_issue ++ hook and returned from loongarch_sched_reorder2. 
*/ + static int cached_can_issue_more; + + /* Index R is the smallest register class that contains register R. */ + const enum reg_class loongarch_regno_to_class[FIRST_PSEUDO_REGISTER] = { +- GR_REGS, GR_REGS, GR_REGS, GR_REGS, +- JALR_REGS, JALR_REGS, JALR_REGS, JALR_REGS, +- JALR_REGS, JALR_REGS, JALR_REGS, JALR_REGS, +- SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, +- SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, +- SIBCALL_REGS, GR_REGS, GR_REGS, JALR_REGS, +- JALR_REGS, JALR_REGS, JALR_REGS, JALR_REGS, +- JALR_REGS, JALR_REGS, JALR_REGS, JALR_REGS, +- +- FP_REGS, FP_REGS, FP_REGS, FP_REGS, +- FP_REGS, FP_REGS, FP_REGS, FP_REGS, +- FP_REGS, FP_REGS, FP_REGS, FP_REGS, +- FP_REGS, FP_REGS, FP_REGS, FP_REGS, +- FP_REGS, FP_REGS, FP_REGS, FP_REGS, +- FP_REGS, FP_REGS, FP_REGS, FP_REGS, +- FP_REGS, FP_REGS, FP_REGS, FP_REGS, +- FP_REGS, FP_REGS, FP_REGS, FP_REGS, +- ST_REGS, ST_REGS, ST_REGS, ST_REGS, +- ST_REGS, ST_REGS, ST_REGS, ST_REGS, +- FRAME_REGS, FRAME_REGS +-}; +- +-static tree loongarch_handle_interrupt_attr (tree *, tree, tree, int, bool *); +-static tree loongarch_handle_use_shadow_register_set_attr (tree *, tree, tree, int, +- bool *); +- +-/* The value of TARGET_ATTRIBUTE_TABLE. */ +-static const struct attribute_spec loongarch_attribute_table[] = { +- /* { name, min_len, max_len, decl_req, type_req, fn_type_req, +- affects_type_identity, handler, exclude } */ +- { "long_call", 0, 0, false, true, true, false, NULL, NULL }, +- { "short_call", 0, 0, false, true, true, false, NULL, NULL }, +- { "far", 0, 0, false, true, true, false, NULL, NULL }, +- { "near", 0, 0, false, true, true, false, NULL, NULL }, +- { "nocompression", 0, 0, true, false, false, false, NULL, NULL }, +- /* Allow functions to be specified as interrupt handlers */ +- { "interrupt", 0, 1, false, true, true, false, loongarch_handle_interrupt_attr, +- NULL }, +- { "use_shadow_register_set", 0, 1, false, true, true, false, +- loongarch_handle_use_shadow_register_set_attr, NULL }, +- { "keep_interrupts_masked", 0, 0, false, true, true, false, NULL, NULL }, +- { "use_debug_exception_return", 0, 0, false, true, true, false, NULL, NULL }, +- { NULL, 0, 0, false, false, false, false, NULL, NULL } +-}; +- +-/* A table describing all the processors GCC knows about; see +- loongarch-cpus.def for details. */ +-static const struct loongarch_cpu_info loongarch_cpu_info_table[] = { +-#define LARCH_CPU(NAME, CPU, ISA, FLAGS) \ +- { NAME, CPU, ISA, FLAGS }, +-#include "loongarch-cpus.def" +-#undef LARCH_CPU +-}; +- +-/* Default costs. If these are used for a processor we should look +- up the actual costs. */ +-#define DEFAULT_COSTS COSTS_N_INSNS (6), /* fp_add */ \ +- COSTS_N_INSNS (7), /* fp_mult_sf */ \ +- COSTS_N_INSNS (8), /* fp_mult_df */ \ +- COSTS_N_INSNS (23), /* fp_div_sf */ \ +- COSTS_N_INSNS (36), /* fp_div_df */ \ +- COSTS_N_INSNS (10), /* int_mult_si */ \ +- COSTS_N_INSNS (10), /* int_mult_di */ \ +- COSTS_N_INSNS (69), /* int_div_si */ \ +- COSTS_N_INSNS (69), /* int_div_di */ \ +- 2, /* branch_cost */ \ +- 4 /* memory_latency */ +- +-/* Floating-point costs for processors without an FPU. Just assume that +- all floating-point libcalls are very expensive. */ +-#define SOFT_FP_COSTS COSTS_N_INSNS (256), /* fp_add */ \ +- COSTS_N_INSNS (256), /* fp_mult_sf */ \ +- COSTS_N_INSNS (256), /* fp_mult_df */ \ +- COSTS_N_INSNS (256), /* fp_div_sf */ \ +- COSTS_N_INSNS (256) /* fp_div_df */ +- +-/* Costs to use when optimizing for size. 
*/ +-static const struct loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size = { +- COSTS_N_INSNS (1), /* fp_add */ +- COSTS_N_INSNS (1), /* fp_mult_sf */ +- COSTS_N_INSNS (1), /* fp_mult_df */ +- COSTS_N_INSNS (1), /* fp_div_sf */ +- COSTS_N_INSNS (1), /* fp_div_df */ +- COSTS_N_INSNS (1), /* int_mult_si */ +- COSTS_N_INSNS (1), /* int_mult_di */ +- COSTS_N_INSNS (1), /* int_div_si */ +- COSTS_N_INSNS (1), /* int_div_di */ +- 2, /* branch_cost */ +- 4 /* memory_latency */ ++ GR_REGS, GR_REGS, GR_REGS, GR_REGS, ++ JIRL_REGS, JIRL_REGS, JIRL_REGS, JIRL_REGS, ++ JIRL_REGS, JIRL_REGS, JIRL_REGS, JIRL_REGS, ++ SIBCALL_REGS, JIRL_REGS, SIBCALL_REGS, SIBCALL_REGS, ++ SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, ++ SIBCALL_REGS, GR_REGS, GR_REGS, JIRL_REGS, ++ JIRL_REGS, JIRL_REGS, JIRL_REGS, JIRL_REGS, ++ JIRL_REGS, JIRL_REGS, JIRL_REGS, JIRL_REGS, ++ ++ FP_REGS, FP_REGS, FP_REGS, FP_REGS, ++ FP_REGS, FP_REGS, FP_REGS, FP_REGS, ++ FP_REGS, FP_REGS, FP_REGS, FP_REGS, ++ FP_REGS, FP_REGS, FP_REGS, FP_REGS, ++ FP_REGS, FP_REGS, FP_REGS, FP_REGS, ++ FP_REGS, FP_REGS, FP_REGS, FP_REGS, ++ FP_REGS, FP_REGS, FP_REGS, FP_REGS, ++ FP_REGS, FP_REGS, FP_REGS, FP_REGS, ++ FCC_REGS, FCC_REGS, FCC_REGS, FCC_REGS, ++ FCC_REGS, FCC_REGS, FCC_REGS, FCC_REGS, ++ FRAME_REGS, FRAME_REGS + }; + +-/* Costs to use when optimizing for speed, indexed by processor. */ +-static const struct loongarch_rtx_cost_data +- loongarch_rtx_cost_data[NUM_PROCESSOR_VALUES] = { +- { /* loongarch */ +- DEFAULT_COSTS +- }, +- { /* loongarch64 */ +- DEFAULT_COSTS +- }, +- { /* la464 */ +- DEFAULT_COSTS +- } +-}; ++/* Which cost information to use. */ ++static const struct loongarch_rtx_cost_data *loongarch_cost; + + /* Information about a single argument. */ +-struct loongarch_arg_info { ++struct loongarch_arg_info ++{ + /* True if the argument is at least partially passed on the stack. */ + bool stack_p; + +@@ -419,21 +224,6 @@ struct loongarch_arg_info { + unsigned int fpr_offset; + }; + +- +-/* Emit a move from SRC to DEST. Assume that the move expanders can +- handle all moves if !can_create_pseudo_p (). The distinction is +- important because, unlike emit_move_insn, the move expanders know +- how to force Pmode objects into the constant pool even when the +- constant pool address is not itself legitimate. */ +- +-rtx +-loongarch_emit_move (rtx dest, rtx src) +-{ +- return (can_create_pseudo_p () +- ? emit_move_insn (dest, src) +- : emit_move_insn_1 (dest, src)); +-} +- + /* Implement TARGET_FUNCTION_ARG_BOUNDARY. Every parameter gets at + least PARM_BOUNDARY bits of alignment, but will be given anything up + to PREFERRED_STACK_BOUNDARY bits if the type requires it. */ +@@ -470,7 +260,8 @@ loongarch_pass_mode_in_fpr_p (machine_mode mode) + return 0; + } + +-typedef struct { ++typedef struct ++{ + const_tree type; + HOST_WIDE_INT offset; + } loongarch_aggregate_field; +@@ -480,18 +271,18 @@ typedef struct { + + static int + loongarch_flatten_aggregate_field (const_tree type, +- loongarch_aggregate_field fields[2], +- int n, HOST_WIDE_INT offset, +- const int use_vecarg_p) ++ loongarch_aggregate_field fields[2], int n, ++ HOST_WIDE_INT offset, ++ const int use_vecarg_p) + { + switch (TREE_CODE (type)) + { + case RECORD_TYPE: +- /* Can't handle incomplete types nor sizes that are not fixed. */ +- if (!COMPLETE_TYPE_P (type) +- || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST +- || !tree_fits_uhwi_p (TYPE_SIZE (type))) +- return -1; ++ /* Can't handle incomplete types nor sizes that are not fixed. 
*/ ++ if (!COMPLETE_TYPE_P (type) ++ || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST ++ || !tree_fits_uhwi_p (TYPE_SIZE (type))) ++ return -1; + + for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f)) + if (TREE_CODE (f) == FIELD_DECL) +@@ -500,7 +291,8 @@ loongarch_flatten_aggregate_field (const_tree type, + return -1; + + HOST_WIDE_INT pos = offset + int_byte_position (f); +- n = loongarch_flatten_aggregate_field (TREE_TYPE (f), fields, n, pos, 0); ++ n = loongarch_flatten_aggregate_field (TREE_TYPE (f), fields, n, ++ pos, 0); + if (n < 0) + return -1; + } +@@ -513,7 +305,8 @@ loongarch_flatten_aggregate_field (const_tree type, + tree index = TYPE_DOMAIN (type); + tree elt_size = TYPE_SIZE_UNIT (TREE_TYPE (type)); + int n_subfields = loongarch_flatten_aggregate_field (TREE_TYPE (type), +- subfields, 0, offset, 0); ++ subfields, 0, ++ offset, 0); + + /* Can't handle incomplete types nor sizes that are not fixed. */ + if (n_subfields <= 0 +@@ -528,7 +321,7 @@ loongarch_flatten_aggregate_field (const_tree type, + return -1; + + n_elts = 1 + tree_to_uhwi (TYPE_MAX_VALUE (index)) +- - tree_to_uhwi (TYPE_MIN_VALUE (index)); ++ - tree_to_uhwi (TYPE_MIN_VALUE (index)); + gcc_assert (n_elts >= 0); + + for (HOST_WIDE_INT i = 0; i < n_elts; i++) +@@ -566,11 +359,11 @@ loongarch_flatten_aggregate_field (const_tree type, + } + + default: +- if (n < 2 ++ if ((n < 2 + && ((SCALAR_FLOAT_TYPE_P (type) + && GET_MODE_SIZE (TYPE_MODE (type)) <= UNITS_PER_FP_ARG) + || (INTEGRAL_TYPE_P (type) +- && GET_MODE_SIZE (TYPE_MODE (type)) <= UNITS_PER_WORD)) ++ && GET_MODE_SIZE (TYPE_MODE (type)) <= UNITS_PER_WORD))) + || (use_vecarg_p && VECTOR_TYPE_P (type) + && ((ISA_HAS_LSX && GET_MODE_SIZE (TYPE_MODE (type)) <= UNITS_PER_LSX_REG) + || (ISA_HAS_LASX && GET_MODE_SIZE (TYPE_MODE (type)) <= UNITS_PER_LASX_REG)))) +@@ -589,8 +382,8 @@ loongarch_flatten_aggregate_field (const_tree type, + + static int + loongarch_flatten_aggregate_argument (const_tree type, +- loongarch_aggregate_field fields[2], +- const int use_vecarg_p) ++ loongarch_aggregate_field fields[2], ++ const int use_vecarg_p) + { + if (!type || !((TREE_CODE (type) == RECORD_TYPE) + || (use_vecarg_p && TREE_CODE (type) == VECTOR_TYPE))) +@@ -603,9 +396,9 @@ loongarch_flatten_aggregate_argument (const_tree type, + two floating-point registers. If so, populate FIELDS accordingly. */ + + static unsigned +-loongarch_pass_aggregate_in_fpr_pair_p (const_tree type, +- loongarch_aggregate_field fields[2], +- const int use_vecarg_p) ++loongarch_pass_aggregate_num_fpr (const_tree type, ++ loongarch_aggregate_field fields[2], ++ const int use_vecarg_p) + { + int n = loongarch_flatten_aggregate_argument (type, fields, use_vecarg_p); + +@@ -616,13 +409,13 @@ loongarch_pass_aggregate_in_fpr_pair_p (const_tree type, + return n > 0 ? n : 0; + } + +-/* See whether TYPE is a record whose fields should be returned in one or ++/* See whether TYPE is a record whose fields should be returned in one + floating-point register and one integer register. If so, populate + FIELDS accordingly. */ + + static bool + loongarch_pass_aggregate_in_fpr_and_gpr_p (const_tree type, +- loongarch_aggregate_field fields[2]) ++ loongarch_aggregate_field fields[2]) + { + unsigned num_int = 0, num_float = 0; + int n = loongarch_flatten_aggregate_argument (type, fields, 0); +@@ -640,20 +433,21 @@ loongarch_pass_aggregate_in_fpr_and_gpr_p (const_tree type, + when the value has mode VALUE_MODE and the type has TYPE_MODE. 
The + two modes may be different for structures like: + +- struct __attribute__((packed)) foo { float f; } ++ struct __attribute__((packed)) foo { float f; } + +- where the SFmode value "f" is passed in REGNO but the struct itself +- has mode BLKmode. */ ++ where the SFmode value "f" is passed in REGNO but the struct itself ++ has mode BLKmode. */ + + static rtx + loongarch_pass_fpr_single (machine_mode type_mode, unsigned regno, +- machine_mode value_mode) ++ machine_mode value_mode, ++ HOST_WIDE_INT offset) + { + rtx x = gen_rtx_REG (value_mode, regno); + + if (type_mode != value_mode) + { +- x = gen_rtx_EXPR_LIST (VOIDmode, x, const0_rtx); ++ x = gen_rtx_EXPR_LIST (VOIDmode, x, GEN_INT (offset)); + x = gen_rtx_PARALLEL (type_mode, gen_rtvec (1, x)); + } + return x; +@@ -666,19 +460,16 @@ loongarch_pass_fpr_single (machine_mode type_mode, unsigned regno, + + static rtx + loongarch_pass_fpr_pair (machine_mode mode, unsigned regno1, +- machine_mode mode1, HOST_WIDE_INT offset1, +- unsigned regno2, machine_mode mode2, +- HOST_WIDE_INT offset2) ++ machine_mode mode1, HOST_WIDE_INT offset1, ++ unsigned regno2, machine_mode mode2, ++ HOST_WIDE_INT offset2) + { +- return gen_rtx_PARALLEL +- (mode, +- gen_rtvec (2, +- gen_rtx_EXPR_LIST (VOIDmode, +- gen_rtx_REG (mode1, regno1), +- GEN_INT (offset1)), +- gen_rtx_EXPR_LIST (VOIDmode, +- gen_rtx_REG (mode2, regno2), +- GEN_INT (offset2)))); ++ return gen_rtx_PARALLEL ( ++ mode, gen_rtvec (2, ++ gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode1, regno1), ++ GEN_INT (offset1)), ++ gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode2, regno2), ++ GEN_INT (offset2)))); + } + + /* Fill INFO with information about a single argument, and return an +@@ -689,9 +480,9 @@ loongarch_pass_fpr_pair (machine_mode mode, unsigned regno1, + returning the argument, or false if passing the argument. */ + + static rtx +-loongarch_get_arg_info (struct loongarch_arg_info *info, const CUMULATIVE_ARGS *cum, +- machine_mode mode, const_tree type, bool named, +- bool return_p) ++loongarch_get_arg_info (struct loongarch_arg_info *info, ++ const CUMULATIVE_ARGS *cum, machine_mode mode, ++ const_tree type, bool named, bool return_p) + { + unsigned num_bytes, num_words; + unsigned fpr_base = return_p ? FP_RETURN : FP_ARG_FIRST; +@@ -713,21 +504,23 @@ loongarch_get_arg_info (struct loongarch_arg_info *info, const CUMULATIVE_ARGS * + unsigned gregno = gpr_base + info->gpr_offset; + + /* Pass one- or two-element floating-point aggregates in FPRs. 
*/ +- if ((info->num_fprs = loongarch_pass_aggregate_in_fpr_pair_p (type, fields, use_vecarg_p)) ++ if ((info->num_fprs ++ = loongarch_pass_aggregate_num_fpr (type, fields, use_vecarg_p)) + && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS) + switch (info->num_fprs) + { + case 1: + return loongarch_pass_fpr_single (mode, fregno, +- TYPE_MODE (fields[0].type)); ++ TYPE_MODE (fields[0].type), ++ fields[0].offset); + + case 2: + return loongarch_pass_fpr_pair (mode, fregno, +- TYPE_MODE (fields[0].type), +- fields[0].offset, +- fregno + 1, +- TYPE_MODE (fields[1].type), +- fields[1].offset); ++ TYPE_MODE (fields[0].type), ++ fields[0].offset, ++ fregno + 1, ++ TYPE_MODE (fields[1].type), ++ fields[1].offset); + + default: + gcc_unreachable (); +@@ -742,9 +535,10 @@ loongarch_get_arg_info (struct loongarch_arg_info *info, const CUMULATIVE_ARGS * + return gen_rtx_REG (mode, fregno); + + case MODE_COMPLEX_FLOAT: +- return loongarch_pass_fpr_pair (mode, fregno, GET_MODE_INNER (mode), 0, +- fregno + 1, GET_MODE_INNER (mode), +- GET_MODE_UNIT_SIZE (mode)); ++ return loongarch_pass_fpr_pair (mode, fregno, ++ GET_MODE_INNER (mode), 0, ++ fregno + 1, GET_MODE_INNER (mode), ++ GET_MODE_UNIT_SIZE (mode)); + + default: + gcc_unreachable (); +@@ -761,10 +555,11 @@ loongarch_get_arg_info (struct loongarch_arg_info *info, const CUMULATIVE_ARGS * + if (!SCALAR_FLOAT_TYPE_P (fields[0].type)) + std::swap (fregno, gregno); + +- return loongarch_pass_fpr_pair (mode, fregno, TYPE_MODE (fields[0].type), +- fields[0].offset, +- gregno, TYPE_MODE (fields[1].type), +- fields[1].offset); ++ return loongarch_pass_fpr_pair (mode, fregno, ++ TYPE_MODE (fields[0].type), ++ fields[0].offset, gregno, ++ TYPE_MODE (fields[1].type), ++ fields[1].offset); + } + } + +@@ -791,7 +586,7 @@ loongarch_get_arg_info (struct loongarch_arg_info *info, const CUMULATIVE_ARGS * + + static rtx + loongarch_function_arg (cumulative_args_t cum_v, machine_mode mode, +- const_tree type, bool named) ++ const_tree type, bool named) + { + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + struct loongarch_arg_info info; +@@ -806,7 +601,7 @@ loongarch_function_arg (cumulative_args_t cum_v, machine_mode mode, + + static void + loongarch_function_arg_advance (cumulative_args_t cum_v, machine_mode mode, +- const_tree type, bool named) ++ const_tree type, bool named) + { + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + struct loongarch_arg_info info; +@@ -825,11 +620,12 @@ loongarch_function_arg_advance (cumulative_args_t cum_v, machine_mode mode, + + static int + loongarch_arg_partial_bytes (cumulative_args_t cum, +- machine_mode mode, tree type, bool named) ++ machine_mode mode, tree type, bool named) + { + struct loongarch_arg_info arg; + +- loongarch_get_arg_info (&arg, get_cumulative_args (cum), mode, type, named, false); ++ loongarch_get_arg_info (&arg, get_cumulative_args (cum), ++ mode, type, named, false); + return arg.stack_p ? arg.num_gprs * UNITS_PER_WORD : 0; + } + +@@ -837,8 +633,9 @@ loongarch_arg_partial_bytes (cumulative_args_t cum, + VALTYPE is the return type and MODE is VOIDmode. For libcalls, + VALTYPE is null and MODE is the mode of the return value. 
*/ + +-rtx +-loongarch_function_value (const_tree type, const_tree func, machine_mode mode) ++static rtx ++loongarch_function_value_1 (const_tree type, const_tree func, ++ machine_mode mode) + { + struct loongarch_arg_info info; + CUMULATIVE_ARGS args; +@@ -854,15 +651,34 @@ loongarch_function_value (const_tree type, const_tree func, machine_mode mode) + mode = promote_function_mode (type, mode, &unsigned_p, func, 1); + } + +- memset (&args, 0, sizeof args); ++ memset (&args, 0, sizeof (args)); + return loongarch_get_arg_info (&info, &args, mode, type, true, true); + } + +-/* Implement TARGET_PASS_BY_REFERENCE. */ ++ ++/* Implement TARGET_FUNCTION_VALUE. */ ++ ++static rtx ++loongarch_function_value (const_tree valtype, const_tree fn_decl_or_type, ++ bool outgoing ATTRIBUTE_UNUSED) ++{ ++ return loongarch_function_value_1 (valtype, fn_decl_or_type, VOIDmode); ++} ++ ++/* Implement TARGET_LIBCALL_VALUE. */ ++ ++static rtx ++loongarch_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED) ++{ ++ return loongarch_function_value_1 (NULL_TREE, NULL_TREE, mode); ++} ++ ++ ++/* Implement TARGET_PASS_BY_REFERENCE. */ + + static bool + loongarch_pass_by_reference (cumulative_args_t cum_v, machine_mode mode, +- const_tree type, bool named) ++ const_tree type, bool named) + { + HOST_WIDE_INT size = type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode); + struct loongarch_arg_info info; +@@ -886,23 +702,25 @@ loongarch_pass_by_reference (cumulative_args_t cum_v, machine_mode mode, + /* Implement TARGET_RETURN_IN_MEMORY. */ + + static bool +-loongarch_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED) ++loongarch_return_in_memory (const_tree type, ++ const_tree fndecl ATTRIBUTE_UNUSED) + { + CUMULATIVE_ARGS args; + cumulative_args_t cum = pack_cumulative_args (&args); + + /* The rules for returning in memory are the same as for passing the + first named argument by reference. */ +- memset (&args, 0, sizeof args); ++ memset (&args, 0, sizeof (args)); + return loongarch_pass_by_reference (cum, TYPE_MODE (type), type, true); + } + + /* Implement TARGET_SETUP_INCOMING_VARARGS. */ + + static void +-loongarch_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode, +- tree type, int *pretend_size ATTRIBUTE_UNUSED, +- int no_rtl) ++loongarch_setup_incoming_varargs (cumulative_args_t cum, ++ machine_mode mode, tree type, ++ int *pretend_size ATTRIBUTE_UNUSED, ++ int no_rtl) + { + CUMULATIVE_ARGS local_cum; + int gp_saved; +@@ -911,7 +729,8 @@ loongarch_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode, + argument. Advance a local copy of CUM past the last "real" named + argument, to find out how many registers are left over. */ + local_cum = *get_cumulative_args (cum); +- loongarch_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, 1); ++ loongarch_function_arg_advance (pack_cumulative_args (&local_cum), ++ mode, type, 1); + + /* Found out how many registers we need to save. 
*/ + gp_saved = MAX_ARGS_IN_REGISTERS - local_cum.num_gprs; +@@ -920,12 +739,11 @@ loongarch_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode, + { + rtx ptr = plus_constant (Pmode, virtual_incoming_args_rtx, + REG_PARM_STACK_SPACE (cfun->decl) +- - gp_saved * UNITS_PER_WORD); ++ - gp_saved * UNITS_PER_WORD); + rtx mem = gen_frame_mem (BLKmode, ptr); + set_mem_alias_set (mem, get_varargs_alias_set ()); + +- move_block_from_reg (local_cum.num_gprs + GP_ARG_FIRST, +- mem, gp_saved); ++ move_block_from_reg (local_cum.num_gprs + GP_ARG_FIRST, mem, gp_saved); + } + if (REG_PARM_STACK_SPACE (cfun->decl) == 0) + cfun->machine->varargs_size = gp_saved * UNITS_PER_WORD; +@@ -941,8 +759,7 @@ loongarch_set_frame_expr (rtx frame_pattern) + + insn = get_last_insn (); + RTX_FRAME_RELATED_P (insn) = 1; +- REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR, +- frame_pattern, ++ REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR, frame_pattern, + REG_NOTES (insn)); + } + +@@ -963,8 +780,8 @@ static bool + loongarch_save_reg_p (unsigned int regno) + { + bool call_saved = !global_regs[regno] && !call_used_regs[regno]; +- bool might_clobber = crtl->saves_all_registers +- || df_regs_ever_live_p (regno); ++ bool might_clobber ++ = crtl->saves_all_registers || df_regs_ever_live_p (regno); + + if (call_saved && might_clobber) + return true; +@@ -978,15 +795,6 @@ loongarch_save_reg_p (unsigned int regno) + return false; + } + +-/* Determine whether to call GPR save/restore routines. */ +-static bool +-loongarch_use_save_libcall (const struct loongarch_frame_info *frame) +-{ +- // FIXME: if (!TARGET_SAVE_RESTORE || crtl->calls_eh_return || frame_pointer_needed) +- return false; +- +-} +- + /* Determine which GPR save/restore routine to call. */ + + static unsigned +@@ -998,43 +806,114 @@ loongarch_save_libcall_count (unsigned mask) + abort (); + } + ++/* Find an available register to be used as dynamic realign argument ++ pointer regsiter. Such a register will be written in prologue and ++ used in begin of body, so it must not be ++ 1. parameter passing register. ++ 2. GOT pointer. ++ We reuse static-chain register if it is available. Otherwise, we ++ use r15 for loongarch64(There may be a better choice. TODO). ++ ++ Return: the regno of chosen register. */ ++ ++static unsigned int ++find_drap_reg (void) ++{ ++ tree decl = cfun->decl; ++ /* Always use callee-saved register if there are no caller-saved ++ registers. */ ++ /* Use r15 for nested function or function need static chain. ++ Since function with tail call may use any caller-saved ++ registers in epilogue, DRAP must not use caller-saved ++ register in such case. */ ++ if (DECL_STATIC_CHAIN (decl) ++ || crtl->tail_call_emit) ++ return DRAP_REGNUM; ++ ++ return STATIC_CHAIN_REGNUM; ++} ++ ++ ++/* Return Dynamic Realign Argument Pointer RTX. Now there isn't any. 
*/ ++ ++static rtx ++loongarch_get_drap_rtx (void) ++{ ++ if (crtl->stack_alignment_needed <= STACK_BOUNDARY ++ || (get_frame_size () == 0 && crtl->args.size == 0)) ++ { ++ crtl->stack_realign_needed = false; ++ return NULL; ++ } ++ ++ if (loongarch_force_drap) ++ crtl->need_drap = true; ++ ++ if (stack_realign_drap) ++ { ++ /* Assign DRAP to vDRAP and returns vDRAP */ ++ unsigned int regno = find_drap_reg (); ++ rtx drap_vreg; ++ rtx arg_ptr; ++ rtx_insn *seq, *insn; ++ ++ arg_ptr = gen_rtx_REG (Pmode, regno); ++ crtl->drap_reg = arg_ptr; ++ ++ start_sequence (); ++ drap_vreg = copy_to_reg (arg_ptr); ++ seq = get_insns (); ++ end_sequence (); ++ ++ insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ())); ++ if (!optimize) ++ { ++ add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ } ++ return drap_vreg; ++ } ++ else ++ return NULL; ++} ++ + /* Populate the current function's loongarch_frame_info structure. + +- LARCH stack frames grown downward. High addresses are at the top. +- +- +-------------------------------+ +- | | +- | incoming stack arguments | +- | | +- +-------------------------------+ <-- incoming stack pointer +- | | +- | callee-allocated save area | +- | for arguments that are | +- | split between registers and | +- | the stack | +- | | +- +-------------------------------+ <-- arg_pointer_rtx +- | | +- | callee-allocated save area | +- | for register varargs | +- | | +- +-------------------------------+ <-- hard_frame_pointer_rtx; +- | | stack_pointer_rtx + gp_sp_offset +- | GPR save area | + UNITS_PER_WORD +- | | +- +-------------------------------+ <-- stack_pointer_rtx + fp_sp_offset +- | | + UNITS_PER_HWVALUE +- | FPR save area | +- | | +- +-------------------------------+ <-- frame_pointer_rtx (virtual) +- | | +- | local variables | +- | | +- P +-------------------------------+ +- | | +- | outgoing stack arguments | +- | | +- +-------------------------------+ <-- stack_pointer_rtx ++ LoongArch stack frames grown downward. High addresses are at the top. ++ ++ +-------------------------------+ ++ | | ++ | incoming stack arguments | ++ | | ++ +-------------------------------+ <-- incoming stack pointer ++ | | ++ | callee-allocated save area | ++ | for arguments that are | ++ | split between registers and | ++ | the stack | ++ | | ++ +-------------------------------+ <-- arg_pointer_rtx (virtual) ++ | | ++ | callee-allocated save area | ++ | for register varargs | ++ | | ++ +-------------------------------+ <-- hard_frame_pointer_rtx; ++ | | stack_pointer_rtx + gp_sp_offset ++ | GPR save area | + UNITS_PER_WORD ++ | | ++ +-------------------------------+ <-- stack_pointer_rtx + fp_sp_offset ++ | | + UNITS_PER_HWVALUE ++ | FPR save area | ++ | | ++ +-------------------------------+ <-- frame_pointer_rtx (virtual) ++ | | ++ | local variables | ++ | | ++ P +-------------------------------+ ++ | | ++ | outgoing stack arguments | ++ | | ++ +-------------------------------+ <-- stack_pointer_rtx + + Dynamic stack allocations such as alloca insert data at point P. + They decrease stack_pointer_rtx but leave frame_pointer_rtx and +@@ -1050,58 +929,93 @@ loongarch_compute_frame_info (void) + frame = &cfun->machine->frame; + memset (frame, 0, sizeof (*frame)); + +- /* Find out which GPRs we need to save. */ +- for (regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++) ++ /* Find out which GPRs we need to save. 
*/ ++ for (regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++) ++ if (loongarch_save_reg_p (regno)) ++ frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++; ++ ++ /* If this function calls eh_return, we must also save and restore the ++ EH data registers. */ ++ if (crtl->calls_eh_return) ++ for (i = 0; (regno = EH_RETURN_DATA_REGNO (i)) != INVALID_REGNUM; i++) ++ frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++; ++ ++ /* Find out which FPRs we need to save. This loop must iterate over ++ the same space as its companion in loongarch_for_each_saved_reg. */ ++ if (TARGET_HARD_FLOAT) ++ for (regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++) + if (loongarch_save_reg_p (regno)) +- frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++; +- +- /* If this function calls eh_return, we must also save and restore the +- EH data registers. */ +- if (crtl->calls_eh_return) +- for (i = 0; (regno = EH_RETURN_DATA_REGNO (i)) != INVALID_REGNUM; i++) +- frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++; +- +- /* Find out which FPRs we need to save. This loop must iterate over +- the same space as its companion in loongarch_for_each_saved_reg. */ +- if (TARGET_HARD_FLOAT) +- for (regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++) +- if (loongarch_save_reg_p (regno)) +- frame->fmask |= 1 << (regno - FP_REG_FIRST), num_f_saved++; +- +- /* At the bottom of the frame are any outgoing stack arguments. */ +- offset = LARCH_STACK_ALIGN (crtl->outgoing_args_size); +- /* Next are local stack variables. */ +- offset += LARCH_STACK_ALIGN (get_frame_size ()); +- /* The virtual frame pointer points above the local variables. */ ++ frame->fmask |= 1 << (regno - FP_REG_FIRST), num_f_saved++; ++ ++ /* Treat drap reg as a callee-saved reg. */ ++ if (stack_realign_drap) ++ frame->mask |= 1 << (find_drap_reg ()), num_x_saved++; ++ ++ /* At the bottom of the frame are any outgoing stack arguments. */ ++ offset = LARCH_STACK_ALIGN2 (crtl->outgoing_args_size); ++ /* Next are local stack variables. */ ++ offset += LARCH_STACK_ALIGN2 (get_frame_size ()); ++ /* The virtual frame pointer points above the local variables. */ + frame->frame_pointer_offset = offset; +- /* Next are the callee-saved FPRs. */ ++ /* Next are the callee-saved FPRs. */ + if (frame->fmask) +- offset += LARCH_STACK_ALIGN (num_f_saved * UNITS_PER_FP_REG); +- frame->fp_sp_offset = offset - UNITS_PER_FP_REG; +- /* Next are the callee-saved GPRs. */ ++ { ++ if (crtl->stack_realign_needed) ++ offset += num_f_saved * UNITS_PER_FP_REG; ++ else ++ offset += LARCH_STACK_ALIGN (num_f_saved * UNITS_PER_FP_REG); ++ frame->fp_sp_offset = offset - UNITS_PER_FP_REG; ++ } ++ else ++ frame->fp_sp_offset = offset; ++ /* Next are the callee-saved GPRs. */ + if (frame->mask) + { +- unsigned x_save_size = LARCH_STACK_ALIGN (num_x_saved * UNITS_PER_WORD); +- unsigned num_save_restore = 1 + loongarch_save_libcall_count (frame->mask); ++ unsigned x_save_size; ++ if (crtl->stack_realign_needed) ++ x_save_size = num_x_saved * UNITS_PER_WORD; ++ else ++ x_save_size = LARCH_STACK_ALIGN (num_x_saved * UNITS_PER_WORD); ++ unsigned num_save_restore ++ = 1 + loongarch_save_libcall_count (frame->mask); + + /* Only use save/restore routines if they don't alter the stack size. 
*/ + if (LARCH_STACK_ALIGN (num_save_restore * UNITS_PER_WORD) == x_save_size) + frame->save_libcall_adjustment = x_save_size; + + offset += x_save_size; ++ frame->gp_sp_offset = offset - UNITS_PER_WORD; + } +- frame->gp_sp_offset = offset - UNITS_PER_WORD; +- /* The hard frame pointer points above the callee-saved GPRs. */ +- frame->hard_frame_pointer_offset = offset; +- /* Above the hard frame pointer is the callee-allocated varags save area. */ +- offset += LARCH_STACK_ALIGN (cfun->machine->varargs_size); ++ else ++ frame->gp_sp_offset = offset; ++ ++ /* The hard frame pointer points above the callee-saved GPRs. */ ++ if (crtl->stack_realign_needed) ++ frame->hard_frame_pointer_offset = frame->gp_sp_offset; /* For dwarf. */ ++ else ++ frame->hard_frame_pointer_offset = offset; ++ ++ /* Realign here for saving space if crtl->stack_realign_needed is true. */ ++ if (stack_realign_drap) ++ offset = LARCH_STACK_ALIGN2 (offset); ++ else if (stack_realign_fp) ++ offset = LARCH_STACK_ALIGN (offset); ++ ++ /* Above the hard frame pointer is the callee-allocated varags save area. */ ++ if (stack_realign_fp) ++ offset += LARCH_STACK_ALIGN (cfun->machine->varargs_size); ++ else ++ offset += LARCH_STACK_ALIGN2 (cfun->machine->varargs_size); + /* Next is the callee-allocated area for pretend stack arguments. */ +- offset += LARCH_STACK_ALIGN (crtl->args.pretend_args_size); ++ if (stack_realign_fp) ++ offset += LARCH_STACK_ALIGN (crtl->args.pretend_args_size); ++ else ++ offset += LARCH_STACK_ALIGN2 (crtl->args.pretend_args_size); + /* Arg pointer must be below pretend args, but must be above alignment + padding. */ + frame->arg_pointer_offset = offset - crtl->args.pretend_args_size; + frame->total_size = offset; +- /* Next points the incoming stack pointer and any incoming arguments. */ ++ /* Next points the incoming stack pointer and any incoming arguments. */ + + /* Only use save/restore routines when the GPRs are atop the frame. */ + if (frame->hard_frame_pointer_offset != frame->total_size) +@@ -1117,8 +1031,6 @@ loongarch_initial_elimination_offset (int from, int to) + { + HOST_WIDE_INT src, dest; + +- loongarch_compute_frame_info (); +- + if (to == HARD_FRAME_POINTER_REGNUM) + dest = cfun->machine->frame.hard_frame_pointer_offset; + else if (to == STACK_POINTER_REGNUM) +@@ -1145,8 +1057,8 @@ typedef void (*loongarch_save_restore_fn) (rtx, rtx); + stack pointer. */ + + static void +-loongarch_save_restore_reg (machine_mode mode, int regno, +- HOST_WIDE_INT offset, loongarch_save_restore_fn fn) ++loongarch_save_restore_reg (machine_mode mode, int regno, HOST_WIDE_INT offset, ++ loongarch_save_restore_fn fn) + { + rtx mem; + +@@ -1159,12 +1071,29 @@ loongarch_save_restore_reg (machine_mode mode, int regno, + of the frame. */ + + static void +-loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset, loongarch_save_restore_fn fn) ++loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset, ++ loongarch_save_restore_fn fn) + { + HOST_WIDE_INT offset; + +- /* Save the link register and s-registers. */ ++ /* Save the link register and s-registers. */ + offset = cfun->machine->frame.gp_sp_offset - sp_offset; ++ ++ /* The drap reg and fp reg have been saved in loongarch_expand_prologue ++ * when stack_realign_drap is true. */ ++ if (stack_realign_drap) ++ offset -= UNITS_PER_WORD * cfun->machine->frame.gpr_saved_num; ++ ++ /* Save fp reg first for access incoming-args in stack easily ++ * when stack_realign_fp is true. 
*/ ++ if (stack_realign_fp) ++ { ++ loongarch_save_restore_reg (word_mode, HARD_FRAME_POINTER_REGNUM, ++ offset, fn); ++ cfun->machine->frame.mask &= (~(1LL << HARD_FRAME_POINTER_REGNUM)); ++ offset -= UNITS_PER_WORD; ++ } ++ + for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++) + if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST)) + { +@@ -1172,6 +1101,10 @@ loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset, loongarch_save_restore_fn + offset -= UNITS_PER_WORD; + } + ++ /* Undo. */ ++ if (stack_realign_fp) ++ cfun->machine->frame.mask |= (1LL << HARD_FRAME_POINTER_REGNUM); ++ + /* This loop must iterate over the same space as its companion in + loongarch_compute_frame_info. */ + offset = cfun->machine->frame.fp_sp_offset - sp_offset; +@@ -1185,6 +1118,19 @@ loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset, loongarch_save_restore_fn + } + } + ++/* Emit a move from SRC to DEST. Assume that the move expanders can ++ handle all moves if !can_create_pseudo_p (). The distinction is ++ important because, unlike emit_move_insn, the move expanders know ++ how to force Pmode objects into the constant pool even when the ++ constant pool address is not itself legitimate. */ ++ ++rtx ++loongarch_emit_move (rtx dest, rtx src) ++{ ++ return (can_create_pseudo_p () ? emit_move_insn (dest, src) ++ : emit_move_insn_1 (dest, src)); ++} ++ + /* Save register REG to MEM. Make the instruction frame-related. */ + + static void +@@ -1207,575 +1153,690 @@ loongarch_restore_reg (rtx reg, rtx mem) + RTX_FRAME_RELATED_P (insn) = 1; + } + +-/* Return the code to invoke the GPR save routine. */ +- +-const char * +-loongarch_output_gpr_save (unsigned mask) +-{ +- static char s[32]; +- unsigned n = loongarch_save_libcall_count (mask); +- +- ssize_t bytes = snprintf (s, sizeof (s), "call\tt0,__loongarch_save_%u", n); +- gcc_assert ((size_t) bytes < sizeof (s)); +- +- return s; +-} +- +-#define IMM_BITS 12 +- +-#define IMM_REACH (1LL << IMM_BITS) +- + /* For stack frames that can't be allocated with a single ADDI instruction, + compute the best value to initially allocate. It must at a minimum +- allocate enough space to spill the callee-saved registers. If TARGET_RVC, +- try to pick a value that will allow compression of the register saves +- without adding extra instructions. */ ++ allocate enough space to spill the callee-saved registers. */ + + static HOST_WIDE_INT + loongarch_first_stack_step (struct loongarch_frame_info *frame) + { +- if (SMALL_OPERAND (frame->total_size)) ++ ++ /* Only for fpr/gpr saved regs first when stack_realign_fp is true. */ ++ if (stack_realign_fp) ++ return frame->total_size - frame->frame_pointer_offset; ++ ++ HOST_WIDE_INT realign_size = crtl->stack_alignment_needed / BITS_PER_UNIT; ++ ++ if (IMM12_OPERAND (frame->total_size)) + return frame->total_size; + +- HOST_WIDE_INT min_first_step = +- LARCH_STACK_ALIGN (frame->total_size - frame->fp_sp_offset); ++ HOST_WIDE_INT min_first_step ++ = LARCH_STACK_ALIGN2 (frame->total_size - frame->fp_sp_offset); + HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8; + HOST_WIDE_INT min_second_step = frame->total_size - max_first_step; +- gcc_assert (min_first_step <= max_first_step); + + /* As an optimization, use the least-significant bits of the total frame +- size, so that the second adjustment step is just LUI + ADD. */ +- if (!SMALL_OPERAND (min_second_step) ++ size, so that the second adjustment step is just LU12I + ADD. 
*/ ++ if (!IMM12_OPERAND (min_second_step) + && frame->total_size % IMM_REACH < IMM_REACH / 2 + && frame->total_size % IMM_REACH >= min_first_step) + return frame->total_size % IMM_REACH; + +- return max_first_step; +-} +- +-static rtx +-loongarch_adjust_libcall_cfi_prologue () +-{ +- rtx dwarf = NULL_RTX; +- rtx adjust_sp_rtx, reg, mem, insn; +- int saved_size = cfun->machine->frame.save_libcall_adjustment; +- int offset; +- +- for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++) +- if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST)) +- { +- /* The save order is ra, s0 to s8. */ +- if (regno == RETURN_ADDR_REGNUM) +- offset = saved_size - UNITS_PER_WORD; +- else +- offset = saved_size - ((regno - S0_REGNUM + 2) * UNITS_PER_WORD); +- +- reg = gen_rtx_REG (SImode, regno); +- mem = gen_frame_mem (SImode, plus_constant (Pmode, +- stack_pointer_rtx, +- offset)); +- +- insn = gen_rtx_SET (mem, reg); +- dwarf = alloc_reg_note (REG_CFA_OFFSET, insn, dwarf); +- } +- +- /* Debug info for adjust sp. */ +- adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx, +- stack_pointer_rtx, GEN_INT (-saved_size)); +- dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx, +- dwarf); +- return dwarf; ++ return crtl->stack_realign_needed ? (max_first_step < realign_size ++ ? realign_size ++ : ROUND_DOWN (max_first_step, ++ realign_size)) ++ : max_first_step; + } + + static void + loongarch_emit_stack_tie (void) + { +- if (Pmode == SImode) +- emit_insn (gen_stack_tiesi (stack_pointer_rtx, hard_frame_pointer_rtx)); +- else +- emit_insn (gen_stack_tiedi (stack_pointer_rtx, hard_frame_pointer_rtx)); +-} +- +-/* Return nonzero if this function is known to have a null epilogue. +- This allows the optimizer to omit jumps to jumps if no stack +- was created. */ +- +-bool +-loongarch_can_use_return_insn (void) +-{ +- return reload_completed && cfun->machine->frame.total_size == 0; ++ emit_insn (PMODE_INSN (gen_stack_tie, ++ (stack_pointer_rtx, hard_frame_pointer_rtx))); + } + +-static rtx +-loongarch_adjust_libcall_cfi_epilogue () +-{ +- rtx dwarf = NULL_RTX; +- rtx adjust_sp_rtx, reg; +- int saved_size = cfun->machine->frame.save_libcall_adjustment; +- +- /* Debug info for adjust sp. */ +- adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx, +- stack_pointer_rtx, GEN_INT (saved_size)); +- dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx, +- dwarf); +- +- for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++) +- if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST)) +- { +- reg = gen_rtx_REG (SImode, regno); +- dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); +- } ++#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP) + +- return dwarf; +-} ++#if PROBE_INTERVAL > 16384 ++#error Cannot use indexed addressing mode for stack probing ++#endif + +-/* Expand an "epilogue" or "sibcall_epilogue" pattern; SIBCALL_P +- says which. */ ++/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE, ++ inclusive. These are offsets from the current stack pointer. */ + +-void +-loongarch_expand_epilogue (bool sibcall_p) ++static void ++loongarch_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size) + { +- /* Split the frame into two. STEP1 is the amount of stack we should +- deallocate before restoring the registers. STEP2 is the amount we +- should deallocate afterwards. ++ HOST_WIDE_INT rounded_size; ++ rtx r12 = LARCH_PROLOGUE_TEMP2 (Pmode); ++ rtx r14 = LARCH_PROLOGUE_TEMP3 (Pmode); + +- Start off by assuming that no registers need to be restored. 
*/ +- struct loongarch_frame_info *frame = &cfun->machine->frame; +- unsigned mask = frame->mask; +- HOST_WIDE_INT step1 = frame->total_size; +- HOST_WIDE_INT step2 = 0; +- bool use_restore_libcall = !sibcall_p && loongarch_use_save_libcall (frame); +- rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); +- rtx insn; ++ size = size + first; ++ /* Sanity check for the addressing mode we're going to use. */ ++ gcc_assert (first <= 16384); + +- /* We need to add memory barrier to prevent read from deallocated stack. */ +- bool need_barrier_p = (get_frame_size () +- + cfun->machine->frame.arg_pointer_offset) != 0; ++ /* Step 1: round SIZE to the previous multiple of the interval. */ + +- if (!sibcall_p && loongarch_can_use_return_insn ()) +- { +- emit_jump_insn (gen_return ()); +- return; +- } ++ rounded_size = ROUND_DOWN (size, PROBE_INTERVAL); + +- /* Move past any dynamic stack allocations. */ +- if (cfun->calls_alloca) ++ /* Step 2: compute initial and final value of the loop counter. */ ++ ++ emit_move_insn (r14, GEN_INT (PROBE_INTERVAL)); ++ /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */ ++ if (rounded_size != 0) + { +- /* Emit a barrier to prevent loads from a deallocated stack. */ +- loongarch_emit_stack_tie (); +- need_barrier_p = false; ++ emit_move_insn (r12, GEN_INT (rounded_size)); ++ emit_insn (gen_rtx_SET (r12, gen_rtx_MINUS (Pmode, ++ stack_pointer_rtx, r12))); + +- rtx adjust = GEN_INT (-frame->hard_frame_pointer_offset); +- if (!SMALL_OPERAND (INTVAL (adjust))) +- { +- loongarch_emit_move (N_LARCH_PROLOGUE_TEMP (Pmode), adjust); +- adjust = N_LARCH_PROLOGUE_TEMP (Pmode); +- } ++ /* Step 3: the loop + +- insn = emit_insn ( +- gen_add3_insn (stack_pointer_rtx, hard_frame_pointer_rtx, +- adjust)); ++ do ++ { ++ TEST_ADDR = TEST_ADDR + PROBE_INTERVAL ++ probe at TEST_ADDR ++ } ++ while (TEST_ADDR != LAST_ADDR) + +- rtx dwarf = NULL_RTX; +- rtx cfa_adjust_value = gen_rtx_PLUS ( +- Pmode, hard_frame_pointer_rtx, +- GEN_INT (-frame->hard_frame_pointer_offset)); +- rtx cfa_adjust_rtx = gen_rtx_SET (stack_pointer_rtx, cfa_adjust_value); +- dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, cfa_adjust_rtx, dwarf); +- RTX_FRAME_RELATED_P (insn) = 1; ++ probes at FIRST + N * PROBE_INTERVAL for values of N from 1 ++ until it is equal to ROUNDED_SIZE. */ + +- REG_NOTES (insn) = dwarf; ++ emit_insn (PMODE_INSN (gen_probe_stack_range, (stack_pointer_rtx, ++ stack_pointer_rtx, r12, r14))); + } + +- /* If we need to restore registers, deallocate as much stack as +- possible in the second step without going out of range. */ +- if ((frame->mask | frame->fmask) != 0) +- { +- step2 = loongarch_first_stack_step (frame); +- step1 -= step2; +- } ++ /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time ++ that SIZE is equal to ROUNDED_SIZE. */ + +- /* Set TARGET to BASE + STEP1. */ +- if (step1 > 0) ++ if (size != rounded_size) + { +- /* Emit a barrier to prevent loads from a deallocated stack. */ +- loongarch_emit_stack_tie (); +- need_barrier_p = false; +- +- /* Get an rtx for STEP1 that we can add to BASE. 
*/ +- rtx adjust = GEN_INT (step1); +- if (!SMALL_OPERAND (step1)) ++ if (size - rounded_size >= PROBE_INTERVAL/2) + { +- loongarch_emit_move (N_LARCH_PROLOGUE_TEMP (Pmode), adjust); +- adjust = N_LARCH_PROLOGUE_TEMP (Pmode); ++ emit_move_insn (r14, GEN_INT (size - rounded_size)); ++ emit_insn (gen_rtx_SET (stack_pointer_rtx, gen_rtx_MINUS (Pmode, ++ stack_pointer_rtx, ++ r14))); + } ++ else ++ emit_insn (gen_rtx_SET (stack_pointer_rtx, gen_rtx_PLUS (Pmode, ++ stack_pointer_rtx, ++ GEN_INT (rounded_size - size)))); + +- insn = emit_insn ( +- gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, adjust)); +- +- rtx dwarf = NULL_RTX; +- rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx, +- GEN_INT (step2)); +- +- dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf); +- RTX_FRAME_RELATED_P (insn) = 1; +- +- REG_NOTES (insn) = dwarf; + } + +- if (use_restore_libcall) +- frame->mask = 0; /* Temporarily fib that we need not save GPRs. */ +- +- /* Restore the registers. */ +- loongarch_for_each_saved_reg (frame->total_size - step2, loongarch_restore_reg); +- +- if (use_restore_libcall) ++ if (first) + { +- frame->mask = mask; /* Undo the above fib. */ +- gcc_assert (step2 >= frame->save_libcall_adjustment); +- step2 -= frame->save_libcall_adjustment; ++ emit_move_insn (r12, GEN_INT (first)); ++ emit_insn (gen_rtx_SET (stack_pointer_rtx, gen_rtx_PLUS (Pmode, ++ stack_pointer_rtx, r12))); + } + +- if (need_barrier_p) +- loongarch_emit_stack_tie (); ++ /* Make sure nothing is scheduled before we are done. */ ++ emit_insn (gen_blockage ()); ++} + +- /* Deallocate the final bit of the frame. */ +- if (step2 > 0) +- { +- insn = emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, +- GEN_INT (step2))); ++/* Probe a range of stack addresses from REG1 to REG2 inclusive. These are ++ absolute addresses. */ ++const char * ++loongarch_output_probe_stack_range (rtx reg1, rtx reg2, rtx reg3) ++{ ++ static int labelno = 0; ++ char loop_lab[32], tmp[64]; ++ rtx xops[3]; + +- rtx dwarf = NULL_RTX; +- rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx, +- const0_rtx); +- dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf); +- RTX_FRAME_RELATED_P (insn) = 1; ++ ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++); + +- REG_NOTES (insn) = dwarf; ++ /* Loop. */ ++ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); ++ ++ /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */ ++ xops[0] = reg1; ++ xops[1] = GEN_INT (-PROBE_INTERVAL); ++ xops[2] = reg3; ++ if (TARGET_64BIT) ++ output_asm_insn ("sub.d\t%0,%0,%2", xops); ++ else ++ output_asm_insn ("sub.w\t%0,%0,%2", xops); ++ ++ /* Probe at TEST_ADDR, test if TEST_ADDR == LAST_ADDR and branch. */ ++ xops[1] = reg2; ++ strcpy (tmp, "bne\t%0,%1,"); ++ if (TARGET_64BIT) ++ output_asm_insn ("st.d\t$r0,%0,0", xops); ++ else ++ output_asm_insn ("st.w\t$r0,%0,0", xops); ++ output_asm_insn (strcat (tmp, &loop_lab[1]), xops); ++ ++ return ""; ++} ++ ++/* Expand the "prologue" pattern. */ ++ ++void ++loongarch_expand_prologue (void) ++{ ++ struct loongarch_frame_info *frame; ++ HOST_WIDE_INT size; ++ rtx insn; ++ HOST_WIDE_INT realign_size; ++ HOST_WIDE_INT offset; ++ unsigned mask; ++ HOST_WIDE_INT saved_gpr_num = 0; ++ ++ /* Finalize crtl->stack_realign_needed and frame_pointer_needed flags. */ ++ if((crtl->stack_realign_needed || (!flag_omit_frame_pointer && optimize)) && loongarch_stack_realign) ++ { ++ unsigned int incoming_stack_boundary ++ = (crtl->parm_stack_boundary > PREFERRED_STACK_BOUNDARY ++ ? 
crtl->parm_stack_boundary : PREFERRED_STACK_BOUNDARY); ++ unsigned int stack_alignment ++ = (crtl->is_leaf ++ ? crtl->max_used_stack_slot_alignment ++ : crtl->stack_alignment_needed); ++ unsigned int stack_realign ++ = (incoming_stack_boundary < stack_alignment); ++ ++ if ((get_frame_size () + crtl->outgoing_args_size) == 0 ++ && (crtl->args.size == 0) ++ && frame_pointer_needed ++ && crtl->is_leaf ++ && crtl->sp_is_unchanging ++ && !cfun->calls_alloca ++ && !crtl->calls_eh_return ++ && !(STACK_CHECK_MOVING_SP ++ && flag_stack_check ++ && flag_exceptions ++ && cfun->can_throw_non_call_exceptions)) ++ { ++ /* If drap has been set, but it actually isn't live at the ++ start of the function, there is no reason to set it up. */ ++ if (crtl->drap_reg) ++ { ++ basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb; ++ if (! REGNO_REG_SET_P (DF_LR_IN (bb), ++ REGNO (crtl->drap_reg))) ++ { ++ crtl->drap_reg = NULL_RTX; ++ crtl->need_drap = false; ++ } ++ } ++ frame_pointer_needed = false; ++ crtl->stack_realign_needed = false; ++ crtl->max_used_stack_slot_alignment = incoming_stack_boundary; ++ crtl->stack_alignment_needed = incoming_stack_boundary; ++ crtl->stack_alignment_estimated = incoming_stack_boundary; ++ if (crtl->preferred_stack_boundary > incoming_stack_boundary) ++ crtl->preferred_stack_boundary = incoming_stack_boundary; ++ ++ df_finish_pass (true); ++ df_scan_alloc (NULL); ++ df_scan_blocks (); ++ df_compute_regs_ever_live (true); ++ df_analyze (); ++ loongarch_compute_frame_info(); ++ } ++ } ++ ++ frame = &cfun->machine->frame; ++ size = frame->total_size; ++ ++ mask = frame->mask; ++ ++ realign_size = crtl->stack_alignment_needed / BITS_PER_UNIT; ++ ++ if (flag_stack_usage_info) ++ { ++ if (stack_realign_drap) ++ { ++ current_function_dynamic_stack_size += crtl->stack_alignment_needed / BITS_PER_UNIT; ++ } ++ current_function_static_stack_size = size; + } + +- if (use_restore_libcall) ++ /* When stack_realign_drap is true, save current sp in drap-reg then realign. */ ++ if (stack_realign_drap) + { +- rtx dwarf = loongarch_adjust_libcall_cfi_epilogue (); +- insn = emit_insn (gen_gpr_restore (GEN_INT (loongarch_save_libcall_count (mask)))); ++ rtx tmp_reg = plus_constant (Pmode, stack_pointer_rtx, 0); ++ insn = emit_insn (gen_rtx_SET (crtl->drap_reg, tmp_reg)); + RTX_FRAME_RELATED_P (insn) = 1; +- REG_NOTES (insn) = dwarf; + +- emit_jump_insn (gen_gpr_restore_return (ra)); +- return; ++ int log2_realigned_bytes = exact_log2 (realign_size); ++ tmp_reg = gen_rtx_REG (Pmode, GP_REG_FIRST); ++ insn = emit_insn (gen_insvdi (stack_pointer_rtx, ++ GEN_INT (log2_realigned_bytes), ++ const0_rtx, ++ tmp_reg)); ++ insn = gen_anddi3 (stack_pointer_rtx, ++ stack_pointer_rtx, ++ GEN_INT (-realign_size)); ++ loongarch_set_frame_expr (insn); + } + +- /* Add in the __builtin_eh_return stack adjustment. */ +- if (crtl->calls_eh_return) +- emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, +- EH_RETURN_STACKADJ_RTX)); ++ /* Save the registers. */ ++ if ((frame->mask | frame->fmask) != 0) ++ { ++ HOST_WIDE_INT step1 = MIN (size, loongarch_first_stack_step (frame)); + +- if (!sibcall_p) +- emit_jump_insn (gen_simple_return_internal (ra)); +-} ++ /* Save fp first for dwarf. 
*/ ++ if (stack_realign_drap) ++ { ++ gcc_assert (step1 % realign_size == 0); ++ if (frame->mask & (1LL << HARD_FRAME_POINTER_REGNUM)) ++ { ++ emit_insn (gen_add3_insn (stack_pointer_rtx, ++ stack_pointer_rtx, ++ GEN_INT (-(frame->total_size ++ - frame->gp_sp_offset)))); ++ step1 -= (frame->total_size - frame->gp_sp_offset); ++ loongarch_save_restore_reg (word_mode, HARD_FRAME_POINTER_REGNUM, ++ 0, loongarch_save_reg); ++ cfun->machine->frame.mask ++ = frame->mask & ~(1LL << HARD_FRAME_POINTER_REGNUM); ++ saved_gpr_num ++; ++ } ++ /* Set up the frame pointer, if we're using one. */ ++ if (frame_pointer_needed) ++ { ++ insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); ++ RTX_FRAME_RELATED_P (insn) = 1; + +- +-static rtx loongarch_find_pic_call_symbol (rtx_insn *, rtx, bool); +-static int loongarch_register_move_cost (machine_mode, reg_class_t, +- reg_class_t); +- +-/* Predicates to test for presence of "near"/"short_call" and "far"/"long_call" +- attributes on the given TYPE. */ ++ loongarch_emit_stack_tie (); ++ } ++ } + +-static bool +-loongarch_near_type_p (const_tree type) +-{ +- return (lookup_attribute ("short_call", TYPE_ATTRIBUTES (type)) != NULL +- || lookup_attribute ("near", TYPE_ATTRIBUTES (type)) != NULL); +-} ++ if (!IMM12_OPERAND (-step1) && stack_realign_drap) ++ { ++ loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode), GEN_INT (-step1)); ++ emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, ++ LARCH_PROLOGUE_TEMP (Pmode))); + +-static bool +-loongarch_far_type_p (const_tree type) +-{ +- return (lookup_attribute ("long_call", TYPE_ATTRIBUTES (type)) != NULL +- || lookup_attribute ("far", TYPE_ATTRIBUTES (type)) != NULL); +-} ++ /* Describe the effect of the previous instructions. */ ++ insn = plus_constant (Pmode, stack_pointer_rtx, -step1); ++ insn = gen_rtx_SET (stack_pointer_rtx, insn); ++ loongarch_set_frame_expr (insn); ++ } ++ else ++ { ++ insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (-step1)); ++ RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; ++ } + ++ if (saved_gpr_num && stack_realign_drap) ++ size -= (step1 + frame->total_size - frame->gp_sp_offset); ++ else ++ size -= step1; + +-/* Check if the interrupt attribute is set for a function. */ ++ if (stack_realign_drap && (frame->mask & (1LL << find_drap_reg ()))) ++ { ++ offset = cfun->machine->frame.gp_sp_offset - size ++ - UNITS_PER_WORD * saved_gpr_num; ++ loongarch_save_restore_reg (word_mode, find_drap_reg (), ++ offset, loongarch_save_reg); ++ cfun->machine->frame.mask ++ = frame->mask & ~(1LL << (find_drap_reg ())); ++ saved_gpr_num ++; ++ } + +-static bool +-loongarch_interrupt_type_p (tree type) +-{ +- return lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type)) != NULL; +-} ++ cfun->machine->frame.gpr_saved_num = saved_gpr_num; ++ loongarch_for_each_saved_reg (size, loongarch_save_reg); ++ cfun->machine->frame.mask = mask; ++ } + +-/* Implement TARGET_COMP_TYPE_ATTRIBUTES. */ ++ /* Set up the frame pointer, if we're using one. */ ++ if (frame_pointer_needed && !stack_realign_drap) ++ { ++ insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (frame->hard_frame_pointer_offset - size)); ++ RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; + +-static int +-loongarch_comp_type_attributes (const_tree type1, const_tree type2) +-{ +- /* Disallow mixed near/far attributes. 
*/ +- if (loongarch_far_type_p (type1) && loongarch_near_type_p (type2)) +- return 0; +- if (loongarch_near_type_p (type1) && loongarch_far_type_p (type2)) +- return 0; +- return 1; +-} ++ loongarch_emit_stack_tie (); ++ } + +-/* Implement TARGET_INSERT_ATTRIBUTES. */ ++ /* Stack realign when stack_realign_fp is true. */ ++ if (stack_realign_fp) ++ { ++ int log2_realigned_bytes = exact_log2 (realign_size); ++ rtx tmp_reg = gen_rtx_REG (Pmode, GP_REG_FIRST); ++ insn = emit_insn (gen_insvdi (stack_pointer_rtx, ++ GEN_INT (log2_realigned_bytes), ++ const0_rtx, ++ tmp_reg)); ++ insn = gen_anddi3 (stack_pointer_rtx, ++ stack_pointer_rtx, ++ GEN_INT (-realign_size)); ++ loongarch_set_frame_expr (insn); ++ } + +-static void +-loongarch_insert_attributes (tree decl, tree *attributes) +-{ +-} ++ /* Allocate the rest of the frame. */ ++ if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK ++ || flag_stack_clash_protection) ++ && size > 0) ++ { ++ loongarch_emit_probe_stack_range (get_stack_check_protect (), size); + +-/* Implement TARGET_MERGE_DECL_ATTRIBUTES. */ ++ /* Describe the effect of the previous instructions. */ ++ insn = plus_constant (Pmode, stack_pointer_rtx, -size); ++ insn = gen_rtx_SET (stack_pointer_rtx, insn); ++ loongarch_set_frame_expr (insn); ++ } ++ else ++ { ++ if (size > 0) ++ { ++ if (stack_realign_drap) ++ gcc_assert (size % realign_size == 0); + +-static tree +-loongarch_merge_decl_attributes (tree olddecl, tree newdecl) +-{ +- return merge_attributes (DECL_ATTRIBUTES (olddecl), +- DECL_ATTRIBUTES (newdecl)); ++ if (IMM12_OPERAND (-size)) ++ { ++ insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (-size)); ++ RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; ++ } ++ else ++ { ++ loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode), GEN_INT (-size)); ++ emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, ++ LARCH_PROLOGUE_TEMP (Pmode))); ++ ++ /* Describe the effect of the previous instructions. */ ++ insn = plus_constant (Pmode, stack_pointer_rtx, -size); ++ insn = gen_rtx_SET (stack_pointer_rtx, insn); ++ loongarch_set_frame_expr (insn); ++ } ++ } ++ } + } + +-/* Implement TARGET_CAN_INLINE_P. */ ++/* Return nonzero if this function is known to have a null epilogue. ++ This allows the optimizer to omit jumps to jumps if no stack ++ was created. */ + +-static bool +-loongarch_can_inline_p (tree caller, tree callee) ++bool ++loongarch_can_use_return_insn (void) + { +- return default_target_can_inline_p (caller, callee); ++ return reload_completed && cfun->machine->frame.total_size == 0; + } + +-/* Handle an "interrupt" attribute with an optional argument. */ ++/* Expand an "epilogue" or "sibcall_epilogue" pattern; SIBCALL_P ++ says which. */ + +-static tree +-loongarch_handle_interrupt_attr (tree *node ATTRIBUTE_UNUSED, tree name, tree args, +- int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) ++void ++loongarch_expand_epilogue (bool sibcall_p) + { +- /* Check for an argument. */ +- if (is_attribute_p ("interrupt", name) && args != NULL) ++ /* Split the frame into two. STEP1 is the amount of stack we should ++ deallocate before restoring the registers. STEP2 is the amount we ++ should deallocate afterwards. ++ ++ Start off by assuming that no registers need to be restored. 
*/ ++ struct loongarch_frame_info *frame = &cfun->machine->frame; ++ unsigned mask = frame->mask; ++ HOST_WIDE_INT step1 = frame->total_size; ++ HOST_WIDE_INT step2 = 0; ++ rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); ++ rtx insn; ++ HOST_WIDE_INT offset; ++ HOST_WIDE_INT saved_gpr_num = 0; ++ ++ /* We need to add memory barrier to prevent read from deallocated stack. */ ++ bool need_barrier_p ++ = (get_frame_size () + cfun->machine->frame.arg_pointer_offset) != 0; ++ ++ if (!sibcall_p && loongarch_can_use_return_insn ()) + { +- tree cst; ++ emit_jump_insn (gen_return ()); ++ return; ++ } + +- cst = TREE_VALUE (args); +- if (TREE_CODE (cst) != STRING_CST) +- { +- warning (OPT_Wattributes, +- "%qE attribute requires a string argument", +- name); +- *no_add_attrs = true; +- } +- else if (strcmp (TREE_STRING_POINTER (cst), "eic") != 0 +- && strncmp (TREE_STRING_POINTER (cst), "vector=", 7) != 0) +- { +- warning (OPT_Wattributes, +- "argument to %qE attribute is neither eic, nor " +- "vector=", name); +- *no_add_attrs = true; +- } +- else if (strncmp (TREE_STRING_POINTER (cst), "vector=", 7) == 0) +- { +- const char *arg = TREE_STRING_POINTER (cst) + 7; ++ if (!stack_realign_fp) ++ { ++ /* Move past any dynamic stack allocations. */ ++ if (cfun->calls_alloca) ++ { ++ /* Emit a barrier to prevent loads from a deallocated stack. */ ++ loongarch_emit_stack_tie (); ++ need_barrier_p = false; + +- /* Acceptable names are: sw0,sw1,hw0,hw1,hw2,hw3,hw4,hw5. */ +- if (strlen (arg) != 3 +- || (arg[0] != 's' && arg[0] != 'h') +- || arg[1] != 'w' +- || (arg[0] == 's' && arg[2] != '0' && arg[2] != '1') +- || (arg[0] == 'h' && (arg[2] < '0' || arg[2] > '5'))) ++ rtx adjust = GEN_INT (-frame->hard_frame_pointer_offset); ++ if (!IMM12_OPERAND (INTVAL (adjust))) + { +- warning (OPT_Wattributes, +- "interrupt vector to %qE attribute is not " +- "vector=(sw0|sw1|hw0|hw1|hw2|hw3|hw4|hw5)", +- name); +- *no_add_attrs = true; ++ loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode), adjust); ++ adjust = LARCH_PROLOGUE_TEMP (Pmode); + } +- } + +- return NULL_TREE; +- } ++ insn = emit_insn (gen_add3_insn (stack_pointer_rtx, ++ hard_frame_pointer_rtx, ++ adjust)); + +- return NULL_TREE; +-} ++ if (!(stack_realign_drap)) ++ { ++ rtx dwarf = NULL_RTX; ++ rtx minus_offset = GEN_INT (-frame->hard_frame_pointer_offset); ++ rtx cfa_adjust_value = gen_rtx_PLUS (Pmode, ++ hard_frame_pointer_rtx, ++ minus_offset); + +-/* Handle a "use_shadow_register_set" attribute with an optional argument. */ ++ rtx cfa_adjust_rtx = gen_rtx_SET (stack_pointer_rtx, cfa_adjust_value); ++ dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, cfa_adjust_rtx, dwarf); ++ RTX_FRAME_RELATED_P (insn) = 1; + +-static tree +-loongarch_handle_use_shadow_register_set_attr (tree *node ATTRIBUTE_UNUSED, +- tree name, tree args, +- int flags ATTRIBUTE_UNUSED, +- bool *no_add_attrs) +-{ +- /* Check for an argument. */ +- if (is_attribute_p ("use_shadow_register_set", name) && args != NULL) +- { +- tree cst; ++ REG_NOTES (insn) = dwarf; ++ } ++ } + +- cst = TREE_VALUE (args); +- if (TREE_CODE (cst) != STRING_CST) +- { +- warning (OPT_Wattributes, +- "%qE attribute requires a string argument", +- name); +- *no_add_attrs = true; +- } +- else if (strcmp (TREE_STRING_POINTER (cst), "intstack") != 0) +- { +- warning (OPT_Wattributes, +- "argument to %qE attribute is not intstack", name); +- *no_add_attrs = true; +- } ++ /* If we need to restore registers, deallocate as much stack as ++ possible in the second step without going out of range. 
*/ ++ if ((frame->mask | frame->fmask) != 0) ++ { ++ step2 = loongarch_first_stack_step (frame); ++ step1 -= step2; ++ } + +- return NULL_TREE; +- } ++ /* Set TARGET to BASE + STEP1. */ ++ if (step1 > 0) ++ { ++ /* Emit a barrier to prevent loads from a deallocated stack. */ ++ loongarch_emit_stack_tie (); ++ need_barrier_p = false; + +- return NULL_TREE; +-} +- +-/* If X is a PLUS of a CONST_INT, return the two terms in *BASE_PTR +- and *OFFSET_PTR. Return X in *BASE_PTR and 0 in *OFFSET_PTR otherwise. */ ++ /* Get an rtx for STEP1 that we can add to BASE. */ ++ rtx adjust = GEN_INT (step1); ++ if (!IMM12_OPERAND (step1)) ++ { ++ loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode), adjust); ++ adjust = LARCH_PROLOGUE_TEMP (Pmode); ++ } + +-static void +-loongarch_split_plus (rtx x, rtx *base_ptr, HOST_WIDE_INT *offset_ptr) +-{ +- if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))) ++ insn = emit_insn (gen_add3_insn (stack_pointer_rtx, ++ stack_pointer_rtx, ++ adjust)); ++ ++ rtx dwarf = NULL_RTX; ++ rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, ++ stack_pointer_rtx, ++ GEN_INT (step2)); ++ ++ dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ ++ REG_NOTES (insn) = dwarf; ++ } ++ ++ /* Restore drap reg and fp reg first when stack_realign_drap is true. */ ++ if (stack_realign_drap) ++ { ++ if (frame->mask & (1LL << HARD_FRAME_POINTER_REGNUM)) ++ { ++ offset = cfun->machine->frame.gp_sp_offset ++ - (frame->total_size - step2) ; ++ loongarch_save_restore_reg (word_mode, HARD_FRAME_POINTER_REGNUM, ++ offset, loongarch_restore_reg); ++ cfun->machine->frame.mask ++ = frame->mask & ~(1LL << HARD_FRAME_POINTER_REGNUM); ++ saved_gpr_num ++; ++ } ++ if (frame->mask & (1LL << find_drap_reg ())) ++ { ++ offset = cfun->machine->frame.gp_sp_offset ++ - (frame->total_size - step2) - UNITS_PER_WORD * saved_gpr_num; ++ loongarch_save_restore_reg (word_mode, find_drap_reg (), ++ offset, loongarch_restore_reg); ++ cfun->machine->frame.mask ++ = frame->mask & ~(1LL << (find_drap_reg ())); ++ saved_gpr_num ++; ++ } ++ cfun->machine->frame.gpr_saved_num = saved_gpr_num; ++ } ++ } ++ else /* stack_realign_fp. */ + { +- *base_ptr = XEXP (x, 0); +- *offset_ptr = INTVAL (XEXP (x, 1)); ++ /* If we need to restore registers, deallocate as much stack as ++ possible in the second step without going out of range. */ ++ if ((frame->mask | frame->fmask) != 0) ++ { ++ step2 = loongarch_first_stack_step (frame); ++ rtx tmp_reg = plus_constant (Pmode, ++ hard_frame_pointer_rtx, ++ -(frame->hard_frame_pointer_offset ++ - frame->frame_pointer_offset)); ++ insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, tmp_reg)); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ } + } +- else ++ ++ /* Restore the registers. */ ++ loongarch_for_each_saved_reg (frame->total_size - step2, ++ loongarch_restore_reg); ++ ++ cfun->machine->frame.mask = mask; ++ ++ if (need_barrier_p) ++ loongarch_emit_stack_tie (); ++ ++ /* Deallocate the final bit of the frame. 
*/ ++ if (step2 > 0) + { +- *base_ptr = x; +- *offset_ptr = 0; ++ if (stack_realign_drap) ++ { ++ rtx tmp_reg = gen_rtx_REG (Pmode, find_drap_reg ()); ++ insn = emit_insn (gen_add3_insn (stack_pointer_rtx, ++ tmp_reg, ++ const0_rtx)); ++ } ++ else ++ insn = emit_insn (gen_add3_insn (stack_pointer_rtx, ++ stack_pointer_rtx, ++ GEN_INT (step2))); ++ ++ rtx dwarf = NULL_RTX; ++ rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx, const0_rtx); ++ dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ ++ REG_NOTES (insn) = dwarf; + } ++ ++ /* Add in the __builtin_eh_return stack adjustment. */ ++ if (crtl->calls_eh_return) ++ emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, ++ EH_RETURN_STACKADJ_RTX)); ++ ++ if (!sibcall_p) ++ emit_jump_insn (gen_simple_return_internal (ra)); + } +- +-static unsigned int loongarch_build_integer (struct loongarch_integer_op *, +- unsigned HOST_WIDE_INT); ++ ++#define LU32I_B (0xfffffULL << 32) ++#define LU52I_B (0xfffULL << 52) + + /* Fill CODES with a sequence of rtl operations to load VALUE. +- Return the number of operations needed. +- Split interger in loongarch_output_move. */ ++ Return the number of operations needed. */ + + static unsigned int + loongarch_build_integer (struct loongarch_integer_op *codes, +- unsigned HOST_WIDE_INT value) ++ HOST_WIDE_INT value) ++ + { +- uint32_t hi32, lo32; +- char all0_bit_vec, sign_bit_vec, allf_bit_vec, paritial_is_sext_of_prev; + unsigned int cost = 0; + +- lo32 = value & 0xffffffff; +- hi32 = value >> 32; +- +- all0_bit_vec = (((hi32 & 0xfff00000) == 0) << 3) +- | (((hi32 & 0x000fffff) == 0) << 2) +- | (((lo32 & 0xfffff000) == 0) << 1) +- | ((lo32 & 0x00000fff) == 0); +- sign_bit_vec = (((hi32 & 0x80000000) != 0) << 3) +- | (((hi32 & 0x00080000) != 0) << 2) +- | (((lo32 & 0x80000000) != 0) << 1) +- | ((lo32 & 0x00000800) != 0); +- allf_bit_vec = (((hi32 & 0xfff00000) == 0xfff00000) << 3) +- | (((hi32 & 0x000fffff) == 0x000fffff) << 2) +- | (((lo32 & 0xfffff000) == 0xfffff000) << 1) +- | ((lo32 & 0x00000fff) == 0x00000fff); +- paritial_is_sext_of_prev = (all0_bit_vec ^ allf_bit_vec) +- & (all0_bit_vec ^ (sign_bit_vec << 1)); +- +- do +- { +- if (paritial_is_sext_of_prev == 0x7) +- { +- codes[0].code = UNKNOWN; +- codes[0].method = METHOD_LU52I; +- codes[0].value = value & 0xfff0000000000000; +- cost++; +- break; +- } +- if ((all0_bit_vec & 0x3) == 0x2) +- { +- codes[cost].code = UNKNOWN; +- codes[cost].method = METHOD_NORMAL; +- codes[cost].value = value & 0xfff; +- cost++; +- } +- else +- { +- switch (paritial_is_sext_of_prev & 0x3) +- { +- case 0: +- codes[cost].code = UNKNOWN; +- codes[cost].method = METHOD_NORMAL; +- codes[cost].value = ((HOST_WIDE_INT)value << 32 >> 32) & 0xfffffffffffff000; +- cost++; +- codes[cost].code = IOR; +- codes[cost].method = METHOD_NORMAL; +- codes[cost].value = value & 0xfff; +- cost++; +- break; +- case 1: +- codes[cost].code = UNKNOWN; +- codes[cost].method = METHOD_NORMAL; +- codes[cost].value = ((HOST_WIDE_INT)value << 32 >> 32) & 0xfffffffffffff000; +- cost++; +- break; +- case 2: +- codes[cost].code = UNKNOWN; +- codes[cost].method = METHOD_NORMAL; +- codes[cost].value = (HOST_WIDE_INT)value << 52 >> 52; +- cost++; +- break; +- case 3: +- codes[cost].code = UNKNOWN; +- codes[cost].method = METHOD_NORMAL; +- codes[cost].value = 0; ++ /* Get the lower 32 bits of the value. 
*/ ++ HOST_WIDE_INT low_part = (int32_t)value; ++ ++ if (IMM12_OPERAND (low_part) || IMM12_OPERAND_UNSIGNED (low_part)) ++ { ++ /* The value of the lower 32 bit be loaded with one instruction. ++ lu12i.w. */ ++ codes[0].code = UNKNOWN; ++ codes[0].method = METHOD_NORMAL; ++ codes[0].value = low_part; ++ cost++; ++ } ++ else ++ { ++ /* lu12i.w + ior. */ ++ codes[0].code = UNKNOWN; ++ codes[0].method = METHOD_NORMAL; ++ codes[0].value = low_part & ~(IMM_REACH - 1); ++ cost++; ++ HOST_WIDE_INT iorv = low_part & (IMM_REACH - 1); ++ if (iorv != 0) ++ { ++ codes[1].code = IOR; ++ codes[1].method = METHOD_NORMAL; ++ codes[1].value = iorv; + cost++; +- break; +- default: +- gcc_unreachable (); + } +- } ++ } + +- if (((value & 0xfffffffffffff800) ^ 0xfff00000fffff800) == 0) ++ if (TARGET_64BIT) ++ { ++ bool lu32i[2] = {(value & LU32I_B) == 0, (value & LU32I_B) == LU32I_B}; ++ bool lu52i[2] = {(value & LU52I_B) == 0, (value & LU52I_B) == LU52I_B}; ++ ++ int sign31 = (value & (HOST_WIDE_INT_1U << 31)) >> 31; ++ int sign51 = (value & (HOST_WIDE_INT_1U << 51)) >> 51; ++ /* Determine whether the upper 32 bits are sign-extended from the lower ++ 32 bits. If it is, the instructions to load the high order can be ++ ommitted. */ ++ if (lu32i[sign31] && lu52i[sign31]) ++ return cost; ++ /* Determine whether bits 32-51 are sign-extended from the lower 32 ++ bits. If so, directly load 52-63 bits. */ ++ else if (lu32i[sign31]) + { +- codes[cost].method = METHOD_INSV; +- cost++; +- break; ++ codes[cost].method = METHOD_LU52I; ++ codes[cost].value = value & LU52I_B; ++ return cost + 1; + } + +- switch (paritial_is_sext_of_prev >> 2) +- { +- case 0: +- codes[cost].method = METHOD_LU32I; +- codes[cost].value = ((HOST_WIDE_INT)value << 12 >> 12) & 0xffffffff00000000; +- cost++; +- case 1: ++ codes[cost].method = METHOD_LU32I; ++ codes[cost].value = (value & LU32I_B) | (sign51 ? LU52I_B : 0); ++ cost++; ++ ++ /* Determine whether the 52-61 bits are sign-extended from the low order, ++ and if not, load the 52-61 bits. */ ++ if (!lu52i[(value & (HOST_WIDE_INT_1U << 51)) >> 51]) ++ { + codes[cost].method = METHOD_LU52I; +- codes[cost].value = value & 0xfff0000000000000; +- cost++; +- break; +- case 2: +- codes[cost].method = METHOD_LU32I; +- codes[cost].value = ((HOST_WIDE_INT)value << 12 >> 12) & 0xffffffff00000000; +- cost++; +- break; +- case 3: +- break; +- default: +- gcc_unreachable (); +- } ++ codes[cost].value = value & LU52I_B; ++ cost++; ++ } + } +- while (0); ++ ++ gcc_assert (cost <= LARCH_MAX_INTEGER_OPS); + + return cost; + } +- ++ + /* Fill CODES with a sequence of rtl operations to load VALUE. + Return the number of operations needed. +- Split interger in loongarch_output_move. */ ++ Split interger in loongarch_output_move. */ + + static unsigned int + loongarch_integer_cost (HOST_WIDE_INT value) + { + struct loongarch_integer_op codes[LARCH_MAX_INTEGER_OPS]; +- return loongarch_build_integer(codes, value); ++ return loongarch_build_integer (codes, value); + } + + /* Implement TARGET_LEGITIMATE_CONSTANT_P. */ +@@ -1785,14 +1846,13 @@ loongarch_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x) + { + return loongarch_const_insns (x) > 0; + } +- + + /* Return true if X is a thread-local symbol. 
*/ + + static bool + loongarch_tls_symbol_p (rtx x) + { +- return GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0; ++ return SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0; + } + + /* Return true if SYMBOL_REF X is associated with a global symbol +@@ -1809,9 +1869,6 @@ loongarch_global_symbol_p (const_rtx x) + if (!decl) + return !SYMBOL_REF_LOCAL_P (x) || SYMBOL_REF_EXTERNAL_P (x); + +- /* Weakref symbols are not TREE_PUBLIC, but their targets are global +- or weak symbols. Relocations in the object file will be against +- the target symbol, so it's that symbol's binding that matters here. */ + return DECL_P (decl) && (TREE_PUBLIC (decl) || DECL_WEAK (decl)); + } + +@@ -1826,9 +1883,6 @@ loongarch_global_symbol_noweak_p (const_rtx x) + if (!decl) + return !SYMBOL_REF_LOCAL_P (x) || SYMBOL_REF_EXTERNAL_P (x); + +- /* Weakref symbols are not TREE_PUBLIC, but their targets are global +- or weak symbols. Relocations in the object file will be against +- the target symbol, so it's that symbol's binding that matters here. */ + return DECL_P (decl) && TREE_PUBLIC (decl); + } + +@@ -1841,7 +1895,6 @@ loongarch_weak_symbol_p (const_rtx x) + return DECL_P (decl) && DECL_WEAK (decl); + } + +- + /* Return true if SYMBOL_REF X binds locally. */ + + bool +@@ -1850,9 +1903,8 @@ loongarch_symbol_binds_local_p (const_rtx x) + if (GET_CODE (x) == LABEL_REF) + return false; + +- return (SYMBOL_REF_DECL (x) +- ? targetm.binds_local_p (SYMBOL_REF_DECL (x)) +- : SYMBOL_REF_LOCAL_P (x)); ++ return (SYMBOL_REF_DECL (x) ? targetm.binds_local_p (SYMBOL_REF_DECL (x)) ++ : SYMBOL_REF_LOCAL_P (x)); + } + + /* Return true if OP is a constant vector with the number of units in MODE, +@@ -1995,38 +2047,34 @@ loongarch_const_vector_shuffle_set_p (rtx op, machine_mode mode) + static bool + loongarch_rtx_constant_in_small_data_p (machine_mode mode) + { +- return (GET_MODE_SIZE (mode) <= loongarch_small_data_threshold); ++ return (GET_MODE_SIZE (mode) <= g_switch_value); + } + + /* Return the method that should be used to access SYMBOL_REF or +- LABEL_REF X in context CONTEXT. */ ++ LABEL_REF X. */ + + static enum loongarch_symbol_type +-loongarch_classify_symbol (const_rtx x, enum loongarch_symbol_context context) ++loongarch_classify_symbol (const_rtx x) + { +- if (TARGET_RTP_PIC) +- return SYMBOL_GOT_DISP; +- + if (GET_CODE (x) == LABEL_REF) +- { +- return SYMBOL_GOT_DISP; +- } ++ return SYMBOL_GOT_DISP; + +- gcc_assert (GET_CODE (x) == SYMBOL_REF); ++ gcc_assert (SYMBOL_REF_P (x)); + + if (SYMBOL_REF_TLS_MODEL (x)) + return SYMBOL_TLS; + +- if (GET_CODE (x) == SYMBOL_REF) ++ if (SYMBOL_REF_P (x)) + return SYMBOL_GOT_DISP; ++ ++ return SYMBOL_GOT_DISP; + } + +-/* Return true if X is a symbolic constant that can be used in context +- CONTEXT. If it is, store the type of the symbol in *SYMBOL_TYPE. */ ++/* Return true if X is a symbolic constant. If it is, ++ store the type of the symbol in *SYMBOL_TYPE. 
*/ + + bool +-loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_context context, +- enum loongarch_symbol_type *symbol_type) ++loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_type *symbol_type) + { + rtx offset; + +@@ -2036,9 +2084,9 @@ loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_context context, + *symbol_type = UNSPEC_ADDRESS_TYPE (x); + x = UNSPEC_ADDRESS (x); + } +- else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF) ++ else if (SYMBOL_REF_P (x) || GET_CODE (x) == LABEL_REF) + { +- *symbol_type = loongarch_classify_symbol (x, context); ++ *symbol_type = loongarch_classify_symbol (x); + if (*symbol_type == SYMBOL_TLS) + return true; + } +@@ -2052,8 +2100,6 @@ loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_context context, + relocations. */ + switch (*symbol_type) + { +- /* Fall through. */ +- + case SYMBOL_GOT_DISP: + case SYMBOL_TLSGD: + case SYMBOL_TLSLDM: +@@ -2062,17 +2108,25 @@ loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_context context, + } + gcc_unreachable (); + } +- +-/* Like loongarch_symbol_insns We rely on the fact that, in the worst case. */ ++ ++/* If MODE is MAX_MACHINE_MODE, return the number of instructions needed ++ to load symbols of type TYPE into a register. Return 0 if the given ++ type of symbol cannot be used as an immediate operand. ++ ++ Otherwise, return the number of instructions needed to load or store ++ values of mode MODE to or from addresses of type TYPE. Return 0 if ++ the given type of symbol is not valid in addresses. */ + + static int +-loongarch_symbol_insns_1 (enum loongarch_symbol_type type, machine_mode mode) ++loongarch_symbol_insns (enum loongarch_symbol_type type, machine_mode mode) + { +- if (loongarch_use_pcrel_pool_p[(int) type]) +- { +- /* The constant must be loaded and then dereferenced. */ +- return 0; +- } ++ /* LSX LD.* and ST.* cannot support loading symbols via an immediate ++ operand. */ ++ if (LSX_SUPPORTED_MODE_P (mode)) ++ return 0; ++ ++ if (LASX_SUPPORTED_MODE_P (mode)) ++ return 0; + + switch (type) + { +@@ -2082,8 +2136,6 @@ loongarch_symbol_insns_1 (enum loongarch_symbol_type type, machine_mode mode) + if (mode != MAX_MACHINE_MODE) + return 0; + +- /* Fall through. */ +- + return 3; + + case SYMBOL_TLSGD: +@@ -2097,30 +2149,6 @@ loongarch_symbol_insns_1 (enum loongarch_symbol_type type, machine_mode mode) + gcc_unreachable (); + } + +-/* If MODE is MAX_MACHINE_MODE, return the number of instructions needed +- to load symbols of type TYPE into a register. Return 0 if the given +- type of symbol cannot be used as an immediate operand. +- +- Otherwise, return the number of instructions needed to load or store +- values of mode MODE to or from addresses of type TYPE. Return 0 if +- the given type of symbol is not valid in addresses. +- +- In both cases, instruction counts are based off BASE_INSN_LENGTH. */ +- +-static int +-loongarch_symbol_insns (enum loongarch_symbol_type type, machine_mode mode) +-{ +- /* LSX LD.* and ST.* cannot support loading symbols via an immediate +- operand. */ +- if (LSX_SUPPORTED_MODE_P (mode)) +- return 0; +- +- if (LASX_SUPPORTED_MODE_P (mode)) +- return 0; +- +- return loongarch_symbol_insns_1 (type, mode) * (1); +-} +- + /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ + + static bool +@@ -2129,11 +2157,6 @@ loongarch_cannot_force_const_mem (machine_mode mode, rtx x) + enum loongarch_symbol_type type; + rtx base, offset; + +- /* There is no assembler syntax for expressing an address-sized +- high part. 
*/ +- if (GET_CODE (x) == HIGH) +- return true; +- + /* As an optimization, reject constants that loongarch_legitimize_move + can expand inline. + +@@ -2147,16 +2170,12 @@ loongarch_cannot_force_const_mem (machine_mode mode, rtx x) + return true; + + split_const (x, &base, &offset); +- if (loongarch_symbolic_constant_p (base, SYMBOL_CONTEXT_LEA, &type)) ++ if (loongarch_symbolic_constant_p (base, &type)) + { +- /* See whether we explicitly want these symbols in the pool. */ +- if (loongarch_use_pcrel_pool_p[(int) type]) +- return false; +- + /* The same optimization as for CONST_INT. */ +- if (SMALL_INT (offset) && loongarch_symbol_insns (type, MAX_MACHINE_MODE) > 0) ++ if (IMM12_INT (offset) ++ && loongarch_symbol_insns (type, MAX_MACHINE_MODE) > 0) + return true; +- + } + + /* TLS symbols must be computed by loongarch_legitimize_move. */ +@@ -2166,22 +2185,13 @@ loongarch_cannot_force_const_mem (machine_mode mode, rtx x) + return false; + } + +-/* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. We can't use blocks for +- constants when we're using a per-function constant pool. */ +- +-static bool +-loongarch_use_blocks_for_constant_p (machine_mode mode ATTRIBUTE_UNUSED, +- const_rtx x ATTRIBUTE_UNUSED) +-{ +- return 1; +-} +- + /* Return true if register REGNO is a valid base register for mode MODE. + STRICT_P is true if REG_OK_STRICT is in effect. */ + + int +-loongarch_regno_mode_ok_for_base_p (int regno, machine_mode mode, +- bool strict_p) ++loongarch_regno_mode_ok_for_base_p (int regno, ++ machine_mode mode ATTRIBUTE_UNUSED, ++ bool strict_p) + { + if (!HARD_REGISTER_NUM_P (regno)) + { +@@ -2196,7 +2206,6 @@ loongarch_regno_mode_ok_for_base_p (int regno, machine_mode mode, + if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM) + return true; + +- + return GP_REG_P (regno); + } + +@@ -2206,7 +2215,7 @@ loongarch_regno_mode_ok_for_base_p (int regno, machine_mode mode, + static bool + loongarch_valid_base_register_p (rtx x, machine_mode mode, bool strict_p) + { +- if (!strict_p && GET_CODE (x) == SUBREG) ++ if (!strict_p && SUBREG_P (x)) + x = SUBREG_REG (x); + + return (REG_P (x) +@@ -2220,8 +2229,8 @@ static bool + loongarch_valid_offset_p (rtx x, machine_mode mode) + { + /* Check that X is a signed 12-bit number, +- * or check that X is a signed 16-bit number +- * and offset 4 byte aligned */ ++ or check that X is a signed 16-bit number ++ and offset 4 byte aligned. */ + if (!(const_arith_operand (x, Pmode) + || ((mode == E_SImode || mode == E_DImode) + && const_imm16_operand (x, Pmode) +@@ -2231,7 +2240,7 @@ loongarch_valid_offset_p (rtx x, machine_mode mode) + /* We may need to split multiword moves, so make sure that every word + is accessible. */ + if (GET_MODE_SIZE (mode) > UNITS_PER_WORD +- && !SMALL_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode) - UNITS_PER_WORD)) ++ && !IMM12_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode) - UNITS_PER_WORD)) + return false; + + /* LSX LD.* and ST.* supports 10-bit signed offsets. 
*/ +@@ -2248,13 +2257,42 @@ loongarch_valid_offset_p (rtx x, machine_mode mode) + return true; + } + ++static bool ++loongarch_valid_index_p (struct loongarch_address_info *info, rtx x, ++ machine_mode mode, bool strict_p) ++{ ++ rtx index; ++ ++ if ((REG_P (x) || SUBREG_P (x)) ++ && GET_MODE (x) == Pmode) ++ { ++ index = x; ++ } ++ else ++ return false; ++ ++ if (!strict_p ++ && SUBREG_P (index) ++ && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (index))]) ++ index = SUBREG_REG (index); ++ ++ if (loongarch_valid_base_register_p (index, mode, strict_p)) ++ { ++ info->type = ADDRESS_REG_REG; ++ info->offset = index; ++ return true; ++ } ++ ++ return false; ++} ++ + /* Return true if X is a valid address for machine mode MODE. If it is, + fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in + effect. */ + + static bool + loongarch_classify_address (struct loongarch_address_info *info, rtx x, +- machine_mode mode, bool strict_p) ++ machine_mode mode, bool strict_p) + { + switch (GET_CODE (x)) + { +@@ -2266,21 +2304,26 @@ loongarch_classify_address (struct loongarch_address_info *info, rtx x, + return loongarch_valid_base_register_p (info->reg, mode, strict_p); + + case PLUS: ++/* ++ if (loongarch_valid_base_register_p (XEXP (x, 0), mode, strict_p) ++ && loongarch_valid_index_p (info, XEXP (x, 1), mode, strict_p)) ++ { ++ info->reg = XEXP (x, 0); ++ return true; ++ } ++ ++ if (loongarch_valid_base_register_p (XEXP (x, 1), mode, strict_p) ++ && loongarch_valid_index_p (info, XEXP (x, 0), mode, strict_p)) ++ { ++ info->reg = XEXP (x, 1); ++ return true; ++ } ++*/ + info->type = ADDRESS_REG; + info->reg = XEXP (x, 0); + info->offset = XEXP (x, 1); + return (loongarch_valid_base_register_p (info->reg, mode, strict_p) + && loongarch_valid_offset_p (info->offset, mode)); +- #if 0 +- case LABEL_REF: +- case SYMBOL_REF: +- info->type = ADDRESS_SYMBOLIC; +- return (loongarch_symbolic_constant_p (x, SYMBOL_CONTEXT_MEM, +- &info->symbol_type) +- && loongarch_symbol_insns (info->symbol_type, mode) > 0 +- && !loongarch_split_p[info->symbol_type]); +- +- #endif + default: + return false; + } +@@ -2296,39 +2339,21 @@ loongarch_legitimate_address_p (machine_mode mode, rtx x, bool strict_p) + return loongarch_classify_address (&addr, x, mode, strict_p); + } + +-/* Return true if X is a legitimate $sp-based address for mode MODE. */ +- +-bool +-loongarch_stack_address_p (rtx x, machine_mode mode) +-{ +- struct loongarch_address_info addr; +- +- return (loongarch_classify_address (&addr, x, mode, false) +- && addr.type == ADDRESS_REG +- && addr.reg == stack_pointer_rtx); +-} +- +-/* Return true if ADDR matches the pattern for the L{B,H,W,D}{,U}X load +- indexed address instruction. Note that such addresses are +- not considered legitimate in the TARGET_LEGITIMATE_ADDRESS_P +- sense, because their use is so restricted. */ ++/* Return true if ADDR matches the pattern for the indexed address ++ instruction. */ + + static bool +-loongarch_lx_address_p (rtx addr, machine_mode mode) ++loongarch_index_address_p (rtx addr, machine_mode mode ATTRIBUTE_UNUSED) + { + if (GET_CODE (addr) != PLUS + || !REG_P (XEXP (addr, 0)) + || !REG_P (XEXP (addr, 1))) + return false; +- if (LSX_SUPPORTED_MODE_P (mode)) +- return true; +- return false; ++ return true; + } +- + + /* Return the number of instructions needed to load or store a value +- of mode MODE at address X, assuming that BASE_INSN_LENGTH is the +- length of one instruction. Return 0 if X isn't valid for MODE. ++ of mode MODE at address X. 
Return 0 if X isn't valid for MODE. + Assume that multiword moves may need to be split into word moves + if MIGHT_SPLIT_P, otherwise assume that a single load or store is + enough. */ +@@ -2338,7 +2363,8 @@ loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p) + { + struct loongarch_address_info addr; + int factor; +- bool lsx_p = (!might_split_p && (LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode))); ++ bool lsx_p = (!might_split_p && ++ (LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode))); + + if (!loongarch_classify_address (&addr, x, mode, false)) + return 0; +@@ -2367,6 +2393,9 @@ loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p) + } + return factor; + ++ case ADDRESS_REG_REG: ++ return lsx_p ? 0 : factor; ++ + case ADDRESS_CONST_INT: + return lsx_p ? 0 : factor; + +@@ -2380,7 +2409,8 @@ loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p) + shifted left SHIFT bits before being used. */ + + bool +-loongarch_unsigned_immediate_p (unsigned HOST_WIDE_INT x, int bits, int shift = 0) ++loongarch_unsigned_immediate_p (unsigned HOST_WIDE_INT x, int bits, ++ int shift = 0) + { + return (x & ((1 << shift) - 1)) == 0 && x < ((unsigned) 1 << (shift + bits)); + } +@@ -2389,7 +2419,8 @@ loongarch_unsigned_immediate_p (unsigned HOST_WIDE_INT x, int bits, int shift = + shifted left SHIFT bits before being used. */ + + bool +-loongarch_signed_immediate_p (unsigned HOST_WIDE_INT x, int bits, int shift = 0) ++loongarch_signed_immediate_p (unsigned HOST_WIDE_INT x, int bits, ++ int shift = 0) + { + x += 1 << (bits + shift - 1); + return loongarch_unsigned_immediate_p (x, bits, shift); +@@ -2408,20 +2439,6 @@ loongarch_ldst_scaled_shift (machine_mode mode) + return shift; + } + +-/* Return true if X is a legitimate address that conforms to the requirements +- for a microLARCH LWSP or SWSP insn. */ +- +-bool +-lwsp_swsp_address_p (rtx x, machine_mode mode) +-{ +- struct loongarch_address_info addr; +- +- return (loongarch_classify_address (&addr, x, mode, false) +- && addr.type == ADDRESS_REG +- && REGNO (addr.reg) == STACK_POINTER_REGNUM +- && uw5_operand (addr.offset, mode)); +-} +- + /* Return true if X is a legitimate address with a 12-bit offset. + MODE is the mode of the value being accessed. */ + +@@ -2433,54 +2450,47 @@ loongarch_12bit_offset_address_p (rtx x, machine_mode mode) + return (loongarch_classify_address (&addr, x, mode, false) + && addr.type == ADDRESS_REG + && CONST_INT_P (addr.offset) +- && ULARCH_12BIT_OFFSET_P (INTVAL (addr.offset))); ++ && LARCH_U12BIT_OFFSET_P (INTVAL (addr.offset))); + } + +-/* Return true if X is a legitimate address with a 9-bit offset. ++/* Return true if X is a legitimate address with a 14-bit offset shifted 2. + MODE is the mode of the value being accessed. */ + + bool +-loongarch_9bit_offset_address_p (rtx x, machine_mode mode) ++loongarch_14bit_shifted_offset_address_p (rtx x, machine_mode mode) + { + struct loongarch_address_info addr; + + return (loongarch_classify_address (&addr, x, mode, false) + && addr.type == ADDRESS_REG + && CONST_INT_P (addr.offset) +- && LARCH_9BIT_OFFSET_P (INTVAL (addr.offset))); ++ && LARCH_16BIT_OFFSET_P (INTVAL (addr.offset)) ++ && LARCH_SHIFT_2_OFFSET_P (INTVAL (addr.offset))); + } + +-/* Return true if X is a legitimate address with a 14-bit offset shifted 2. +- MODE is the mode of the value being accessed. 
*/ +- + bool +-loongarch_14bit_shifted_offset_address_p (rtx x, machine_mode mode) ++loongarch_base_index_address_p (rtx x, machine_mode mode) + { + struct loongarch_address_info addr; + + return (loongarch_classify_address (&addr, x, mode, false) +- && addr.type == ADDRESS_REG +- && CONST_INT_P (addr.offset) +- && LISA_16BIT_OFFSET_P (INTVAL (addr.offset)) +- && LISA_SHIFT_2_OFFSET_P (INTVAL (addr.offset))); ++ && addr.type == ADDRESS_REG_REG ++ && REG_P (addr.offset)); + } + +- + /* Return the number of instructions needed to load constant X, +- assuming that BASE_INSN_LENGTH is the length of one instruction. + Return 0 if X isn't a valid constant. */ + + int + loongarch_const_insns (rtx x) + { +- struct loongarch_integer_op codes[LARCH_MAX_INTEGER_OPS]; + enum loongarch_symbol_type symbol_type; + rtx offset; + + switch (GET_CODE (x)) + { + case CONST_INT: +- return loongarch_build_integer (codes, INTVAL (x)); ++ return loongarch_integer_cost (INTVAL (x)); + + case CONST_VECTOR: + if ((ISA_HAS_LSX || ISA_HAS_LASX) +@@ -2488,19 +2498,18 @@ loongarch_const_insns (rtx x) + return 1; + /* Fall through. */ + case CONST_DOUBLE: +- /* Allow zeros for normal mode, where we can use $0. */ + return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0; + + case CONST: + /* See if we can refer to X directly. */ +- if (loongarch_symbolic_constant_p (x, SYMBOL_CONTEXT_LEA, &symbol_type)) ++ if (loongarch_symbolic_constant_p (x, &symbol_type)) + return loongarch_symbol_insns (symbol_type, MAX_MACHINE_MODE); + + /* Otherwise try splitting the constant into a base and offset. +- If the offset is a 16-bit value, we can load the base address +- into a register and then use (D)ADDIU to add in the offset. ++ If the offset is a 12-bit value, we can load the base address ++ into a register and then use ADDI.{W/D} to add in the offset. + If the offset is larger, we can load the base and offset +- into separate registers and add them together with (D)ADDU. ++ into separate registers and add them together with ADD.{W/D}. + However, the latter is only possible before reload; during + and after reload, we must have the option of forcing the + constant into the pool instead. */ +@@ -2510,18 +2519,18 @@ loongarch_const_insns (rtx x) + int n = loongarch_const_insns (x); + if (n != 0) + { +- if (SMALL_INT (offset)) ++ if (IMM12_INT (offset)) + return n + 1; + else if (!targetm.cannot_force_const_mem (GET_MODE (x), x)) +- return n + 1 + loongarch_build_integer (codes, INTVAL (offset)); ++ return n + 1 + loongarch_integer_cost (INTVAL (offset)); + } + } + return 0; + + case SYMBOL_REF: + case LABEL_REF: +- return loongarch_symbol_insns (loongarch_classify_symbol (x, SYMBOL_CONTEXT_LEA), +- MAX_MACHINE_MODE); ++ return loongarch_symbol_insns ( ++ loongarch_classify_symbol (x), MAX_MACHINE_MODE); + + default: + return 0; +@@ -2530,8 +2539,7 @@ loongarch_const_insns (rtx x) + + /* X is a doubleword constant that can be handled by splitting it into + two words and loading each word separately. Return the number of +- instructions required to do this, assuming that BASE_INSN_LENGTH +- is the length of one instruction. */ ++ instructions required to do this. */ + + int + loongarch_split_const_insns (rtx x) +@@ -2565,8 +2573,7 @@ loongarch_subword_at_byte (rtx op, unsigned int byte) + } + + /* Return the number of instructions needed to implement INSN, +- given that it loads from or stores to MEM. Assume that +- BASE_INSN_LENGTH is the length of one instruction. */ ++ given that it loads from or stores to MEM. 
*/ + + int + loongarch_load_store_insns (rtx mem, rtx_insn *insn) +@@ -2583,18 +2590,18 @@ loongarch_load_store_insns (rtx mem, rtx_insn *insn) + if (might_split_p) + { + set = single_set (insn); +- if (set && !loongarch_split_move_insn_p (SET_DEST (set), SET_SRC (set), insn)) ++ if (set ++ && !loongarch_split_move_insn_p (SET_DEST (set), SET_SRC (set))) + might_split_p = false; + } + + return loongarch_address_insns (XEXP (mem, 0), mode, might_split_p); + } + +-/* Return the number of instructions needed for an integer division, +- assuming that BASE_INSN_LENGTH is the length of one instruction. */ ++/* Return the number of instructions needed for an integer division. */ + + int +-loongarch_idiv_insns (machine_mode mode) ++loongarch_idiv_insns (machine_mode mode ATTRIBUTE_UNUSED) + { + int count; + +@@ -2605,7 +2612,6 @@ loongarch_idiv_insns (machine_mode mode) + return count; + } + +- + /* Emit an instruction of the form (set TARGET (CODE OP0 OP1)). */ + + void +@@ -2619,7 +2625,8 @@ loongarch_emit_binary (enum rtx_code code, rtx target, rtx op0, rtx op1) + of mode MODE. Return that new register. */ + + static rtx +-loongarch_force_binary (machine_mode mode, enum rtx_code code, rtx op0, rtx op1) ++loongarch_force_binary (machine_mode mode, enum rtx_code code, rtx op0, ++ rtx op1) + { + rtx reg; + +@@ -2643,13 +2650,12 @@ loongarch_force_temporary (rtx dest, rtx value) + } + } + +- + /* Wrap symbol or label BASE in an UNSPEC address of type SYMBOL_TYPE, + then add CONST_INT OFFSET to the result. */ + + static rtx + loongarch_unspec_address_offset (rtx base, rtx offset, +- enum loongarch_symbol_type symbol_type) ++ enum loongarch_symbol_type symbol_type) + { + base = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, base), + UNSPEC_ADDRESS_FIRST + symbol_type); +@@ -2684,42 +2690,20 @@ loongarch_strip_unspec_address (rtx op) + return op; + } + +- +-/* Return a base register that holds pic_offset_table_rtx. +- TEMP, if nonnull, is a scratch Pmode base register. */ +- +-rtx +-loongarch_pic_base_register (rtx temp) +-{ +- return pic_offset_table_rtx; +- +-} +- +-/* If SRC is the RHS of a load_call insn, return the underlying symbol +- reference. Return NULL_RTX otherwise. */ +- +-static rtx +-loongarch_strip_unspec_call (rtx src) +-{ +- if (GET_CODE (src) == UNSPEC && XINT (src, 1) == UNSPEC_LOAD_CALL) +- return loongarch_strip_unspec_address (XVECEXP (src, 0, 1)); +- return NULL_RTX; +-} +- + /* Return a legitimate address for REG + OFFSET. TEMP is as for + loongarch_force_temporary; it is only needed when OFFSET is not a +- SMALL_OPERAND. */ ++ IMM12_OPERAND. */ + + static rtx + loongarch_add_offset (rtx temp, rtx reg, HOST_WIDE_INT offset) + { +- if (!SMALL_OPERAND (offset)) ++ if (!IMM12_OPERAND (offset)) + { + rtx high; + +- /* Leave OFFSET as a 16-bit offset and put the excess in HIGH. +- The addition inside the macro CONST_HIGH_PART may cause an +- overflow, so we need to force a sign-extension check. */ ++ /* Leave OFFSET as a 12-bit offset and put the excess in HIGH. ++ The addition inside the macro CONST_HIGH_PART may cause an ++ overflow, so we need to force a sign-extension check. */ + high = gen_int_mode (CONST_HIGH_PART (offset), Pmode); + offset = CONST_LOW_PART (offset); + high = loongarch_force_temporary (temp, high); +@@ -2727,49 +2711,40 @@ loongarch_add_offset (rtx temp, rtx reg, HOST_WIDE_INT offset) + } + return plus_constant (Pmode, reg, offset); + } +- ++ + /* The __tls_get_attr symbol. 
*/ +-static GTY(()) rtx loongarch_tls_symbol; ++static GTY (()) rtx loongarch_tls_symbol; + + /* Load an entry from the GOT for a TLS GD access. */ + +-static rtx loongarch_got_load_tls_gd (rtx dest, rtx sym) ++static rtx ++loongarch_got_load_tls_gd (rtx dest, rtx sym) + { +- if (Pmode == DImode) +- return gen_got_load_tls_gddi (dest, sym); +- else +- return gen_got_load_tls_gdsi (dest, sym); ++ return PMODE_INSN (gen_got_load_tls_gd, (dest, sym)); + } + + /* Load an entry from the GOT for a TLS LD access. */ + +-static rtx loongarch_got_load_tls_ld (rtx dest, rtx sym) ++static rtx ++loongarch_got_load_tls_ld (rtx dest, rtx sym) + { +- if (Pmode == DImode) +- return gen_got_load_tls_lddi (dest, sym); +- else +- return gen_got_load_tls_ldsi (dest, sym); ++ return PMODE_INSN (gen_got_load_tls_ld, (dest, sym)); + } + +- + /* Load an entry from the GOT for a TLS IE access. */ + +-static rtx loongarch_got_load_tls_ie (rtx dest, rtx sym) ++static rtx ++loongarch_got_load_tls_ie (rtx dest, rtx sym) + { +- if (Pmode == DImode) +- return gen_got_load_tls_iedi (dest, sym); +- else +- return gen_got_load_tls_iesi (dest, sym); ++ return PMODE_INSN (gen_got_load_tls_ie, (dest, sym)); + } + + /* Add in the thread pointer for a TLS LE access. */ + +-static rtx loongarch_got_load_tls_le (rtx dest, rtx sym) ++static rtx ++loongarch_got_load_tls_le (rtx dest, rtx sym) + { +- if (Pmode == DImode) +- return gen_got_load_tls_ledi (dest, sym); +- else +- return gen_got_load_tls_lesi (dest, sym); ++ return PMODE_INSN (gen_got_load_tls_le, (dest, sym)); + } + + /* Return an instruction sequence that calls __tls_get_addr. SYM is +@@ -2799,7 +2774,8 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + else + gcc_unreachable (); + +- insn = emit_call_insn (gen_call_value_internal (v0, loongarch_tls_symbol, const0_rtx)); ++ insn = emit_call_insn (gen_call_value_internal (v0, loongarch_tls_symbol, ++ const0_rtx)); + RTL_CONST_CALL_P (insn) = 1; + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), a0); + insn = get_insns (); +@@ -2820,12 +2796,6 @@ loongarch_legitimize_tls_address (rtx loc) + enum tls_model model = SYMBOL_REF_TLS_MODEL (loc); + rtx_insn *insn; + +- /* Since we support TLS copy relocs, non-PIC TLS accesses may all use LE. */ +- #if 0 +- if (!flag_pic) +- model = TLS_MODEL_LOCAL_EXEC; +- #endif +- + switch (model) + { + case TLS_MODEL_LOCAL_DYNAMIC: +@@ -2843,7 +2813,7 @@ loongarch_legitimize_tls_address (rtx loc) + break; + + case TLS_MODEL_INITIAL_EXEC: +- /* la.tls.ie; tp-relative add */ ++ /* la.tls.ie; tp-relative add */ + tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); + tmp = gen_reg_rtx (Pmode); + emit_insn (loongarch_got_load_tls_ie (tmp, loc)); +@@ -2852,7 +2822,7 @@ loongarch_legitimize_tls_address (rtx loc) + break; + + case TLS_MODEL_LOCAL_EXEC: +- /* la.tls.le; tp-relative add */ ++ /* la.tls.le; tp-relative add */ + tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); + tmp = gen_reg_rtx (Pmode); + emit_insn (loongarch_got_load_tls_le (tmp, loc)); +@@ -2865,7 +2835,7 @@ loongarch_legitimize_tls_address (rtx loc) + } + return dest; + } +- ++ + rtx + loongarch_legitimize_call_address (rtx addr) + { +@@ -2877,7 +2847,25 @@ loongarch_legitimize_call_address (rtx addr) + } + return addr; + } +- ++ ++/* If X is a PLUS of a CONST_INT, return the two terms in *BASE_PTR ++ and *OFFSET_PTR. Return X in *BASE_PTR and 0 in *OFFSET_PTR otherwise. 
*/ ++ ++static void ++loongarch_split_plus (rtx x, rtx *base_ptr, HOST_WIDE_INT *offset_ptr) ++{ ++ if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))) ++ { ++ *base_ptr = XEXP (x, 0); ++ *offset_ptr = INTVAL (XEXP (x, 1)); ++ } ++ else ++ { ++ *base_ptr = x; ++ *offset_ptr = 0; ++ } ++} ++ + /* If X is not a valid address for mode MODE, force it into a register. */ + + static rtx +@@ -2895,7 +2883,7 @@ loongarch_force_address (rtx x, machine_mode mode) + + static rtx + loongarch_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, +- machine_mode mode) ++ machine_mode mode) + { + rtx base, addr; + HOST_WIDE_INT offset; +@@ -2941,28 +2929,30 @@ loongarch_move_integer (rtx temp, rtx dest, unsigned HOST_WIDE_INT value) + } + else + x = force_reg (mode, x); ++ + switch (codes[i].method) + { + case METHOD_NORMAL: +- x = gen_rtx_fmt_ee (codes[i].code, mode, x, GEN_INT (codes[i].value)); ++ x = gen_rtx_fmt_ee (codes[i].code, mode, x, ++ GEN_INT (codes[i].value)); + break; + case METHOD_LU32I: +- emit_insn (gen_rtx_SET (x, gen_rtx_IOR (DImode, +- gen_rtx_ZERO_EXTEND (DImode, +- gen_rtx_SUBREG (SImode, x, 0)), +- GEN_INT (codes[i].value)))); ++ emit_insn ( ++ gen_rtx_SET (x, ++ gen_rtx_IOR (DImode, ++ gen_rtx_ZERO_EXTEND ( ++ DImode, gen_rtx_SUBREG (SImode, x, 0)), ++ GEN_INT (codes[i].value)))); + break; + case METHOD_LU52I: +- emit_insn (gen_lu52i_d (x, x, +- GEN_INT (0xfffffffffffff), +- GEN_INT (codes[i].value))); ++ emit_insn (gen_lu52i_d (x, x, GEN_INT (0xfffffffffffff), ++ GEN_INT (codes[i].value))); + break; + case METHOD_INSV: +- emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, +- x, +- GEN_INT (20), +- GEN_INT (32)), +- gen_rtx_REG (DImode, 0))); ++ emit_insn ( ++ gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, x, GEN_INT (20), ++ GEN_INT (32)), ++ gen_rtx_REG (DImode, 0))); + break; + default: + gcc_unreachable (); +@@ -2997,7 +2987,7 @@ loongarch_legitimize_const_move (machine_mode mode, rtx dest, rtx src) + + /* If we have (const (plus symbol offset)), and that expression cannot + be forced into memory, load the symbol first and add in the offset. +- prefer to do this even if the constant _can_ be forced into memory, ++ prefer to do this even if the constant _can_ be forced into memory, + as it usually produces better code. */ + split_const (src, &base, &offset); + if (offset != const0_rtx +@@ -3005,7 +2995,8 @@ loongarch_legitimize_const_move (machine_mode mode, rtx dest, rtx src) + || (can_create_pseudo_p ()))) + { + base = loongarch_force_temporary (dest, base); +- loongarch_emit_move (dest, loongarch_add_offset (NULL, base, INTVAL (offset))); ++ loongarch_emit_move (dest, ++ loongarch_add_offset (NULL, base, INTVAL (offset))); + return; + } + +@@ -3020,7 +3011,6 @@ loongarch_legitimize_const_move (machine_mode mode, rtx dest, rtx src) + bool + loongarch_legitimize_move (machine_mode mode, rtx dest, rtx src) + { +- + if (!register_operand (dest, mode) && !reg_or_0_operand (src, mode)) + { + loongarch_emit_move (dest, force_reg (mode, src)); +@@ -3029,10 +3019,9 @@ loongarch_legitimize_move (machine_mode mode, rtx dest, rtx src) + + /* Both src and dest are non-registers; one special case is supported where + the source is (const_int 0) and the store can source the zero register. +- LSX and lasx are never able to source the zero register directly in ++ LSX and LASX are never able to source the zero register directly in + memory operations. 
*/ +- if (!register_operand (dest, mode) +- && !register_operand (src, mode) ++ if (!register_operand (dest, mode) && !register_operand (src, mode) + && (!const_0_operand (src, mode) + || LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode))) + { +@@ -3049,40 +3038,26 @@ loongarch_legitimize_move (machine_mode mode, rtx dest, rtx src) + return true; + } + +- if ((GET_CODE (src) == SYMBOL_REF || GET_CODE (src) == LABEL_REF) +- && symbolic_operand (src, VOIDmode) +- && (loongarch_cmodel_var == LARCH_CMODEL_EXTREME)) +- { +- rtx temp = gen_reg_rtx (GET_MODE (dest)); +- rtx x = gen_rtx_UNSPEC_VOLATILE (GET_MODE (dest), gen_rtvec (1, src), UNSPECV_MOVE_EXTREME); +- temp = gen_rtx_USE(VOIDmode, temp); +- temp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec(2, gen_rtx_SET (dest, x), temp)); +- emit_insn (temp); +- return true; +- } +- + return false; + } + +-/* Return true if OP refers to small data symbols directly, not through +- a LO_SUM. CONTEXT is the context in which X appears. */ ++/* Return true if OP refers to small data symbols directly. */ + + static int +-loongarch_small_data_pattern_1 (rtx x, enum loongarch_symbol_context context) ++loongarch_small_data_pattern_1 (rtx x) + { + subrtx_var_iterator::array_type array; + FOR_EACH_SUBRTX_VAR (iter, array, x, ALL) + { + rtx x = *iter; + +- /* Ignore things like "g" constraints in asms. We make no particular +- guarantee about which symbolic constants are acceptable as asm operands +- versus which must be forced into a GPR. */ ++ /* We make no particular guarantee about which symbolic constants are ++ acceptable as asm operands versus which must be forced into a GPR. */ + if (GET_CODE (x) == ASM_OPERANDS) + iter.skip_subrtxes (); + else if (MEM_P (x)) + { +- if (loongarch_small_data_pattern_1 (XEXP (x, 0), SYMBOL_CONTEXT_MEM)) ++ if (loongarch_small_data_pattern_1 (XEXP (x, 0))) + return true; + iter.skip_subrtxes (); + } +@@ -3090,20 +3065,19 @@ loongarch_small_data_pattern_1 (rtx x, enum loongarch_symbol_context context) + return false; + } + +-/* Return true if OP refers to small data symbols directly, not through +- a LO_SUM. */ ++/* Return true if OP refers to small data symbols directly. */ + + bool + loongarch_small_data_pattern_p (rtx op) + { +- return loongarch_small_data_pattern_1 (op, SYMBOL_CONTEXT_LEA); ++ return loongarch_small_data_pattern_1 (op); + } + + /* Rewrite *LOC so that it refers to small data using explicit +- relocations. CONTEXT is the context in which *LOC appears. */ ++ relocation. */ + + static void +-loongarch_rewrite_small_data_1 (rtx *loc, enum loongarch_symbol_context context) ++loongarch_rewrite_small_data_1 (rtx *loc) + { + subrtx_ptr_iterator::array_type array; + FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL) +@@ -3111,7 +3085,7 @@ loongarch_rewrite_small_data_1 (rtx *loc, enum loongarch_symbol_context context) + rtx *loc = *iter; + if (MEM_P (*loc)) + { +- loongarch_rewrite_small_data_1 (&XEXP (*loc, 0), SYMBOL_CONTEXT_MEM); ++ loongarch_rewrite_small_data_1 (&XEXP (*loc, 0)); + iter.skip_subrtxes (); + } + } +@@ -3124,15 +3098,15 @@ rtx + loongarch_rewrite_small_data (rtx pattern) + { + pattern = copy_insn (pattern); +- loongarch_rewrite_small_data_1 (&pattern, SYMBOL_CONTEXT_LEA); ++ loongarch_rewrite_small_data_1 (&pattern); + return pattern; + } +- ++ + /* The cost of loading values from the constant pool. It should be + larger than the cost of any constant we want to synthesize inline. 
*/ + #define CONSTANT_POOL_COST COSTS_N_INSNS (8) + +-/* Return true if there is a instruction that implements CODE ++/* Return true if there is a instruction that implements CODE + and if that instruction accepts X as an immediate operand. */ + + static int +@@ -3148,20 +3122,19 @@ loongarch_immediate_operand_p (int code, HOST_WIDE_INT x) + + case ROTATE: + case ROTATERT: +- /* Likewise rotates, if the target supports rotates at all. */ + return true; + + case AND: + case IOR: + case XOR: + /* These instructions take 12-bit unsigned immediates. */ +- return SMALL_OPERAND_UNSIGNED (x); ++ return IMM12_OPERAND_UNSIGNED (x); + + case PLUS: + case LT: + case LTU: + /* These instructions take 12-bit signed immediates. */ +- return SMALL_OPERAND (x); ++ return IMM12_OPERAND (x); + + case EQ: + case NE: +@@ -3178,11 +3151,11 @@ loongarch_immediate_operand_p (int code, HOST_WIDE_INT x) + + case LE: + /* We add 1 to the immediate and use SLT. */ +- return SMALL_OPERAND (x + 1); ++ return IMM12_OPERAND (x + 1); + + case LEU: + /* Likewise SLTU, but reject the always-true case. */ +- return SMALL_OPERAND (x + 1) && x + 1 != 0; ++ return IMM12_OPERAND (x + 1) && x + 1 != 0; + + case SIGN_EXTRACT: + case ZERO_EXTRACT: +@@ -3219,7 +3192,8 @@ loongarch_binary_cost (rtx x, int single_cost, int double_cost, bool speed) + static int + loongarch_fp_mult_cost (machine_mode mode) + { +- return mode == DFmode ? loongarch_cost->fp_mult_df : loongarch_cost->fp_mult_sf; ++ return mode == DFmode ? loongarch_cost->fp_mult_df ++ : loongarch_cost->fp_mult_sf; + } + + /* Return the cost of floating-point divisions of mode MODE. */ +@@ -3227,23 +3201,20 @@ loongarch_fp_mult_cost (machine_mode mode) + static int + loongarch_fp_div_cost (machine_mode mode) + { +- return mode == DFmode ? loongarch_cost->fp_div_df : loongarch_cost->fp_div_sf; ++ return mode == DFmode ? loongarch_cost->fp_div_df ++ : loongarch_cost->fp_div_sf; + } + + /* Return the cost of sign-extending OP to mode MODE, not including the + cost of OP itself. */ + + static int +-loongarch_sign_extend_cost (machine_mode mode, rtx op) ++loongarch_sign_extend_cost (rtx op) + { + if (MEM_P (op)) + /* Extended loads are as cheap as unextended ones. */ + return 0; + +- if (TARGET_64BIT && mode == DImode && GET_MODE (op) == SImode) +- /* A sign extension from SImode to DImode in 64-bit mode is free. */ +- return 0; +- + return COSTS_N_INSNS (1); + } + +@@ -3251,16 +3222,12 @@ loongarch_sign_extend_cost (machine_mode mode, rtx op) + cost of OP itself. */ + + static int +-loongarch_zero_extend_cost (machine_mode mode, rtx op) ++loongarch_zero_extend_cost (rtx op) + { + if (MEM_P (op)) + /* Extended loads are as cheap as unextended ones. */ + return 0; + +- if (TARGET_64BIT && mode == DImode && GET_MODE (op) == SImode) +- /* We need a shift left by 32 bits and a shift right by 32 bits. */ +- return COSTS_N_INSNS (2); +- + /* We can use ANDI. */ + return COSTS_N_INSNS (1); + } +@@ -3281,15 +3248,15 @@ loongarch_set_reg_reg_cost (machine_mode mode) + { + switch (GET_MODE_CLASS (mode)) + { +- case MODE_FCC: +- return loongarch_set_reg_reg_piece_cost (mode, GET_MODE_SIZE (FCCmode)); ++ case MODE_CC: ++ return loongarch_set_reg_reg_piece_cost (mode, GET_MODE_SIZE (CCmode)); + + case MODE_FLOAT: + case MODE_COMPLEX_FLOAT: + case MODE_VECTOR_FLOAT: + if (TARGET_HARD_FLOAT) + return loongarch_set_reg_reg_piece_cost (mode, UNITS_PER_HWFPVALUE); +- /* Fall through */ ++ /* Fall through. 
*/ + + default: + return loongarch_set_reg_reg_piece_cost (mode, UNITS_PER_WORD); +@@ -3300,20 +3267,13 @@ loongarch_set_reg_reg_cost (machine_mode mode) + + static bool + loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, +- int opno ATTRIBUTE_UNUSED, int *total, bool speed) ++ int opno ATTRIBUTE_UNUSED, int *total, bool speed) + { + int code = GET_CODE (x); + bool float_mode_p = FLOAT_MODE_P (mode); + int cost; + rtx addr; + +- /* The cost of a COMPARE is hard to define for LARCH. COMPAREs don't +- appear in the instruction stream, and the cost of a comparison is +- really the cost of the branch or scc condition. At the time of +- writing, GCC only uses an explicit outer COMPARE code when optabs +- is testing whether a constant is expensive enough to force into a +- register. We want optabs to pass such constants through the LARCH +- expanders instead, so make all constants very cheap here. */ + if (outer_code == COMPARE) + { + gcc_assert (CONSTANT_P (x)); +@@ -3324,68 +3284,34 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + switch (code) + { + case CONST_INT: +- /* Treat *clear_upper32-style ANDs as having zero cost in the +- second operand. The cost is entirely in the first operand. +- +- ??? This is needed because we would otherwise try to CSE +- the constant operand. Although that's the right thing for +- instructions that continue to be a register operation throughout +- compilation, it is disastrous for instructions that could +- later be converted into a memory operation. */ +- if (TARGET_64BIT +- && outer_code == AND +- && UINTVAL (x) == 0xffffffff) ++ if (TARGET_64BIT && outer_code == AND && UINTVAL (x) == 0xffffffff) + { + *total = 0; + return true; + } + +- /* When not optimizing for size, we care more about the cost +- of hot code, and hot code is often in a loop. If a constant +- operand needs to be forced into a register, we will often be +- able to hoist the constant load out of the loop, so the load +- should not contribute to the cost. */ +- if (speed || loongarch_immediate_operand_p (outer_code, INTVAL (x))) +- { +- *total = 0; +- return true; +- } ++ /* When not optimizing for size, we care more about the cost ++ of hot code, and hot code is often in a loop. If a constant ++ operand needs to be forced into a register, we will often be ++ able to hoist the constant load out of the loop, so the load ++ should not contribute to the cost. */ ++ if (speed || loongarch_immediate_operand_p (outer_code, INTVAL (x))) ++ { ++ *total = 0; ++ return true; ++ } + /* Fall through. */ + + case CONST: + case SYMBOL_REF: + case LABEL_REF: + case CONST_DOUBLE: +- if (force_to_mem_operand (x, VOIDmode)) +- { +- *total = COSTS_N_INSNS (1); +- return true; +- } + cost = loongarch_const_insns (x); + if (cost > 0) + { +- /* If the constant is likely to be stored in a GPR, SETs of +- single-insn constants are as cheap as register sets; we +- never want to CSE them. +- +- Don't reduce the cost of storing a floating-point zero in +- FPRs. If we have a zero in an FPR for other reasons, we +- can get better cfg-cleanup and delayed-branch results by +- using it consistently, rather than using $0 sometimes and +- an FPR at other times. Also, moves between floating-point +- registers are sometimes cheaper than MOVGR2FR.W/MOVGR2FR.D $0. */ +- if (cost == 1 +- && outer_code == SET ++ if (cost == 1 && outer_code == SET + && !(float_mode_p && TARGET_HARD_FLOAT)) + cost = 0; +- /* When code loads a constant N>1 times, we rarely +- want to CSE the constant itself. 
It is usually better to +- have N copies of the last operation in the sequence and one +- shared copy of the other operations. +- +- Also, if we have a CONST_INT, we don't know whether it is +- for a word or doubleword operation, so we cannot rely on +- the result of loongarch_build_integer. */ + else if ((outer_code == SET || GET_MODE (x) == VOIDmode)) + cost = 1; + *total = COSTS_N_INSNS (cost); +@@ -3399,16 +3325,16 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + /* If the address is legitimate, return the number of + instructions it needs. */ + addr = XEXP (x, 0); +- cost = loongarch_address_insns (addr, mode, true); +- if (cost > 0) ++ /* Check for a scaled indexed address. */ ++ if (loongarch_index_address_p (addr, mode)) + { +- *total = COSTS_N_INSNS (cost + 1); ++ *total = COSTS_N_INSNS (2); + return true; + } +- /* Check for a scaled indexed address. */ +- if (loongarch_lx_address_p (addr, mode)) ++ cost = loongarch_address_insns (addr, mode, true); ++ if (cost > 0) + { +- *total = COSTS_N_INSNS (2); ++ *total = COSTS_N_INSNS (cost + 1); + return true; + } + /* Otherwise use the default handling. */ +@@ -3425,34 +3351,31 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + case AND: + /* Check for a *clear_upper32 pattern and treat it like a zero + extension. See the pattern's comment for details. */ +- if (TARGET_64BIT +- && mode == DImode +- && CONST_INT_P (XEXP (x, 1)) ++ if (TARGET_64BIT && mode == DImode && CONST_INT_P (XEXP (x, 1)) + && UINTVAL (XEXP (x, 1)) == 0xffffffff) + { +- *total = (loongarch_zero_extend_cost (mode, XEXP (x, 0)) ++ *total = (loongarch_zero_extend_cost (XEXP (x, 0)) + + set_src_cost (XEXP (x, 0), mode, speed)); + return true; + } + /* (AND (NOT op0) (NOT op1) is a nor operation that can be done in + a single instruction. */ +- if (GET_CODE (XEXP (x, 0)) == NOT +- && GET_CODE (XEXP (x, 1)) == NOT) ++ if (GET_CODE (XEXP (x, 0)) == NOT && GET_CODE (XEXP (x, 1)) == NOT) + { + cost = GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 2 : 1; +- *total = (COSTS_N_INSNS (cost) ++ *total = (COSTS_N_INSNS (cost) + + set_src_cost (XEXP (XEXP (x, 0), 0), mode, speed) + + set_src_cost (XEXP (XEXP (x, 1), 0), mode, speed)); + return true; + } +- ++ + /* Fall through. */ + + case IOR: + case XOR: + /* Double-word operations use two single-word operations. 
*/ + *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (2), +- speed); ++ speed); + return true; + + case ASHIFT: +@@ -3461,18 +3384,18 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + case ROTATE: + case ROTATERT: + if (CONSTANT_P (XEXP (x, 1))) +- *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (4), +- speed); ++ *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), ++ COSTS_N_INSNS (4), speed); + else +- *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (12), +- speed); ++ *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), ++ COSTS_N_INSNS (12), speed); + return true; + + case ABS: + if (float_mode_p) +- *total = loongarch_cost->fp_add; ++ *total = loongarch_cost->fp_add; + else +- *total = COSTS_N_INSNS (4); ++ *total = COSTS_N_INSNS (4); + return false; + + case LT: +@@ -3500,7 +3423,7 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + return false; + } + *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (4), +- speed); ++ speed); + return true; + + case MINUS: +@@ -3512,13 +3435,12 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + } + + /* If it's an add + mult (which is equivalent to shift left) and +- it's immediate operand satisfies const_immlsa_operand predicate. */ +- if (((ISA_HAS_LSA && mode == SImode) +- || (ISA_HAS_DLSA && mode == DImode)) ++ it's immediate operand satisfies const_immalsl_operand predicate. */ ++ if ((mode == SImode || (TARGET_64BIT && mode == DImode)) + && GET_CODE (XEXP (x, 0)) == MULT) + { + rtx op2 = XEXP (XEXP (x, 0), 1); +- if (const_immlsa_operand (op2, mode)) ++ if (const_immalsl_operand (op2, mode)) + { + *total = (COSTS_N_INSNS (1) + + set_src_cost (XEXP (XEXP (x, 0), 0), mode, speed) +@@ -3529,9 +3451,8 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + + /* Double-word operations require three single-word operations and + an SLTU. */ +- *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), +- COSTS_N_INSNS (4), +- speed); ++ *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (4), ++ speed); + return true; + + case NEG: +@@ -3549,9 +3470,6 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + if (float_mode_p) + *total = loongarch_fp_mult_cost (mode); + else if (mode == DImode && !TARGET_64BIT) +- /* Synthesized from 2 mulsi3s, 1 mulsidi3 and two additions, +- where the mulsidi3 always includes an MFHI and an MFLO. */ +- // FIXED ME??? + *total = (speed + ? loongarch_cost->int_mult_si * 3 + 6 + : COSTS_N_INSNS (7)); +@@ -3566,7 +3484,6 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + case DIV: + /* Check for a reciprocal. */ + if (float_mode_p +- && ISA_HAS_FP_RECIP_RSQRT (mode) + && flag_unsafe_math_optimizations + && XEXP (x, 0) == CONST1_RTX (mode)) + { +@@ -3597,17 +3514,17 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + *total = COSTS_N_INSNS (loongarch_idiv_insns (mode)); + } + else if (mode == DImode) +- *total = loongarch_cost->int_div_di; ++ *total = loongarch_cost->int_div_di; + else + *total = loongarch_cost->int_div_si; + return false; + + case SIGN_EXTEND: +- *total = loongarch_sign_extend_cost (mode, XEXP (x, 0)); ++ *total = loongarch_sign_extend_cost (XEXP (x, 0)); + return false; + + case ZERO_EXTEND: +- *total = loongarch_zero_extend_cost (mode, XEXP (x, 0)); ++ *total = loongarch_zero_extend_cost (XEXP (x, 0)); + return false; + case TRUNCATE: + /* Costings for highpart multiplies. 
Matching patterns of the form: +@@ -3617,11 +3534,11 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + (const_int 32) + */ + if ((GET_CODE (XEXP (x, 0)) == ASHIFTRT +- || GET_CODE (XEXP (x, 0)) == LSHIFTRT) ++ || GET_CODE (XEXP (x, 0)) == LSHIFTRT) + && CONST_INT_P (XEXP (XEXP (x, 0), 1)) + && ((INTVAL (XEXP (XEXP (x, 0), 1)) == 32 + && GET_MODE (XEXP (x, 0)) == DImode) +- || (ISA_HAS_DMUL ++ || (TARGET_64BIT + && INTVAL (XEXP (XEXP (x, 0), 1)) == 64 + && GET_MODE (XEXP (x, 0)) == TImode)) + && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT +@@ -3643,13 +3560,13 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + for (int i = 0; i < 2; ++i) + { + rtx op = XEXP (XEXP (XEXP (x, 0), 0), i); +- if (ISA_HAS_DMUL ++ if (TARGET_64BIT + && GET_CODE (op) == ZERO_EXTEND + && GET_MODE (op) == DImode) + *total += rtx_cost (op, DImode, MULT, i, speed); + else +- *total += rtx_cost (XEXP (op, 0), VOIDmode, GET_CODE (op), +- 0, speed); ++ *total += rtx_cost (XEXP (op, 0), VOIDmode, GET_CODE (op), 0, ++ speed); + } + + return true; +@@ -3684,58 +3601,168 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + + static int + loongarch_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, +- tree vectype, +- int misalign ATTRIBUTE_UNUSED) ++ tree vectype, ++ int misalign ATTRIBUTE_UNUSED) + { +- unsigned elements; +- ++ int elements; + switch (type_of_cost) + { +- case scalar_stmt: +- case scalar_load: +- case vector_stmt: +- case vector_load: +- case vec_to_scalar: +- case scalar_to_vec: +- case cond_branch_not_taken: +- case vec_perm: +- case vec_promote_demote: +- case scalar_store: +- case vector_store: +- return 1; +- +- case unaligned_load: +- case vector_gather_load: +- return 2; ++ case scalar_stmt: ++ case vector_stmt: ++ case vec_to_scalar: ++ case scalar_to_vec: ++ case vec_perm: ++ case vec_promote_demote: ++ return 1; + +- case unaligned_store: +- case vector_scatter_store: +- return 10; ++ case scalar_store: ++ case scalar_load: ++ return 3; + +- case cond_branch_taken: +- return 3; ++ case vector_store: ++ case vector_load: ++ return loongarch_vector_access_cost; + +- case vec_construct: +- elements = TYPE_VECTOR_SUBPARTS (vectype); +- return elements / 2 + 1; ++ case unaligned_load: ++ case unaligned_store: ++ case vector_gather_load: ++ case vector_scatter_store: ++ return 5; + +- default: +- gcc_unreachable (); +- } +-} ++ case cond_branch_taken: ++ return 4; ++ ++ case cond_branch_not_taken: ++ return 2; ++ ++ case vec_construct: ++ { ++ elements = TYPE_VECTOR_SUBPARTS (vectype); ++ if (ISA_HAS_LASX) ++ return elements + 1; ++ else ++ return elements; ++ } ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ ++/* Implement targetm.vectorize.add_stmt_cost. */ ++static unsigned ++loongarch_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, ++ struct _stmt_vec_info *stmt_info, int misalign, ++ enum vect_cost_model_location where) ++{ ++ unsigned *cost = (unsigned *) data; ++ unsigned retval = 0; ++ ++ tree vectype = stmt_info ? 
stmt_vectype (stmt_info) : NULL_TREE; ++ int stmt_cost = - 1; ++ ++ if ((kind == vector_stmt || kind == scalar_stmt) ++ && stmt_info ++ && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN) ++ { ++ tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt); ++ bool fp = false; ++ machine_mode mode = TImode; ++ ++ if (vectype != NULL) ++ { ++ fp = FLOAT_TYPE_P (vectype); ++ mode = TYPE_MODE (vectype); ++ } ++ ++ switch (subcode) ++ { ++ case PLUS_EXPR: ++ case POINTER_PLUS_EXPR: ++ case MINUS_EXPR: ++ case MULT_EXPR: ++ case WIDEN_MULT_EXPR: ++ case MULT_HIGHPART_EXPR: ++ stmt_cost = fp ? 2 : 1; ++ break; ++ ++ case TRUNC_DIV_EXPR: ++ case CEIL_DIV_EXPR: ++ case FLOOR_DIV_EXPR: ++ case ROUND_DIV_EXPR: ++ case TRUNC_MOD_EXPR: ++ case CEIL_MOD_EXPR: ++ case FLOOR_MOD_EXPR: ++ case RDIV_EXPR: ++ case ROUND_MOD_EXPR: ++ case EXACT_DIV_EXPR: ++ stmt_cost = fp ? 4 : 1; ++ break; ++ ++ case NOP_EXPR: ++ /* Only sign-conversions are free. */ ++ if (tree_nop_conversion_p ++ (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)), ++ TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)))) ++ stmt_cost = 0; ++ break; ++ ++ default: ++ break; ++ } ++ } ++ if (kind == vec_construct ++ && stmt_info ++ && (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type ++ || STMT_VINFO_TYPE (stmt_info) == store_vec_info_type) ++ && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE ++ && TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info))) != INTEGER_CST) ++ { ++ stmt_cost = loongarch_builtin_vectorization_cost (kind, vectype, misalign); ++ stmt_cost *= TYPE_VECTOR_SUBPARTS (vectype); ++ } ++ if (stmt_cost == -1) ++ stmt_cost = loongarch_builtin_vectorization_cost (kind, vectype, misalign); ++ ++ /* Statements in an inner loop relative to the loop being ++ vectorized are weighted more heavily. The value here is ++ arbitrary and could potentially be improved with analysis. */ ++ if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info)) ++ count *= 50; /* FIXME. */ ++ ++ retval = (unsigned) (count * stmt_cost); ++ ++ cost[where] += retval; ++ ++ return retval; ++} ++ ++static bool ++loongarch_builtin_support_vector_misalignment(machine_mode mode, const_tree type, ++ int misalignment, bool is_packed) ++{ ++ if ((ISA_HAS_LSX || ISA_HAS_LASX) && STRICT_ALIGNMENT) ++ { ++ if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing) ++ return false; ++ if (misalignment == -1) ++ return false; ++ } ++ return default_builtin_support_vector_misalignment (mode, type, misalignment, ++ is_packed); ++} + + + /* Implement TARGET_ADDRESS_COST. */ + + static int + loongarch_address_cost (rtx addr, machine_mode mode, +- addr_space_t as ATTRIBUTE_UNUSED, +- bool speed ATTRIBUTE_UNUSED) ++ addr_space_t as ATTRIBUTE_UNUSED, ++ bool speed ATTRIBUTE_UNUSED) + { + return loongarch_address_insns (addr, mode, false); + } + +- + /* Return one word of double-word value OP, taking into account the fixed + endianness of certain registers. HIGH_P is true to select the high part, + false to select the low part. */ +@@ -3743,24 +3770,16 @@ loongarch_address_cost (rtx addr, machine_mode mode, + rtx + loongarch_subword (rtx op, bool high_p) + { +- unsigned int byte, offset; ++ unsigned int byte; + machine_mode mode; + ++ byte = high_p ? UNITS_PER_WORD : 0; + mode = GET_MODE (op); + if (mode == VOIDmode) + mode = TARGET_64BIT ? TImode : DImode; + +- if (high_p) +- byte = UNITS_PER_WORD; +- else +- byte = 0; +- + if (FP_REG_RTX_P (op)) +- { +- /* Paired FPRs are always ordered little-endian. 
*/ +- offset = (UNITS_PER_WORD < UNITS_PER_HWFPVALUE ? high_p : byte != 0); +- return gen_rtx_REG (word_mode, REGNO (op) + offset); +- } ++ return gen_rtx_REG (word_mode, REGNO (op) + high_p); + + if (MEM_P (op)) + return loongarch_rewrite_small_data (adjust_address (op, word_mode, byte)); +@@ -3768,11 +3787,10 @@ loongarch_subword (rtx op, bool high_p) + return simplify_gen_subreg (word_mode, op, mode, byte); + } + +-/* Return true if a move from SRC to DEST should be split into two. +- SPLIT_TYPE describes the split condition. */ ++/* Return true if a move from SRC to DEST should be split into two. */ + + bool +-loongarch_split_move_p (rtx dest, rtx src, enum loongarch_split_type split_type) ++loongarch_split_move_p (rtx dest, rtx src) + { + /* FPR-to-FPR moves can be done in a single instruction, if they're + allowed at all. */ +@@ -3801,19 +3819,18 @@ loongarch_split_move_p (rtx dest, rtx src, enum loongarch_split_type split_type) + return size > UNITS_PER_WORD; + } + +-/* Split a move from SRC to DEST, given that loongarch_split_move_p holds. +- SPLIT_TYPE describes the split condition. */ ++/* Split a move from SRC to DEST, given that loongarch_split_move_p holds. */ + + void +-loongarch_split_move (rtx dest, rtx src, enum loongarch_split_type split_type, rtx insn_) ++loongarch_split_move (rtx dest, rtx src, rtx insn_) + { + rtx low_dest; + +- gcc_checking_assert (loongarch_split_move_p (dest, src, split_type)); ++ gcc_checking_assert (loongarch_split_move_p (dest, src)); + if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))) + loongarch_split_128bit_move (dest, src); + else if (LASX_SUPPORTED_MODE_P (GET_MODE (dest))) +- loongarch_split_256bit_move (dest, src); ++ loongarch_split_256bit_move (dest, src); + else if (FP_REG_RTX_P (dest) || FP_REG_RTX_P (src)) + { + if (!TARGET_64BIT && GET_MODE (dest) == DImode) +@@ -3830,23 +3847,24 @@ loongarch_split_move (rtx dest, rtx src, enum loongarch_split_type split_type, r + /* The operation can be split into two normal moves. Decide in + which order to do them. */ + low_dest = loongarch_subword (dest, false); +- if (REG_P (low_dest) +- && reg_overlap_mentioned_p (low_dest, src)) ++ if (REG_P (low_dest) && reg_overlap_mentioned_p (low_dest, src)) + { +- loongarch_emit_move (loongarch_subword (dest, true), loongarch_subword (src, true)); ++ loongarch_emit_move (loongarch_subword (dest, true), ++ loongarch_subword (src, true)); + loongarch_emit_move (low_dest, loongarch_subword (src, false)); + } + else + { + loongarch_emit_move (low_dest, loongarch_subword (src, false)); +- loongarch_emit_move (loongarch_subword (dest, true), loongarch_subword (src, true)); ++ loongarch_emit_move (loongarch_subword (dest, true), ++ loongarch_subword (src, true)); + } + } + + /* This is a hack. See if the next insn uses DEST and if so, see if we + can forward SRC for DEST. This is most useful if the next insn is a +- simple store. */ +- rtx_insn *insn = (rtx_insn *)insn_; ++ simple store. 
*/ ++ rtx_insn *insn = (rtx_insn *) insn_; + struct loongarch_address_info addr = {}; + if (insn) + { +@@ -3859,7 +3877,8 @@ loongarch_split_move (rtx dest, rtx src, enum loongarch_split_type split_type, r + if (MEM_P (src)) + { + rtx tmp = XEXP (src, 0); +- loongarch_classify_address (&addr, tmp, GET_MODE (tmp), true); ++ loongarch_classify_address (&addr, tmp, GET_MODE (tmp), ++ true); + if (addr.reg && !reg_overlap_mentioned_p (dest, addr.reg)) + validate_change (next, &SET_SRC (set), src, false); + } +@@ -3870,24 +3889,6 @@ loongarch_split_move (rtx dest, rtx src, enum loongarch_split_type split_type, r + } + } + +-/* Return the split type for instruction INSN. */ +- +-static enum loongarch_split_type +-loongarch_insn_split_type (rtx insn) +-{ +- basic_block bb = BLOCK_FOR_INSN (insn); +- if (bb) +- { +- if (optimize_bb_for_speed_p (bb)) +- return SPLIT_FOR_SPEED; +- else +- return SPLIT_FOR_SIZE; +- } +- /* Once CFG information has been removed, we should trust the optimization +- decisions made by previous passes and only split where necessary. */ +- return SPLIT_IF_NECESSARY; +-} +- + /* Return true if a 128-bit move from SRC to DEST should be split. */ + + bool +@@ -3974,10 +3975,10 @@ loongarch_split_128bit_move (rtx dest, rtx src) + s = loongarch_subword_at_byte (src, byte); + if (!TARGET_64BIT) + emit_insn (gen_lsx_vinsgr2vr_w (new_dest, s, new_dest, +- GEN_INT (1 << index))); ++ GEN_INT (1 << index))); + else + emit_insn (gen_lsx_vinsgr2vr_d (new_dest, s, new_dest, +- GEN_INT (1 << index))); ++ GEN_INT (1 << index))); + } + } + else if (FP_REG_RTX_P (src)) +@@ -4200,28 +4201,93 @@ loongarch_split_lsx_fill_d (rtx dest, rtx src) + emit_insn (gen_lsx_vinsgr2vr_w (new_dest, high, new_dest, GEN_INT (1 << 1))); + emit_insn (gen_lsx_vinsgr2vr_w (new_dest, high, new_dest, GEN_INT (1 << 3))); + } +- ++ + /* Return true if a move from SRC to DEST in INSN should be split. */ + + bool +-loongarch_split_move_insn_p (rtx dest, rtx src, rtx insn) ++loongarch_split_move_insn_p (rtx dest, rtx src) + { +- return loongarch_split_move_p (dest, src, loongarch_insn_split_type (insn)); ++ return loongarch_split_move_p (dest, src); + } + +-/* Split a move from SRC to DEST in INSN, given that loongarch_split_move_insn_p +- holds. */ ++/* Split a move from SRC to DEST in INSN, given that ++ loongarch_split_move_insn_p holds. */ + + void + loongarch_split_move_insn (rtx dest, rtx src, rtx insn) + { +- loongarch_split_move (dest, src, loongarch_insn_split_type (insn), insn); ++ loongarch_split_move (dest, src, insn); + } +- + +-/* Forward declaration. Used below */ ++/* Implement TARGET_CONSTANT_ALIGNMENT. 
*/ ++ + static HOST_WIDE_INT +-loongarch_constant_alignment (const_tree exp, HOST_WIDE_INT align); ++loongarch_constant_alignment (const_tree exp, HOST_WIDE_INT align) ++{ ++ if (TREE_CODE (exp) == STRING_CST || TREE_CODE (exp) == CONSTRUCTOR) ++ return MAX (align, BITS_PER_WORD); ++ return align; ++} ++ ++const char * ++loongarch_output_move_index (rtx x, machine_mode mode, bool ldr) ++{ ++ int index = exact_log2 (GET_MODE_SIZE (mode)); ++ if (!IN_RANGE (index, 0, 3)) ++ return NULL; ++ ++ struct loongarch_address_info info; ++ if ((loongarch_classify_address (&info, x, mode, false) ++ && !(info.type == ADDRESS_REG_REG)) ++ || !loongarch_legitimate_address_p (mode, x, false)) ++ return NULL; ++ ++ const char *const insn[][4] = ++ { ++ { ++ "stx.b\t%z1,%0", ++ "stx.h\t%z1,%0", ++ "stx.w\t%z1,%0", ++ "stx.d\t%z1,%0", ++ }, ++ { ++ "ldx.bu\t%0,%1", ++ "ldx.hu\t%0,%1", ++ "ldx.w\t%0,%1", ++ "ldx.d\t%0,%1", ++ } ++ }; ++ ++ return insn[ldr][index]; ++} ++ ++const char * ++loongarch_output_move_index_float (rtx x, machine_mode mode, bool ldr) ++{ ++ int index = exact_log2 (GET_MODE_SIZE (mode)); ++ if (!IN_RANGE (index, 2, 3)) ++ return NULL; ++ ++ struct loongarch_address_info info; ++ if ((loongarch_classify_address (&info, x, mode, false) ++ && !(info.type == ADDRESS_REG_REG)) ++ || !loongarch_legitimate_address_p (mode, x, false)) ++ return NULL; ++ ++ const char *const insn[][2] = ++ { ++ { ++ "fstx.s\t%1,%0", ++ "fstx.d\t%1,%0" ++ }, ++ { ++ "fldx.s\t%0,%1", ++ "fldx.d\t%0,%1" ++ } ++ }; ++ ++ return insn[ldr][index-2]; ++} + + /* Return the appropriate instructions to move SRC into DEST. Assume + that SRC is operand 1 and DEST is operand 0. */ +@@ -4235,9 +4301,8 @@ loongarch_output_move (rtx dest, rtx src) + bool dbl_p = (GET_MODE_SIZE (mode) == 8); + bool lsx_p = LSX_SUPPORTED_MODE_P (mode); + bool lasx_p = LASX_SUPPORTED_MODE_P (mode); +- enum loongarch_symbol_type symbol_type; + +- if (loongarch_split_move_p (dest, src, SPLIT_IF_NECESSARY)) ++ if (loongarch_split_move_p (dest, src)) + return "#"; + + if ((lsx_p || lasx_p) +@@ -4246,7 +4311,7 @@ loongarch_output_move (rtx dest, rtx src) + && CONST_INT_P (CONST_VECTOR_ELT (src, 0))) + { + gcc_assert (loongarch_const_vector_same_int_p (src, mode, -512, 511)); +- if(lsx_p || lasx_p) ++ if (lsx_p || lasx_p) + { + switch (GET_MODE_SIZE (mode)) + { +@@ -4254,7 +4319,8 @@ loongarch_output_move (rtx dest, rtx src) + return "vrepli.%v0\t%w0,%E1"; + case 32: + return "xvrepli.%v0\t%u0,%E1"; +- default: gcc_unreachable (); ++ default: ++ gcc_unreachable (); + } + } + } +@@ -4278,77 +4344,98 @@ loongarch_output_move (rtx dest, rtx src) + return "vrepli.b\t%w0,0"; + case 32: + return "xvrepli.b\t%u0,0"; +- default: gcc_unreachable (); ++ default: ++ gcc_unreachable (); + } + } + + return dbl_p ? 
"movgr2fr.d\t%0,%z1" : "movgr2fr.w\t%0,%z1"; + } + } +- if (dest_code == MEM) ++ if (dest_code == MEM) + { ++ const char *insn = NULL; ++ insn = loongarch_output_move_index (XEXP (dest, 0), GET_MODE (dest), ++ false); ++ if (insn) ++ return insn; ++ + rtx offset = XEXP (dest, 0); +- if (GET_CODE(offset) == PLUS) +- offset = XEXP(offset, 1); ++ if (GET_CODE (offset) == PLUS) ++ offset = XEXP (offset, 1); ++ else ++ offset = const0_rtx; + switch (GET_MODE_SIZE (mode)) + { +- case 1: return "st.b\t%z1,%0"; +- case 2: return "st.h\t%z1,%0"; +- case 4: +- if (const_arith_operand (offset, Pmode)) +- return "st.w\t%z1,%0"; +- else +- return "stptr.w\t%z1,%0"; +- case 8: +- if (const_arith_operand (offset, Pmode)) +- return "st.d\t%z1,%0"; +- else +- return "stptr.d\t%z1,%0"; +- default: gcc_unreachable (); +- } ++ case 1: ++ return "st.b\t%z1,%0"; ++ case 2: ++ return "st.h\t%z1,%0"; ++ case 4: ++ if (const_arith_operand (offset, Pmode) || (offset == const0_rtx)) ++ return "st.w\t%z1,%0"; ++ else ++ return "stptr.w\t%z1,%0"; ++ case 8: ++ if (const_arith_operand (offset, Pmode) || (offset == const0_rtx)) ++ return "st.d\t%z1,%0"; ++ else ++ return "stptr.d\t%z1,%0"; ++ default: ++ gcc_unreachable (); ++ } + } + } + if (dest_code == REG && GP_REG_P (REGNO (dest))) + { + if (src_code == REG) +- { +- if (FP_REG_P (REGNO (src))) +- { +- gcc_assert (!lsx_p); +- return dbl_p ? "movfr2gr.d\t%0,%1" : "movfr2gr.s\t%0,%1"; +- } +- } ++ if (FP_REG_P (REGNO (src))) ++ { ++ gcc_assert (!lsx_p && !lasx_p); ++ return dbl_p ? "movfr2gr.d\t%0,%1" : "movfr2gr.s\t%0,%1"; ++ } + + if (src_code == MEM) + { ++ const char *insn = NULL; ++ insn = loongarch_output_move_index (XEXP (src, 0), GET_MODE (src), ++ true); ++ if (insn) ++ return insn; ++ + rtx offset = XEXP (src, 0); +- if (GET_CODE(offset) == PLUS) +- offset = XEXP(offset, 1); ++ if (GET_CODE (offset) == PLUS) ++ offset = XEXP (offset, 1); ++ else ++ offset = const0_rtx; + switch (GET_MODE_SIZE (mode)) + { +- case 1: return "ld.bu\t%0,%1"; +- case 2: return "ld.hu\t%0,%1"; +- case 4: +- if (const_arith_operand (offset, Pmode)) +- return "ld.w\t%0,%1"; +- else +- return "ldptr.w\t%0,%1"; +- case 8: +- if (const_arith_operand (offset, Pmode)) +- return "ld.d\t%0,%1"; +- else +- return "ldptr.d\t%0,%1"; +- default: gcc_unreachable (); ++ case 1: ++ return "ld.bu\t%0,%1"; ++ case 2: ++ return "ld.hu\t%0,%1"; ++ case 4: ++ if (const_arith_operand (offset, Pmode) || (offset == const0_rtx)) ++ return "ld.w\t%0,%1"; ++ else ++ return "ldptr.w\t%0,%1"; ++ case 8: ++ if (const_arith_operand (offset, Pmode) || (offset == const0_rtx)) ++ return "ld.d\t%0,%1"; ++ else ++ return "ldptr.d\t%0,%1"; ++ default: ++ gcc_unreachable (); + } + } +- ++ + if (src_code == CONST_INT) + { +- if (LUI_INT (src)) ++ if (LU12I_INT (src)) + return "lu12i.w\t%0,%1>>12\t\t\t# %X1"; +- else if (SMALL_INT (src)) ++ else if (IMM12_INT (src)) + return "addi.w\t%0,$r0,%1\t\t\t# %X1"; +- else if (SMALL_INT_UNSIGNED (src)) ++ else if (IMM12_INT_UNSIGNED (src)) + return "ori\t%0,$r0,%1\t\t\t# %X1"; + else if (LU52I_INT (src)) + return "lu52i.d\t%0,$r0,%X1>>52\t\t\t# %1"; +@@ -4358,56 +4445,51 @@ loongarch_output_move (rtx dest, rtx src) + + if (symbolic_operand (src, VOIDmode)) + { +- +- switch (loongarch_cmodel_var) ++ if ((TARGET_CMODEL_TINY && (!loongarch_global_symbol_p (src) ++ || loongarch_symbol_binds_local_p (src))) ++ || (TARGET_CMODEL_TINY_STATIC && !loongarch_weak_symbol_p (src))) + { +- case LARCH_CMODEL_TINY: +- do ++ /* The symbol must be aligned to 4 byte. 
*/ ++ unsigned int align; ++ ++ if (GET_CODE (src) == LABEL_REF) ++ align = 32 /* Whatever. */; ++ else if (CONSTANT_POOL_ADDRESS_P (src)) ++ align = GET_MODE_ALIGNMENT (get_pool_mode (src)); ++ else if (TREE_CONSTANT_POOL_ADDRESS_P (src)) + { +- if (loongarch_global_symbol_p (src) +- && !loongarch_symbol_binds_local_p (src)) +- break; +- case LARCH_CMODEL_TINY_STATIC: +- if (loongarch_weak_symbol_p (src)) +- break; +- +- /* The symbol must be aligned to 4 byte. */ +- unsigned int align; +- +- if (GET_CODE (src) == LABEL_REF) +- align = 128 /* whatever */; +- /* copy from aarch64 */ +- else if (CONSTANT_POOL_ADDRESS_P (src)) +- align = GET_MODE_ALIGNMENT (get_pool_mode (src)); +- else if (TREE_CONSTANT_POOL_ADDRESS_P (src)) +- { +- tree exp = SYMBOL_REF_DECL (src); +- align = TYPE_ALIGN (TREE_TYPE (exp)); +- align = loongarch_constant_alignment (exp, align); +- } +- else if (SYMBOL_REF_DECL (src)) +- align = DECL_ALIGN (SYMBOL_REF_DECL (src)); +- else if (SYMBOL_REF_HAS_BLOCK_INFO_P (src) +- && SYMBOL_REF_BLOCK (src) != NULL) +- align = SYMBOL_REF_BLOCK (src)->alignment; +- else +- align = BITS_PER_UNIT; +- +- if (align % (4 * 8) == 0) +- return "pcaddi\t%0,%%pcrel(%1)>>2"; ++ tree exp = SYMBOL_REF_DECL (src); ++ align = TYPE_ALIGN (TREE_TYPE (exp)); ++ align = loongarch_constant_alignment (exp, align); + } +- while (0); +- case LARCH_CMODEL_NORMAL: +- case LARCH_CMODEL_LARGE: ++ else if (SYMBOL_REF_DECL (src)) ++ align = DECL_ALIGN (SYMBOL_REF_DECL (src)); ++ else if (SYMBOL_REF_HAS_BLOCK_INFO_P (src) ++ && SYMBOL_REF_BLOCK (src) != NULL) ++ align = SYMBOL_REF_BLOCK (src)->alignment; ++ else ++ align = BITS_PER_UNIT; ++ ++ if (align % (4 * 8) == 0) ++ return "pcaddi\t%0,%%pcrel(%1)>>2"; ++ } ++ if (TARGET_CMODEL_TINY ++ || TARGET_CMODEL_TINY_STATIC ++ || TARGET_CMODEL_NORMAL ++ || TARGET_CMODEL_LARGE) ++ { + if (!loongarch_global_symbol_p (src) + || loongarch_symbol_binds_local_p (src)) + return "la.local\t%0,%1"; + else + return "la.global\t%0,%1"; +- case LARCH_CMODEL_EXTREME: +- default: ++ } ++ if (TARGET_CMODEL_EXTREME) ++ { ++ sorry ("Normal symbol loading not implemented in extreme mode."); + gcc_unreachable (); + } ++ + } + } + if (src_code == REG && FP_REG_P (REGNO (src))) +@@ -4416,14 +4498,14 @@ loongarch_output_move (rtx dest, rtx src) + { + if (lsx_p || lasx_p) + { +- + switch (GET_MODE_SIZE (mode)) + { + case 16: + return "vori.b\t%w0,%w1,0"; + case 32: + return "xvori.b\t%u0,%u1,0"; +- default: gcc_unreachable (); ++ default: ++ gcc_unreachable (); + } + } + else +@@ -4434,16 +4516,22 @@ loongarch_output_move (rtx dest, rtx src) + { + if (lsx_p || lasx_p) + { +- + switch (GET_MODE_SIZE (mode)) + { + case 16: + return "vst\t%w1,%0"; + case 32: + return "xvst\t%u1,%0"; +- default: gcc_unreachable (); ++ default: ++ gcc_unreachable (); + } + } ++ const char *insn = NULL; ++ insn = loongarch_output_move_index_float (XEXP (dest, 0), ++ GET_MODE (dest), ++ false); ++ if (insn) ++ return insn; + + return dbl_p ? "fst.d\t%1,%0" : "fst.s\t%1,%0"; + } +@@ -4460,17 +4548,25 @@ loongarch_output_move (rtx dest, rtx src) + return "vld\t%w0,%1"; + case 32: + return "xvld\t%u0,%1"; +- default: gcc_unreachable (); ++ default: ++ gcc_unreachable (); + } + } ++ const char *insn = NULL; ++ insn = loongarch_output_move_index_float (XEXP (src, 0), ++ GET_MODE (src), ++ true); ++ if (insn) ++ return insn; ++ + return dbl_p ? "fld.d\t%0,%1" : "fld.s\t%0,%1"; + } + } + gcc_unreachable (); + } +- ++ + /* Return true if CMP1 is a suitable second operand for integer ordering +- test CODE. 
See also the *sCC patterns in loongarch.md. */ ++ test CODE. */ + + static bool + loongarch_int_order_operand_ok_p (enum rtx_code code, rtx cmp1) +@@ -4508,7 +4604,7 @@ loongarch_int_order_operand_ok_p (enum rtx_code code, rtx cmp1) + + static bool + loongarch_canonicalize_int_order_test (enum rtx_code *code, rtx *cmp1, +- machine_mode mode) ++ machine_mode mode) + { + HOST_WIDE_INT plus_one; + +@@ -4551,11 +4647,11 @@ loongarch_canonicalize_int_order_test (enum rtx_code *code, rtx *cmp1, + + static void + loongarch_emit_int_order_test (enum rtx_code code, bool *invert_ptr, +- rtx target, rtx cmp0, rtx cmp1) ++ rtx target, rtx cmp0, rtx cmp1) + { + machine_mode mode; + +- /* First see if there is a LARCH instruction that can do this operation. ++ /* First see if there is a LoongArch instruction that can do this operation. + If not, try doing the same for the inverse operation. If that also + fails, force CMP1 into a register and try again. */ + mode = GET_MODE (cmp0); +@@ -4574,7 +4670,7 @@ loongarch_emit_int_order_test (enum rtx_code code, bool *invert_ptr, + rtx inv_target; + + inv_target = loongarch_force_binary (GET_MODE (target), +- inv_code, cmp0, cmp1); ++ inv_code, cmp0, cmp1); + loongarch_emit_binary (XOR, target, inv_target, const1_rtx); + } + else +@@ -4595,43 +4691,14 @@ loongarch_zero_if_equal (rtx cmp0, rtx cmp1) + return cmp0; + + if (uns_arith_operand (cmp1, VOIDmode)) +- return expand_binop (GET_MODE (cmp0), xor_optab, +- cmp0, cmp1, 0, 0, OPTAB_DIRECT); ++ return expand_binop (GET_MODE (cmp0), xor_optab, cmp0, cmp1, 0, 0, ++ OPTAB_DIRECT); + +- return expand_binop (GET_MODE (cmp0), sub_optab, +- cmp0, cmp1, 0, 0, OPTAB_DIRECT); ++ return expand_binop (GET_MODE (cmp0), sub_optab, cmp0, cmp1, 0, 0, ++ OPTAB_DIRECT); + } + +-/* Allocate a floating-point condition-code register of mode MODE. +- +- These condition code registers are used for certain kinds +- of compound operation, such as compare and branches, vconds, +- and built-in functions. At expand time, their use is entirely +- controlled by LARCH-specific code and is entirely internal +- to these compound operations. +- +- We could (and did in the past) expose condition-code values +- as pseudo registers and leave the register allocator to pick +- appropriate registers. The problem is that it is not practically +- possible for the rtl optimizers to guarantee that no spills will +- be needed, even when AVOID_CCMODE_COPIES is defined. We would +- therefore need spill and reload sequences to handle the worst case. +- +- Although such sequences do exist, they are very expensive and are +- not something we'd want to use. +- +- The main benefit of having more than one condition-code register +- is to allow the pipelining of operations, especially those involving +- comparisons and conditional moves. We don't really expect the +- registers to be live for long periods, and certainly never want +- them to be live across calls. +- +- Also, there should be no penalty attached to using all the available +- registers. They are simply bits in the same underlying FPU control +- register. +- +- We therefore expose the hardware registers from the outset and use +- a simple round-robin allocation scheme. */ ++/* Allocate a floating-point condition-code register of mode MODE. 
*/ + + static rtx + loongarch_allocate_fcc (machine_mode mode) +@@ -4646,15 +4713,14 @@ loongarch_allocate_fcc (machine_mode mode) + gcc_unreachable (); + + cfun->machine->next_fcc += -cfun->machine->next_fcc & (count - 1); +- if (cfun->machine->next_fcc > ST_REG_LAST - ST_REG_FIRST) ++ if (cfun->machine->next_fcc > FCC_REG_LAST - FCC_REG_FIRST) + cfun->machine->next_fcc = 0; + +- regno = ST_REG_FIRST + cfun->machine->next_fcc; ++ regno = FCC_REG_FIRST + cfun->machine->next_fcc; + cfun->machine->next_fcc += count; + return gen_rtx_REG (mode, regno); + } + +- + /* Sign- or zero-extend OP0 and OP1 for integer comparisons. */ + + static void +@@ -4681,6 +4747,7 @@ loongarch_extend_comparands (rtx_code code, rtx *op0, rtx *op1) + } + } + ++ + /* Convert a comparison into something that can be used in a branch. On + entry, *OP0 and *OP1 are the values being compared and *CODE is the code + used to compare them. Update them to describe the final comparison. */ +@@ -4688,6 +4755,9 @@ loongarch_extend_comparands (rtx_code code, rtx *op0, rtx *op1) + static void + loongarch_emit_int_compare (enum rtx_code *code, rtx *op0, rtx *op1) + { ++ static const enum rtx_code ++ mag_comparisons[][2] = {{LEU, LTU}, {GTU, GEU}, {LE, LT}, {GT, GE}}; ++ + if (splittable_const_int_operand (*op1, VOIDmode)) + { + HOST_WIDE_INT rhs = INTVAL (*op1); +@@ -4695,7 +4765,7 @@ loongarch_emit_int_compare (enum rtx_code *code, rtx *op0, rtx *op1) + if (*code == EQ || *code == NE) + { + /* Convert e.g. OP0 == 2048 into OP0 - 2048 == 0. */ +- if (SMALL_OPERAND (-rhs)) ++ if (IMM12_OPERAND (-rhs)) + { + *op0 = loongarch_force_binary (GET_MODE (*op0), PLUS, *op0, + GEN_INT (-rhs)); +@@ -4704,10 +4774,6 @@ loongarch_emit_int_compare (enum rtx_code *code, rtx *op0, rtx *op1) + } + else + { +- static const enum rtx_code mag_comparisons[][2] = { +- {LEU, LTU}, {GTU, GEU}, {LE, LT}, {GT, GE} +- }; +- + /* Convert e.g. (OP0 <= 0xFFF) into (OP0 < 0x1000). */ + for (size_t i = 0; i < ARRAY_SIZE (mag_comparisons); i++) + { +@@ -4730,13 +4796,14 @@ loongarch_emit_int_compare (enum rtx_code *code, rtx *op0, rtx *op1) + } + } + ++ loongarch_extend_comparands (*code, op0, op1); + +- *op0 = force_reg (GET_MODE (*op0), *op0); +- if (*op1 != const0_rtx) +- *op1 = force_reg (GET_MODE (*op0), *op1); ++ *op0 = force_reg (word_mode, *op0); ++ if (*op1 != const0_rtx) ++ *op1 = force_reg (word_mode, *op1); + } + +-/* Like riscv_emit_int_compare, but for floating-point comparisons. */ ++/* Like loongarch_emit_int_compare, but for floating-point comparisons. */ + + static void + loongarch_emit_float_compare (enum rtx_code *code, rtx *op0, rtx *op1) +@@ -4749,7 +4816,7 @@ loongarch_emit_float_compare (enum rtx_code *code, rtx *op0, rtx *op1) + then compare that register against zero. + + Set CMP_CODE to the code of the comparison instruction and +- *CODE to the code that the branch or move should use. */ ++ *CODE to the code that the branch or move should use. */ + enum rtx_code cmp_code = *code; + /* Three FP conditions cannot be implemented by reversing the + operands for FCMP.cond.fmt, instead a reversed condition code is +@@ -4760,7 +4827,7 @@ loongarch_emit_float_compare (enum rtx_code *code, rtx *op0, rtx *op1) + *op1 = const0_rtx; + loongarch_emit_binary (cmp_code, *op0, cmp_op0, cmp_op1); + } +- ++ + /* Try performing the comparison in OPERANDS[1], whose arms are OPERANDS[2] + and OPERAND[3]. Store the result in OPERANDS[0]. 
+ +@@ -4775,14 +4842,15 @@ loongarch_expand_scc (rtx operands[]) + rtx op0 = operands[2]; + rtx op1 = operands[3]; + ++ loongarch_extend_comparands (code, &op0, &op1); ++ op0 = force_reg (word_mode, op0); ++ + gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT); + + if (code == EQ || code == NE) + { +- { +- rtx zie = loongarch_zero_if_equal (op0, op1); +- loongarch_emit_binary (code, target, zie, const0_rtx); +- } ++ rtx zie = loongarch_zero_if_equal (op0, op1); ++ loongarch_emit_binary (code, target, zie, const0_rtx); + } + else + loongarch_emit_int_order_test (code, 0, target, op0, op1); +@@ -4804,49 +4872,65 @@ loongarch_expand_conditional_branch (rtx *operands) + else + loongarch_emit_int_compare (&code, &op0, &op1); + +- condition = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1); ++ condition = gen_rtx_fmt_ee (code, VOIDmode, op0, op1); + emit_jump_insn (gen_condjump (condition, operands[3])); + } + + /* Perform the comparison in OPERANDS[1]. Move OPERANDS[2] into OPERANDS[0] + if the condition holds, otherwise move OPERANDS[3] into OPERANDS[0]. */ +- +-void +-loongarch_expand_conditional_move (rtx *operands) ++bool ++loongarch_expand_conditional_move_la464 (rtx *operands) + { + enum rtx_code code = GET_CODE (operands[1]); + rtx op0 = XEXP (operands[1], 0); + rtx op1 = XEXP (operands[1], 1); ++ machine_mode cmp_mode = GET_MODE(op0); ++ machine_mode sel_mode = GET_MODE(operands[2]); + ++ /*ffii means Selecting a fixed point based on floating point comparison results */ + if (FLOAT_MODE_P (GET_MODE (op1))) + loongarch_emit_float_compare (&code, &op0, &op1); + else + { +- if (code == EQ || code == NE) /*see test-mask-1.c && test-mask-5.c*/ ++ loongarch_extend_comparands (code, &op0, &op1); ++ ++ op0 = force_reg (word_mode, op0); ++ ++ if (code == EQ || code == NE) ++ { ++ op0 = loongarch_zero_if_equal (op0, op1); ++ op1 = const0_rtx; ++ /*Be careful iiff*/ ++ if(FLOAT_MODE_P(sel_mode)){ ++ rtx target = gen_reg_rtx (GET_MODE (op0)); ++ bool invert = false; ++ loongarch_emit_int_order_test (LTU, NULL, op0, ++ force_reg (GET_MODE (op0), const0_rtx), ++ op0); ++ op1 = const0_rtx; ++ } ++ } ++ else + { +- op0 = loongarch_zero_if_equal(op0, op1); ++ /* The comparison needs a separate scc instruction. Store the ++ result of the scc in *OP0 and compare it against zero. */ ++ bool invert = false; ++ rtx target = gen_reg_rtx (GET_MODE (op0)); ++ loongarch_emit_int_order_test (code, &invert, target, op0, op1); ++ code = invert ? EQ : NE; ++ op0 = target; + op1 = const0_rtx; + } +- else /*see test-mask-2.c*/ +- { +- /* The comparison needs a separate scc instruction. Store the +- result of the scc in *OP0 and compare it against zero. */ +- bool invert = false; +- rtx target = gen_reg_rtx (GET_MODE (op0)); +- loongarch_emit_int_order_test (code, &invert, target, op0, op1); +- code = invert ? EQ: NE; +- op0 = target; +- op1 = const0_rtx; +- } + } + + rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1); + /* There is no direct support for general conditional GP move involving +- two registers using SEL. see test-mask-3.c */ +- if (INTEGRAL_MODE_P (GET_MODE (operands[2])) ++ two registers using SEL. 
*/ ++ if (INTEGRAL_MODE_P (cmp_mode) ++ &&(INTEGRAL_MODE_P (sel_mode)) + && register_operand (operands[2], VOIDmode) +- && register_operand (operands[3], VOIDmode)) +- { ++ && register_operand (operands[3], VOIDmode)) { ++ + machine_mode mode = GET_MODE (operands[0]); + rtx temp = gen_reg_rtx (mode); + rtx temp2 = gen_reg_rtx (mode); +@@ -4864,26 +4948,72 @@ loongarch_expand_conditional_move (rtx *operands) + + /* Merge the two results, at least one is guaranteed to be zero. */ + emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, temp2))); +- } +- else +- emit_insn (gen_rtx_SET (operands[0], +- gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]), cond, +- operands[2], operands[3]))); +-} +- +- +-/* Initialize *CUM for a call to a function of type FNTYPE. */ + +-void +-loongarch_init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype) +-{ +- memset (cum, 0, sizeof (*cum)); +- cum->prototype = (fntype && prototype_p (fntype)); +- cum->gp_reg_found = (cum->prototype && stdarg_p (fntype)); ++ return true; ++ /*For ffii, iiff due to movgr2fr, movfr2gr overhead is relatively large, ++ * so we use some compromise*/ ++ } else if (INTEGRAL_MODE_P (cmp_mode) ++ &&(FLOAT_MODE_P (sel_mode)) ++ && register_operand (operands[2], VOIDmode) ++ && register_operand (operands[3], VOIDmode)) { ++ rtx temp = gen_reg_rtx(sel_mode); ++ rtx fcc_reg =loongarch_allocate_fcc (FCCmode); ++ rtx diop0 = convert_to_mode(E_DImode, op0, true); ++ /*stl t0 i i-> movgr2fr f0 t0 -> movfr2cf fcc0 f0 -> fsel f f*/ ++ if(sel_mode == E_DFmode){ ++ emit_insn(gen_movdgr2frdf(temp, diop0)); ++ emit_insn(gen_movfr2fccdf(fcc_reg, temp)); ++ }else if(sel_mode == E_SFmode){ ++ emit_insn(gen_movdgr2frsf(temp, diop0)); ++ emit_insn(gen_movfr2fccsf(fcc_reg, temp)); ++ } ++ cond = gen_rtx_fmt_ee (code, GET_MODE(fcc_reg), fcc_reg, const0_rtx); ++ ++ emit_insn (gen_rtx_SET (operands[0], ++ gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]), cond, ++ operands[2], operands[3]))); ++ return true; ++ } else if (FLOAT_MODE_P (cmp_mode) ++ &&(INTEGRAL_MODE_P (sel_mode))) { ++ /*movgr2fr f0 i -> movgr2fr f1 i -> fcmp fcc0 f f ++ * -> fsel f3 f0 f1 -> movfr2gr t0 f3*/ ++ machine_mode dst_mode = GET_MODE (operands[0]); ++ rtx temp = gen_reg_rtx (E_DFmode); ++ rtx temp2 = gen_reg_rtx (E_DFmode); ++ rtx temp3 = gen_reg_rtx (E_DFmode); ++ ++ if(CONST_INT_P(operands[2])){ ++ operands[2] = copy_to_mode_reg(dst_mode, operands[2]); ++ } ++ if(CONST_INT_P(operands[3])){ ++ operands[3] = copy_to_mode_reg(dst_mode, operands[3]); ++ } ++ if(GET_MODE(operands[2]) != E_DImode) ++ operands[2] = convert_to_mode(E_DImode, operands[2], false); ++ if(GET_MODE(operands[3]) != E_DImode) ++ operands[3] = convert_to_mode(E_DImode, operands[3], false); ++ ++ emit_insn(gen_movdgr2frdf(temp2, operands[2])); ++ emit_insn(gen_movdgr2frdf(temp3, operands[3])); ++ ++ emit_insn (gen_rtx_SET (temp, ++ gen_rtx_IF_THEN_ELSE (E_DFmode, cond, ++ temp2, temp3))); ++ if(GET_MODE(operands[0]) == E_DImode) ++ emit_insn(gen_movdfr2grdi(operands[0], temp)); ++ else if(GET_MODE(operands[0]) == E_SImode) ++ emit_insn(gen_movdfr2grsi(operands[0], temp)); ++ return true; ++ } else if(FLOAT_MODE_P (cmp_mode) ++ &&FLOAT_MODE_P (sel_mode)){ ++ emit_insn (gen_rtx_SET (operands[0], ++ gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]), cond, ++ operands[2], operands[3]))); ++ return true; ++ } ++ ++ return false; + } +- +- +- + /* Implement TARGET_EXPAND_BUILTIN_VA_START. 
*/ + + static void +@@ -4893,100 +5023,15 @@ loongarch_va_start (tree valist, rtx nextarg) + std_expand_builtin_va_start (valist, nextarg); + } + +- +-/* Start a definition of function NAME. */ +- +-static void +-loongarch_start_function_definition (const char *name) +-{ +- ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, name, "function"); +- +- /* Start the definition proper. */ +- assemble_name (asm_out_file, name); +- fputs (":\n", asm_out_file); +-} +- +-/* End a function definition started by loongarch_start_function_definition. */ +- +-static void +-loongarch_end_function_definition (const char *name) +-{ +-} +- + /* Implement TARGET_FUNCTION_OK_FOR_SIBCALL. */ + + static bool +-loongarch_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) ++loongarch_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, ++ tree exp ATTRIBUTE_UNUSED) + { +- if (!TARGET_SIBCALLS) +- return false; +- +- /* Interrupt handlers need special epilogue code and therefore can't +- use sibcalls. */ +- if (loongarch_interrupt_type_p (TREE_TYPE (current_function_decl))) +- return false; +- +- /* Otherwise OK. */ ++ /* Always OK. */ + return true; + } +- +-/* Implement a handler for STORE_BY_PIECES operations +- for TARGET_USE_MOVE_BY_PIECES_INFRASTRUCTURE_P. */ +- +-bool +-loongarch_store_by_pieces_p (unsigned HOST_WIDE_INT size, unsigned int align) +-{ +- /* Storing by pieces involves moving constants into registers +- of size MIN (ALIGN, BITS_PER_WORD), then storing them. +- We need to decide whether it is cheaper to load the address of +- constant data into a register and use a block move instead. */ +- +- /* If the data is only byte aligned, then: +- +- (a1) A block move of less than 4 bytes would involve three 3 LD.Bs and +- 3 ST.Bs. We might as well use 3 single-instruction LIs and 3 SD.Bs +- instead. +- +- (a2) A block move of 4 bytes from aligned source data can use an +- LD.W/ST.W sequence. This is often better than the 4 LIs and +- 4 SD.Bs that we would generate when storing by pieces. */ +- if (align <= BITS_PER_UNIT) +- return size < 4; +- +- /* If the data is 2-byte aligned, then: +- +- (b1) A block move of less than 4 bytes would use a combination of LD.Bs, +- LD.Hs, SD.Bs and SD.Hs. We get better code by using single-instruction +- LIs, SD.Bs and SD.Hs instead. +- +- (b2) A block move of 4 bytes from aligned source data would again use +- an LD.W/ST.W sequence. In most cases, loading the address of +- the source data would require at least one extra instruction. +- It is often more efficient to use 2 single-instruction LIs and +- 2 SHs instead. +- +- (b3) A block move of up to 3 additional bytes would be like (b1). +- +- (b4) A block move of 8 bytes from aligned source data can use two +- LD.W/ST.W sequences. Both sequences are better than the 4 LIs +- and 4 ST.Hs that we'd generate when storing by pieces. +- +- The reasoning for higher alignments is similar: +- +- (c1) A block move of less than 4 bytes would be the same as (b1). +- +- (c2) A block move of 4 bytes would use an LD.W/ST.W sequence. Again, +- loading the address of the source data would typically require +- at least one extra instruction. It is generally better to use +- LUI/ORI/SW instead. +- +- (c3) A block move of up to 3 additional bytes would be like (b1). +- +- (c4) A block move of 8 bytes can use two LD.W/ST.W sequences or a single +- LD.D/ST.D sequence, and in these cases we've traditionally preferred +- the memory copy over the more bulky constant moves. 
*/ +- return size < 8; +-} + + /* Emit straight-line code to move LENGTH bytes from SRC to DEST. + Assume that the areas do not overlap. */ +@@ -4999,20 +5044,13 @@ loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length) + int i; + machine_mode mode; + rtx *regs; ++ ++ if (STRICT_ALIGNMENT) ++ bits = MIN (BITS_PER_WORD, MIN (MEM_ALIGN (src), MEM_ALIGN (dest))); ++ else ++ bits = BITS_PER_WORD; + +- /* Work out how many bits to move at a time. If both operands have +- half-word alignment, it is usually better to move in half words. +- For instance, lh/lh/sh/sh is usually better than lwl/lwr/swl/swr +- and lw/lw/sw/sw is usually better than ldl/ldr/sdl/sdr. +- Otherwise move word-sized chunks. +- +- For ISA_HAS_LWL_LWR we rely on the lwl/lwr & swl/swr load. Otherwise +- picking the minimum of alignment or BITS_PER_WORD gets us the +- desired size for bits. */ +- +- bits = MIN (BITS_PER_WORD, MIN (MEM_ALIGN (src), MEM_ALIGN (dest))); +- +- if (TARGET_LASX) ++ if (ISA_HAS_LASX && !STRICT_ALIGNMENT) + { + bits = BITS_PER_WORD * 4; + mode = V4DImode; +@@ -5029,7 +5067,7 @@ loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length) + + /* Load as many BITS-sized chunks as possible. Use a normal load if + the source has enough alignment, otherwise use left/right pairs. */ +- if (TARGET_LASX) ++ if (ISA_HAS_LASX && !STRICT_ALIGNMENT) + { + for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++) + { +@@ -5047,7 +5085,7 @@ loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length) + } + + /* Copy the chunks to the destination. */ +- if (TARGET_LASX) ++ if (ISA_HAS_LASX && !STRICT_ALIGNMENT) + { + + for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++) +@@ -5065,9 +5103,9 @@ loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length) + /* Mop up any left-over bytes. 
*/ + if (offset < length) + { +- if (TARGET_LASX) ++ if (ISA_HAS_LASX && !STRICT_ALIGNMENT) + { +- if(length - offset >= 16) ++ if (length - offset >= 16) + { + rtx *regs_tmp = XALLOCAVEC (rtx, 1); + regs_tmp[0] = gen_reg_rtx (V2DImode); +@@ -5075,7 +5113,7 @@ loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length) + loongarch_emit_move (adjust_address (dest, V2DImode, offset), regs_tmp[0]); + offset += 16; + } +- if(length - offset >= 8) ++ if (length - offset >= 8) + { + rtx *regs_tmp = XALLOCAVEC (rtx, 1); + regs_tmp[0] = gen_reg_rtx (DImode); +@@ -5083,7 +5121,7 @@ loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length) + loongarch_emit_move (adjust_address (dest, DImode, offset), regs_tmp[0]); + offset += 8; + } +- if(length - offset >= 4) ++ if (length - offset >= 4) + { + rtx *regs_tmp = XALLOCAVEC (rtx, 1); + regs_tmp[0] = gen_reg_rtx (SImode); +@@ -5091,7 +5129,7 @@ loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length) + loongarch_emit_move (adjust_address (dest, SImode, offset), regs_tmp[0]); + offset += 4; + } +- if(length - offset >= 2) ++ if (length - offset >= 2) + { + rtx *regs_tmp = XALLOCAVEC (rtx, 1); + regs_tmp[0] = gen_reg_rtx (HImode); +@@ -5099,7 +5137,7 @@ loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length) + loongarch_emit_move (adjust_address (dest, HImode, offset), regs_tmp[0]); + offset += 2; + } +- if(length - offset >= 1) ++ if (length - offset >= 1) + { + rtx *regs_tmp = XALLOCAVEC (rtx, 1); + regs_tmp[0] = gen_reg_rtx (QImode); +@@ -5108,7 +5146,7 @@ loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length) + offset += 1; + } + +- if(length - offset != 0) ++ if (length - offset != 0) + gcc_unreachable (); + } + else +@@ -5131,8 +5169,8 @@ loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length) + register. Store them in *LOOP_REG and *LOOP_MEM respectively. */ + + static void +-loongarch_adjust_block_mem (rtx mem, HOST_WIDE_INT length, +- rtx *loop_reg, rtx *loop_mem) ++loongarch_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg, ++ rtx *loop_mem) + { + *loop_reg = copy_addr_to_reg (XEXP (mem, 0)); + +@@ -5148,7 +5186,7 @@ loongarch_adjust_block_mem (rtx mem, HOST_WIDE_INT length, + + static void + loongarch_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length, +- HOST_WIDE_INT bytes_per_iter) ++ HOST_WIDE_INT bytes_per_iter) + { + rtx_code_label *label; + rtx src_reg, dest_reg, final_src, test; +@@ -5163,8 +5201,8 @@ loongarch_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length, + + /* Calculate the value that SRC_REG should have after the last iteration + of the loop. */ +- final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length), +- 0, 0, OPTAB_WIDEN); ++ final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length), 0, ++ 0, OPTAB_WIDEN); + + /* Emit the start of the loop. */ + label = gen_label_rtx (); +@@ -5174,8 +5212,10 @@ loongarch_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length, + loongarch_block_move_straight (dest, src, bytes_per_iter); + + /* Move on to the next block. */ +- loongarch_emit_move (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter)); +- loongarch_emit_move (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter)); ++ loongarch_emit_move (src_reg, ++ plus_constant (Pmode, src_reg, bytes_per_iter)); ++ loongarch_emit_move (dest_reg, ++ plus_constant (Pmode, dest_reg, bytes_per_iter)); + + /* Emit the loop condition. 
*/ + test = gen_rtx_NE (VOIDmode, src_reg, final_src); +@@ -5198,12 +5238,12 @@ loongarch_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length, + bool + loongarch_expand_block_move (rtx dest, rtx src, rtx length) + { +- +- int max_move_bytes = (TARGET_LASX ? \ ++ int max_move_bytes = (ISA_HAS_LASX ? \ + LARCH_MAX_MOVE_BYTES_STRAIGHT * 8 \ + : LARCH_MAX_MOVE_BYTES_STRAIGHT); + +- if (CONST_INT_P (length) && INTVAL (length) <= loongarch_max_inline_memcpy_size) ++ if (CONST_INT_P (length) ++ && INTVAL (length) <= loongarch_max_inline_memcpy_size) + { + if (INTVAL (length) <= max_move_bytes) + { +@@ -5213,13 +5253,12 @@ loongarch_expand_block_move (rtx dest, rtx src, rtx length) + else if (optimize) + { + loongarch_block_move_loop (dest, src, INTVAL (length), +- LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER); ++ LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER); + return true; + } + } + return false; + } +- + + /* Expand a QI or HI mode atomic memory operation. + +@@ -5239,13 +5278,12 @@ loongarch_expand_block_move (rtx dest, rtx src, rtx length) + + void + loongarch_expand_atomic_qihi (union loongarch_gen_fn_ptrs generator, +- rtx result, rtx mem, rtx oldval, +- rtx newval, rtx model) ++ rtx result, rtx mem, rtx oldval, rtx newval, ++ rtx model) + { + rtx orig_addr, memsi_addr, memsi, shift, shiftsi, unshifted_mask; + rtx unshifted_mask_reg, mask, inverted_mask, si_op; + rtx res = NULL; +- rtx tmp = NULL; + machine_mode mode; + + mode = GET_MODE (mem); +@@ -5253,7 +5291,7 @@ loongarch_expand_atomic_qihi (union loongarch_gen_fn_ptrs generator, + /* Compute the address of the containing SImode value. */ + orig_addr = force_reg (Pmode, XEXP (mem, 0)); + memsi_addr = loongarch_force_binary (Pmode, AND, orig_addr, +- force_reg (Pmode, GEN_INT (-4))); ++ force_reg (Pmode, GEN_INT (-4))); + + /* Create a memory reference for it. */ + memsi = gen_rtx_MEM (SImode, memsi_addr); +@@ -5263,7 +5301,6 @@ loongarch_expand_atomic_qihi (union loongarch_gen_fn_ptrs generator, + /* Work out the byte offset of the QImode or HImode value, + counting from the least significant byte. */ + shift = loongarch_force_binary (Pmode, AND, orig_addr, GEN_INT (3)); +- + /* Multiply by eight to convert the shift value from bytes to bits. */ + loongarch_emit_binary (ASHIFT, shift, shift, GEN_INT (3)); + +@@ -5301,14 +5338,13 @@ loongarch_expand_atomic_qihi (union loongarch_gen_fn_ptrs generator, + res = gen_reg_rtx (SImode); + + if (newval) +- si_op = generator.fn_7 (res, memsi, mask, inverted_mask, oldval, newval, model); ++ si_op = generator.fn_7 (res, memsi, mask, inverted_mask, oldval, newval, ++ model); + else if (result) + si_op = generator.fn_6 (res, memsi, mask, inverted_mask, oldval, model); + else + si_op = generator.fn_5 (memsi, mask, inverted_mask, oldval, model); + +- //si_op = generator.fn_7 (res, memsi, mask, inverted_mask, oldval, newval, model); +- + emit_insn (si_op); + + if (result) +@@ -5320,24 +5356,14 @@ loongarch_expand_atomic_qihi (union loongarch_gen_fn_ptrs generator, + } + } + +-/* Return true if X is a MEM with the same size as MODE. */ +- +-bool +-loongarch_mem_fits_mode_p (machine_mode mode, rtx x) +-{ +- return (MEM_P (x) +- && MEM_SIZE_KNOWN_P (x) +- && MEM_SIZE (x) == GET_MODE_SIZE (mode)); +-} +- + /* Return true if (zero_extract OP WIDTH BITPOS) can be used as the + source of an "ext" instruction or the destination of an "ins" + instruction. 
OP must be a register operand and the following + conditions must hold: + +- 0 <= BITPOS < GET_MODE_BITSIZE (GET_MODE (op)) +- 0 < WIDTH <= GET_MODE_BITSIZE (GET_MODE (op)) +- 0 < BITPOS + WIDTH <= GET_MODE_BITSIZE (GET_MODE (op)) ++ 0 <= BITPOS < GET_MODE_BITSIZE (GET_MODE (op)) ++ 0 < WIDTH <= GET_MODE_BITSIZE (GET_MODE (op)) ++ 0 < BITPOS + WIDTH <= GET_MODE_BITSIZE (GET_MODE (op)) + + Also reject lengths equal to a word as they are better handled + by the move patterns. */ +@@ -5358,31 +5384,11 @@ loongarch_use_ins_ext_p (rtx op, HOST_WIDE_INT width, HOST_WIDE_INT bitpos) + return true; + } + +- +-/* Return true iff OP1 and OP2 are valid operands together for the +- *and3 patterns. For the cases to consider, +- see the table in the comment before the pattern. */ +- +-bool +-and_operands_ok (machine_mode mode, rtx op1, rtx op2) +-{ +- +- if (memory_operand (op1, mode)) +- { +- return and_load_operand (op2, mode); +- } +- else +- return and_reg_operand (op2, mode); +-} +- + /* Print the text for PRINT_OPERAND punctation character CH to FILE. + The punctuation characters are: + + '.' Print the name of the register with a hard-wired zero (zero or $r0). + '$' Print the name of the stack pointer register (sp or $r3). +- ':' Print "c" to use the compact version if the delay slot is a nop. +- '!' Print "s" to use the short version if the delay slot contains a +- 16-bit instruction. + + See also loongarch_init_print_operand_punct. */ + +@@ -5399,14 +5405,6 @@ loongarch_print_operand_punctuation (FILE *file, int ch) + fputs (reg_names[STACK_POINTER_REGNUM], file); + break; + +- case ':': +- /* When final_sequence is 0, the delay slot will be a nop. We can +- use the compact version where available. The %: formatter will +- only be present if a compact form of the branch is available. */ +- if (final_sequence == 0) +- putc ('c', file); +- break; +- + default: + gcc_unreachable (); + break; +@@ -5420,7 +5418,7 @@ loongarch_init_print_operand_punct (void) + { + const char *p; + +- for (p = ".$:"; *p; p++) ++ for (p = ".$"; *p; p++) + loongarch_print_operand_punct[(unsigned char) *p] = true; + } + +@@ -5429,7 +5427,8 @@ loongarch_init_print_operand_punct (void) + opcode to FILE. */ + + static void +-loongarch_print_int_branch_condition (FILE *file, enum rtx_code code, int letter) ++loongarch_print_int_branch_condition (FILE *file, enum rtx_code code, ++ int letter) + { + switch (code) + { +@@ -5443,7 +5442,7 @@ loongarch_print_int_branch_condition (FILE *file, enum rtx_code code, int letter + case GEU: + case LTU: + case LEU: +- /* Conveniently, the LARCH names for these conditions are the same ++ /* Conveniently, the LoongArch names for these conditions are the same + as their RTL equivalents. */ + fputs (GET_RTX_NAME (code), file); + break; +@@ -5457,7 +5456,8 @@ loongarch_print_int_branch_condition (FILE *file, enum rtx_code code, int letter + /* Likewise floating-point branches. */ + + static void +-loongarch_print_float_branch_condition (FILE *file, enum rtx_code code, int letter) ++loongarch_print_float_branch_condition (FILE *file, enum rtx_code code, ++ int letter) + { + switch (code) + { +@@ -5487,20 +5487,22 @@ loongarch_print_operand_punct_valid_p (unsigned char code) + implement the release portion of memory model MODEL. 
*/ + + static bool +-loongarch_memmodel_needs_rel_and_acq_fence (enum memmodel model) ++loongarch_memmodel_needs_rel_acq_fence (enum memmodel model) + { + switch (model) + { + case MEMMODEL_ACQ_REL: + case MEMMODEL_SEQ_CST: + case MEMMODEL_SYNC_SEQ_CST: +- case MEMMODEL_RELEASE: + case MEMMODEL_SYNC_RELEASE: +- case MEMMODEL_ACQUIRE: +- case MEMMODEL_CONSUME: + case MEMMODEL_SYNC_ACQUIRE: + return true; + ++ case MEMMODEL_RELEASE: ++ case MEMMODEL_ACQUIRE: ++ case MEMMODEL_CONSUME: ++ if (!TARGET_uARCH_LA664) ++ return true; + case MEMMODEL_RELAXED: + return false; + +@@ -5517,25 +5519,25 @@ loongarch_memmodel_needs_release_fence (enum memmodel model) + { + switch (model) + { +- case MEMMODEL_ACQ_REL: +- case MEMMODEL_SEQ_CST: +- case MEMMODEL_SYNC_SEQ_CST: +- case MEMMODEL_RELEASE: +- case MEMMODEL_SYNC_RELEASE: +- return true; ++ case MEMMODEL_ACQ_REL: ++ case MEMMODEL_SEQ_CST: ++ case MEMMODEL_SYNC_SEQ_CST: ++ case MEMMODEL_RELEASE: ++ case MEMMODEL_SYNC_RELEASE: ++ return true; + +- case MEMMODEL_ACQUIRE: +- case MEMMODEL_CONSUME: +- case MEMMODEL_SYNC_ACQUIRE: +- case MEMMODEL_RELAXED: +- return false; ++ case MEMMODEL_ACQUIRE: ++ case MEMMODEL_CONSUME: ++ case MEMMODEL_SYNC_ACQUIRE: ++ case MEMMODEL_RELAXED: ++ return false; + +- default: +- gcc_unreachable (); ++ default: ++ gcc_unreachable (); + } + } + +-/* Implement TARGET_PRINT_OPERAND. The LARCH-specific operand codes are: ++/* Implement TARGET_PRINT_OPERAND. The LoongArch-specific operand codes are: + + 'E' Print CONST_INT OP element 0 of a replicated CONST_VECTOR in decimal. + 'X' Print CONST_INT OP in hexadecimal format. +@@ -5674,7 +5676,8 @@ loongarch_print_operand (FILE *file, rtx op, int letter) + break; + + case 'N': +- loongarch_print_int_branch_condition (file, reverse_condition (code), letter); ++ loongarch_print_int_branch_condition (file, reverse_condition (code), ++ letter); + break; + + case 'F': +@@ -5683,19 +5686,20 @@ loongarch_print_operand (FILE *file, rtx op, int letter) + + case 'W': + loongarch_print_float_branch_condition (file, reverse_condition (code), +- letter); ++ letter); + break; + + case 'T': + case 't': + { + int truth = (code == NE) == (letter == 'T'); +- fputc ("zfnt"[truth * 2 + ST_REG_P (REGNO (XEXP (op, 0)))], file); ++ fputc ("zfnt"[truth * 2 + FCC_REG_P (REGNO (XEXP (op, 0)))], file); + } + break; + + case 'Y': +- if (code == CONST_INT && UINTVAL (op) < ARRAY_SIZE (loongarch_fp_conditions)) ++ if (code == CONST_INT ++ && UINTVAL (op) < ARRAY_SIZE (loongarch_fp_conditions)) + fputs (loongarch_fp_conditions[UINTVAL (op)], file); + else + output_operand_lossage ("'%%%c' is not a valid operand prefix", +@@ -5750,18 +5754,36 @@ loongarch_print_operand (FILE *file, rtx op, int letter) + break; + + case 'A': +- if (loongarch_memmodel_needs_rel_and_acq_fence ((enum memmodel) INTVAL (op))) ++ if (loongarch_memmodel_needs_rel_acq_fence ((enum memmodel) INTVAL (op))) + fputs ("_db", file); + break; + + case 'G': + if (loongarch_memmodel_needs_release_fence ((enum memmodel) INTVAL (op))) +- fputs ("dbar\t0", file); ++ fputs ("dbar\t0x11", file); ++ break; ++ ++ case 'J': ++ if (TARGET_uARCH_LA664) ++ { ++ enum memmodel model = memmodel_from_int (INTVAL (op)); ++ if (is_mm_release (model)) ++ fputs ("dbar\t0x12", file); ++ } ++ break; ++ ++ case 'K': ++ if (TARGET_uARCH_LA664) ++ { ++ enum memmodel model = memmodel_from_int (INTVAL (op)); ++ if (is_mm_acquire (model)) ++ fputs ("dbar\t0x18", file); ++ } + break; + + case 'i': + if (code != REG) +- fputs ("i", file); ++ fputs ("i", file); + break; + + 
default: +@@ -5770,10 +5792,7 @@ loongarch_print_operand (FILE *file, rtx op, int letter) + case REG: + { + unsigned int regno = REGNO (op); +- if ((letter == 'M') +- || letter == 'D') +- regno++; +- else if (letter && letter != 'z' && letter != 'M' && letter != 'L') ++ if (letter && letter != 'z') + output_operand_lossage ("invalid use of '%%%c'", letter); + fprintf (file, "%s", reg_names[regno]); + } +@@ -5781,8 +5800,8 @@ loongarch_print_operand (FILE *file, rtx op, int letter) + + case MEM: + if (letter == 'D') +- output_address (GET_MODE (op), plus_constant (Pmode, +- XEXP (op, 0), 4)); ++ output_address (GET_MODE (op), ++ plus_constant (Pmode, XEXP (op, 0), 4)); + else if (letter == 'b') + { + gcc_assert (REG_P (XEXP (op, 0))); +@@ -5809,7 +5828,7 @@ loongarch_print_operand (FILE *file, rtx op, int letter) + /* Implement TARGET_PRINT_OPERAND_ADDRESS. */ + + static void +-loongarch_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x) ++loongarch_print_operand_address (FILE *file, machine_mode /* mode */, rtx x) + { + struct loongarch_address_info addr; + +@@ -5821,6 +5840,11 @@ loongarch_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x) + loongarch_print_operand (file, addr.offset, 0); + return; + ++ case ADDRESS_REG_REG: ++ fprintf (file, "%s,%s", reg_names[REGNO (addr.reg)], ++ reg_names[REGNO (addr.offset)]); ++ return; ++ + case ADDRESS_CONST_INT: + fprintf (file, "%s,", reg_names[GP_REG_FIRST]); + output_addr_const (file, x); +@@ -5830,37 +5854,17 @@ loongarch_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x) + output_addr_const (file, loongarch_strip_unspec_address (x)); + return; + } +- if (GET_CODE (x) == CONST_INT) ++ if (CONST_INT_P (x)) + output_addr_const (file, x); + else + gcc_unreachable (); + } + +- +-/* Implement TARGET_ENCODE_SECTION_INFO. */ +- +-static void +-loongarch_encode_section_info (tree decl, rtx rtl, int first) +-{ +- default_encode_section_info (decl, rtl, first); +- +- if (TREE_CODE (decl) == FUNCTION_DECL) +- { +- rtx symbol = XEXP (rtl, 0); +- tree type = TREE_TYPE (decl); +- +- /* Encode whether the symbol is short or long. */ +- if ((TARGET_LONG_CALLS && !loongarch_near_type_p (type)) +- || loongarch_far_type_p (type)) +- SYMBOL_REF_FLAGS (symbol) |= SYMBOL_FLAG_LONG_CALL; +- } +-} +- +-/* Implement TARGET_SELECT_RTX_SECTION. */ ++/* Implement TARGET_ASM_SELECT_RTX_SECTION. */ + + static section * + loongarch_select_rtx_section (machine_mode mode, rtx x, +- unsigned HOST_WIDE_INT align) ++ unsigned HOST_WIDE_INT align) + { + /* ??? Consider using mergeable small data sections. */ + if (loongarch_rtx_constant_in_small_data_p (mode)) +@@ -5871,12 +5875,10 @@ loongarch_select_rtx_section (machine_mode mode, rtx x, + + /* Implement TARGET_ASM_FUNCTION_RODATA_SECTION. + +- The complication here is that, with the combination +- !TARGET_ABSOLUTE_ABICALLS , jump tables will use +- absolute addresses, and should therefore not be included in the +- read-only part of a DSO. Handle such cases by selecting a normal +- data section instead of a read-only one. The logic apes that in +- default_function_rodata_section. */ ++ The complication here is that jump atbles will use absolute addresses, ++ and should therefore not be included in the read-only part of a DSO. ++ Handle such cases by selecting a normal data section instead of a ++ read-only one. The logic apes that in default_function_rodata_section. 
*/ + + static section * + loongarch_function_rodata_section (tree decl) +@@ -5889,17 +5891,11 @@ loongarch_function_rodata_section (tree decl) + static bool + loongarch_in_small_data_p (const_tree decl) + { +- unsigned HOST_WIDE_INT size; ++ int size; + + if (TREE_CODE (decl) == STRING_CST || TREE_CODE (decl) == FUNCTION_DECL) + return false; + +- /* We don't yet generate small-data references for +- VxWorks RTP code. See the related -G handling in +- loongarch_option_override. */ +- if (TARGET_VXWORKS_RTP) +- return false; +- + if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl) != 0) + { + const char *name; +@@ -5918,23 +5914,12 @@ loongarch_in_small_data_p (const_tree decl) + /* We have traditionally not treated zero-sized objects as small data, + so this is now effectively part of the ABI. */ + size = int_size_in_bytes (TREE_TYPE (decl)); +- return size > 0 && size <= loongarch_small_data_threshold; ++ return size > 0 && size <= g_switch_value; + } + +-/* Implement TARGET_USE_ANCHORS_FOR_SYMBOL_P. We don't want to use +- anchors for small data: the GP register acts as an anchor in that +- case. We also don't want to use them for PC-relative accesses, +- where the PC acts as an anchor. */ +- +-static bool +-loongarch_use_anchors_for_symbol_p (const_rtx symbol) +-{ +- return default_use_anchors_for_symbol_p (symbol); +-} +- +-/* The LARCH debug format wants all automatic variables and arguments ++/* The LoongArch debug format wants all automatic variables and arguments + to be in terms of the virtual frame pointer (stack pointer before +- any adjustment in the function), while the LARCH 3.0 linker wants ++ any adjustment in the function), while the LoongArch linker wants + the frame pointer to be the stack pointer after the initial + adjustment. So, we do the adjustment here. The arg pointer (which + is eliminated) points to the virtual frame pointer, while the frame +@@ -5961,7 +5946,7 @@ loongarch_debugger_offset (rtx addr, HOST_WIDE_INT offset) + + return offset; + } +- ++ + /* Implement ASM_OUTPUT_EXTERNAL. */ + + void +@@ -5971,7 +5956,7 @@ loongarch_output_external (FILE *file, tree decl, const char *name) + + /* We output the name if and only if TREE_SYMBOL_REFERENCED is + set in order to avoid putting out names that are never really +- used. */ ++ used. */ + if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl))) + { + if (loongarch_in_small_data_p (decl)) +@@ -6014,33 +5999,6 @@ loongarch_output_dwarf_dtprel (FILE *file, int size, rtx x) + fputs ("+0x8000", file); + } + +-/* Implement TARGET_DWARF_REGISTER_SPAN. */ +- +-static rtx +-loongarch_dwarf_register_span (rtx reg) +-{ +- rtx high, low; +- machine_mode mode; +- +- mode = GET_MODE (reg); +- +- return NULL_RTX; +-} +- +-/* Implement TARGET_DWARF_FRAME_REG_MODE. */ +- +-static machine_mode +-loongarch_dwarf_frame_reg_mode (int regno) +-{ +- machine_mode mode = default_dwarf_frame_reg_mode (regno); +- +- if (FP_REG_P (regno) && loongarch_abi == ABILP32 && TARGET_FLOAT64) +- mode = SImode; +- +- return mode; +-} +- +- + /* Implement ASM_OUTPUT_ASCII. */ + + void +@@ -6072,7 +6030,7 @@ loongarch_output_ascii (FILE *stream, const char *string, size_t len) + cur_pos += 4; + } + +- if (cur_pos > 72 && i+1 < len) ++ if (cur_pos > 72 && i + 1 < len) + { + cur_pos = 17; + fprintf (stream, "\"\n\t.ascii\t\""); +@@ -6081,194 +6039,6 @@ loongarch_output_ascii (FILE *stream, const char *string, size_t len) + fprintf (stream, "\"\n"); + } + +-/* Emit either a label, .comm, or .lcomm directive. 
When using assembler +- macros, mark the symbol as written so that loongarch_asm_output_external +- won't emit an .extern for it. STREAM is the output file, NAME is the +- name of the symbol, INIT_STRING is the string that should be written +- before the symbol and FINAL_STRING is the string that should be +- written after it. FINAL_STRING is a printf format that consumes the +- remaining arguments. */ +- +-void +-loongarch_declare_object (FILE *stream, const char *name, const char *init_string, +- const char *final_string, ...) +-{ +- va_list ap; +- +- fputs (init_string, stream); +- assemble_name (stream, name); +- va_start (ap, final_string); +- vfprintf (stream, final_string, ap); +- va_end (ap); +- +- tree name_tree = get_identifier (name); +- TREE_ASM_WRITTEN (name_tree) = 1; +-} +- +-/* Declare a common object of SIZE bytes using asm directive INIT_STRING. +- NAME is the name of the object and ALIGN is the required alignment +- in bytes. TAKES_ALIGNMENT_P is true if the directive takes a third +- alignment argument. */ +- +-void +-loongarch_declare_common_object (FILE *stream, const char *name, +- const char *init_string, +- unsigned HOST_WIDE_INT size, +- unsigned int align, bool takes_alignment_p) +-{ +- if (!takes_alignment_p) +- { +- size += (align / BITS_PER_UNIT) - 1; +- size -= size % (align / BITS_PER_UNIT); +- loongarch_declare_object (stream, name, init_string, +- "," HOST_WIDE_INT_PRINT_UNSIGNED "\n", size); +- } +- else +- loongarch_declare_object (stream, name, init_string, +- "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n", +- size, align / BITS_PER_UNIT); +-} +- +-/* Implement ASM_OUTPUT_ALIGNED_DECL_COMMON. This is usually the same as the +- elfos.h version, but we also need to handle -muninit-const-in-rodata. */ +- +-void +-loongarch_output_aligned_decl_common (FILE *stream, tree decl, const char *name, +- unsigned HOST_WIDE_INT size, +- unsigned int align) +-{ +- loongarch_declare_common_object (stream, name, "\n\t.comm\t", +- size, align, true); +-} +- +-#ifdef ASM_OUTPUT_SIZE_DIRECTIVE +-extern int size_directive_output; +- +-/* Implement ASM_DECLARE_OBJECT_NAME. This is like most of the standard ELF +- definitions except that it uses loongarch_declare_object to emit the label. */ +- +-void +-loongarch_declare_object_name (FILE *stream, const char *name, +- tree decl ATTRIBUTE_UNUSED) +-{ +-#ifdef ASM_OUTPUT_TYPE_DIRECTIVE +-#ifdef USE_GNU_UNIQUE_OBJECT +- /* As in elfos.h. */ +- if (USE_GNU_UNIQUE_OBJECT && DECL_ONE_ONLY (decl) +- && (!DECL_ARTIFICIAL (decl) || !TREE_READONLY (decl))) +- ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "gnu_unique_object"); +- else +-#endif +- ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object"); +-#endif +- +- size_directive_output = 0; +- if (!flag_inhibit_size_directive && DECL_SIZE (decl)) +- { +- HOST_WIDE_INT size; +- +- size_directive_output = 1; +- size = int_size_in_bytes (TREE_TYPE (decl)); +- ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size); +- } +- +- loongarch_declare_object (stream, name, "", ":\n"); +-} +- +-/* Implement ASM_FINISH_DECLARE_OBJECT. This is generic ELF stuff. 
*/ +- +-void +-loongarch_finish_declare_object (FILE *stream, tree decl, int top_level, int at_end) +-{ +- const char *name; +- +- name = XSTR (XEXP (DECL_RTL (decl), 0), 0); +- if (!flag_inhibit_size_directive +- && DECL_SIZE (decl) != 0 +- && !at_end +- && top_level +- && DECL_INITIAL (decl) == error_mark_node +- && !size_directive_output) +- { +- HOST_WIDE_INT size; +- +- size_directive_output = 1; +- size = int_size_in_bytes (TREE_TYPE (decl)); +- ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size); +- } +-} +-#endif +- +-/* Mark text contents as code or data, mainly for the purpose of correct +- disassembly. Emit a local symbol and set its type appropriately for +- that purpose. Also emit `.insn' if marking contents as code so that +- the ISA mode is recorded and any padding that follows is disassembled +- as correct instructions. */ +- +-void +-loongarch_set_text_contents_type (FILE *file ATTRIBUTE_UNUSED, +- const char *prefix ATTRIBUTE_UNUSED, +- unsigned long num ATTRIBUTE_UNUSED, +- bool function_p ATTRIBUTE_UNUSED) +-{ +-#ifdef ASM_OUTPUT_TYPE_DIRECTIVE +- char buf[(sizeof (num) * 10) / 4 + 2]; +- const char *fnname; +- char *sname; +- rtx symbol; +- +- sprintf (buf, "%lu", num); +- symbol = XEXP (DECL_RTL (current_function_decl), 0); +- fnname = targetm.strip_name_encoding (XSTR (symbol, 0)); +- sname = ACONCAT ((prefix, fnname, "_", buf, NULL)); +- +- ASM_OUTPUT_TYPE_DIRECTIVE (file, sname, function_p ? "function" : "object"); +- assemble_name (file, sname); +- fputs (":\n", file); +-// if (function_p) +-// fputs ("\t.insn\n", file); +-#endif +-} +- +- +-/* Implement TARGET_ASM_FILE_START. */ +- +-static void +-loongarch_file_start (void) +-{ +- default_file_start (); +- +- /* Generate a special section to describe the ABI switches used to +- produce the resultant binary. */ +-} +- +- +-/* Return true if REGNO is a register that is ordinarily call-clobbered +- but must nevertheless be preserved by an interrupt handler. */ +- +-static bool +-loongarch_interrupt_extra_call_saved_reg_p (unsigned int regno) +-{ +- if (GP_REG_P (regno) +- && cfun->machine->use_shadow_register_set == SHADOW_SET_NO) +- { +- /* $0 is hard-wired. */ +- if (regno == GP_REG_FIRST) +- return false; +- +- /* The function will return the stack pointer to its original value +- anyway. */ +- if (regno == STACK_POINTER_REGNUM) +- return false; +- +- /* Otherwise, return true for registers that aren't ordinarily +- call-clobbered. */ +- return call_used_regs[regno]; +- } +- +- return false; +-} +- + /* Implement TARGET_FRAME_POINTER_REQUIRED. */ + + static bool +@@ -6282,17 +6052,20 @@ loongarch_frame_pointer_required (void) + return false; + } + +-/* Make sure that we're not trying to eliminate to the wrong hard frame +- pointer. */ ++/* Implement TARGET_CAN_ELIMINATE. Make sure that we're not trying ++ to eliminate to the wrong hard frame pointer. */ + + static bool + loongarch_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) + { ++ if (stack_realign_fp) ++ return ((from == ARG_POINTER_REGNUM ++ && to == HARD_FRAME_POINTER_REGNUM) ++ || (from == FRAME_POINTER_REGNUM ++ && to == STACK_POINTER_REGNUM)); + return (to == HARD_FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM); + } + +- +- + /* Implement RETURN_ADDR_RTX. We do not support moving back to a + previous frame. 
*/ + +@@ -6315,73 +6088,21 @@ loongarch_set_return_address (rtx address, rtx scratch) + rtx slot_address; + + gcc_assert (BITSET_P (cfun->machine->frame.mask, RETURN_ADDR_REGNUM)); ++ + if (frame_pointer_needed) + slot_address = loongarch_add_offset (scratch, hard_frame_pointer_rtx, +- -UNITS_PER_WORD); ++ -UNITS_PER_WORD); + else + slot_address = loongarch_add_offset (scratch, stack_pointer_rtx, +- cfun->machine->frame.gp_sp_offset); +- loongarch_emit_move (gen_frame_mem (GET_MODE (address), slot_address), address); +-} +- +- +-/* Fill *BASE and *OFFSET such that *BASE + *OFFSET refers to the +- cprestore slot. LOAD_P is true if the caller wants to load from +- the cprestore slot; it is false if the caller wants to store to +- the slot. */ +- +-static void +-loongarch_get_cprestore_base_and_offset (rtx *base, HOST_WIDE_INT *offset, +- bool load_p) +-{ +- const struct loongarch_frame_info *frame; +- +- frame = &cfun->machine->frame; +- /* .cprestore always uses the stack pointer instead of the frame pointer. +- We have a free choice for direct stores, +- Using the stack pointer would sometimes give more +- (early) scheduling freedom, but using the frame pointer would +- sometimes give more (late) scheduling freedom. It's hard to +- predict which applies to a given function, so let's keep things +- simple. +- +- Loads must always use the frame pointer in functions that call +- alloca, and there's little benefit to using the stack pointer +- otherwise. */ +- if (frame_pointer_needed) +- { +- *base = hard_frame_pointer_rtx; +- *offset = frame->args_size - frame->hard_frame_pointer_offset; +- } +- else +- { +- *base = stack_pointer_rtx; +- *offset = frame->args_size; +- } +-} ++ cfun->machine->frame.gp_sp_offset); + +-/* Return true if X is the load or store address of the cprestore slot; +- LOAD_P says which. */ +- +-bool +-loongarch_cprestore_address_p (rtx x, bool load_p) +-{ +- rtx given_base, required_base; +- HOST_WIDE_INT given_offset, required_offset; +- +- loongarch_split_plus (x, &given_base, &given_offset); +- loongarch_get_cprestore_base_and_offset (&required_base, &required_offset, load_p); +- return given_base == required_base && given_offset == required_offset; ++ loongarch_emit_move (gen_frame_mem (GET_MODE (address), slot_address), ++ address); + } + +- +-/* A function to save or store a register. The first argument is the +- register and the second is the stack slot. 
*/ +-typedef void (*loongarch_save_restore_fn) (rtx, rtx); +- + /* LOONGSON LA464 Emit insn pattern for gssq and gslq*/ + void +-loongarch_la464_emit_128bit_load(rtx operands[]) ++loongarch_la464_emit_128bit_load (rtx operands[]) + { + rtx op0; + rtx op1; +@@ -6389,9 +6110,9 @@ loongarch_la464_emit_128bit_load(rtx operands[]) + rtx op3; + + #if 0 /*for debug*/ +- printf("464po: emit 128 PO LOAD!\n"); +- printf("reg num of op0 is: %d\n",REGNO(operands[0])); +- printf("reg num of op2 is: %d\n",REGNO(operands[2])); ++ printf ("464po: emit 128 PO LOAD!\n"); ++ printf ("reg num of op0 is: %d\n",REGNO (operands[0])); ++ printf ("reg num of op2 is: %d\n",REGNO (operands[2])); + #endif + op0 = gen_rtx_REG (GET_MODE (operands[0]), REGNO (operands[0])); + op1 = operands[1]; +@@ -6403,8 +6124,8 @@ loongarch_la464_emit_128bit_load(rtx operands[]) + gen_rtx_SET (op2,op3)))); + } + +-void +-loongarch_la464_emit_128bit_store(rtx operands[]) ++void ++loongarch_la464_emit_128bit_store (rtx operands[]) + { + rtx op0; + rtx op1; +@@ -6412,10 +6133,10 @@ loongarch_la464_emit_128bit_store(rtx operands[]) + rtx op3; + + #if 0 /*for debug*/ +- printf("464po: emit 128 PO STORE!\n"); +- printf("reg num of op1 is: %d\n",REGNO(operands[1])); +- printf("reg num of op3 is: %d\n",REGNO(operands[3])); +-#endif ++ printf ("464po: emit 128 PO STORE!\n"); ++ printf ("reg num of op1 is: %d\n",REGNO (operands[1])); ++ printf ("reg num of op3 is: %d\n",REGNO (operands[3])); ++#endif + op0 = operands[0]; + op1 = gen_rtx_REG (GET_MODE (operands[1]), REGNO (operands[1])); + op2 = operands[2]; +@@ -6427,405 +6148,109 @@ loongarch_la464_emit_128bit_store(rtx operands[]) + + } + ++/* Return true if register REGNO can store a value of mode MODE. ++ The result of this function is cached in loongarch_hard_regno_mode_ok. */ + +- ++static bool ++loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode) ++{ ++ unsigned int size; ++ enum mode_class mclass; + +-/* Implement ASM_DECLARE_FUNCTION_NAME. */ ++ if (mode == FCCmode) ++ return FCC_REG_P (regno); + +-void loongarch_declare_function_name(FILE *stream ATTRIBUTE_UNUSED, +- const char *name, tree fndecl ATTRIBUTE_UNUSED) +-{ +- loongarch_start_function_definition (name); +-} ++ size = GET_MODE_SIZE (mode); ++ mclass = GET_MODE_CLASS (mode); + +-/* Implement TARGET_OUTPUT_FUNCTION_PROLOGUE. */ ++ if (GP_REG_P (regno) && !LSX_SUPPORTED_MODE_P (mode) && !LASX_SUPPORTED_MODE_P (mode)) ++ return ((regno - GP_REG_FIRST) & 1) == 0 || size <= UNITS_PER_WORD; + +-static void +-loongarch_output_function_prologue (FILE *file) +-{ ++ /* For LSX, allow TImode and 128-bit vector modes in all FPR. */ ++ if (FP_REG_P (regno) && LSX_SUPPORTED_MODE_P (mode)) ++ return true; ++ ++ /* For LASX, allow TImode and 256-bit vector modes in all FPR. FIXME: */ ++ if (FP_REG_P (regno) && LASX_SUPPORTED_MODE_P (mode)) ++ return true; ++ ++ if (FP_REG_P (regno)) ++ { ++ if (mclass == MODE_FLOAT ++ || mclass == MODE_COMPLEX_FLOAT ++ || mclass == MODE_VECTOR_FLOAT) ++ return size <= UNITS_PER_FPVALUE; ++ ++ /* Allow integer modes that fit into a single register. We need ++ to put integers into FPRs when using instructions like CVT ++ and TRUNC. There's no point allowing sizes smaller than a word, ++ because the FPU has no appropriate load/store instructions. */ ++ if (mclass == MODE_INT) ++ return size >= MIN_UNITS_PER_WORD && size <= UNITS_PER_FPREG; ++ } ++ ++ return false; + } + +-/* Implement TARGET_OUTPUT_FUNCTION_EPILOGUE. */ ++/* Implement TARGET_HARD_REGNO_MODE_OK. 
*/ + +-static void +-loongarch_output_function_epilogue (FILE *) ++static bool ++loongarch_hard_regno_mode_ok (unsigned int regno, machine_mode mode) + { +- const char *fnname; +- +- /* Get the function name the same way that toplev.c does before calling +- assemble_start_function. This is needed so that the name used here +- exactly matches the name used in ASM_DECLARE_FUNCTION_NAME. */ +- fnname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0); +- loongarch_end_function_definition (fnname); ++ return loongarch_hard_regno_mode_ok_p[mode][regno]; + } +- + +-#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP) ++static bool ++loongarch_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode) ++{ ++ if (ISA_HAS_LSX && FP_REG_P (regno) && GET_MODE_SIZE (mode) > 8) ++ return true; + +-#if PROBE_INTERVAL > 16384 +-#error Cannot use indexed addressing mode for stack probing +-#endif ++ return false; ++} + +-/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE, +- inclusive. These are offsets from the current stack pointer. */ ++/* Implement TARGET_HARD_REGNO_NREGS. */ + +-static void +-loongarch_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size) ++static unsigned int ++loongarch_hard_regno_nregs (unsigned int regno, machine_mode mode) + { ++ if (FCC_REG_P (regno)) ++ /* The size of FP status registers is always 4, because they only hold ++ FCCmode values, and FCCmode is always considered to be 4 bytes wide. */ ++ return (GET_MODE_SIZE (mode) + 3) / 4; + +- /* See if we have a constant small number of probes to generate. If so, +- that's the easy case. */ +- if ((TARGET_64BIT && (first + size <= 8 * PROBE_INTERVAL)) +- || (!TARGET_64BIT && (first + size <= 2048))) ++ if (FP_REG_P (regno)) + { +- HOST_WIDE_INT i; ++ if (LSX_SUPPORTED_MODE_P (mode)) ++ return 1; + +- /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until +- it exceeds SIZE. If only one probe is needed, this will not +- generate any code. Then probe at FIRST + SIZE. */ +- for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL) +- emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, +- -(first + i))); ++ if (LASX_SUPPORTED_MODE_P (mode)) ++ return 1; + +- emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, +- -(first + size))); ++ return (GET_MODE_SIZE (mode) + UNITS_PER_FPREG - 1) / UNITS_PER_FPREG; + } + +- /* Otherwise, do the same as above, but in a loop. Note that we must be +- extra careful with variables wrapping around because we might be at +- the very top (or the very bottom) of the address space and we have +- to be able to handle this case properly; in particular, we use an +- equality test for the loop condition. */ +- else +- { +- HOST_WIDE_INT rounded_size; +- rtx r13 = LARCH_PROLOGUE_TEMP (Pmode); +- rtx r12 = LARCH_PROLOGUE_TEMP2 (Pmode); +- rtx r14 = LARCH_PROLOGUE_TEMP3 (Pmode); ++ /* All other registers are word-sized. */ ++ return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; ++} + +- /* Sanity check for the addressing mode we're going to use. */ +- gcc_assert (first <= 16384); ++/* Implement CLASS_MAX_NREGS, taking the maximum of the cases ++ in loongarch_hard_regno_nregs. */ + ++int ++loongarch_class_max_nregs (enum reg_class rclass, machine_mode mode) ++{ ++ int size; ++ HARD_REG_SET left; + +- /* Step 1: round SIZE to the previous multiple of the interval. 
*/ ++ size = 0x8000; ++ COPY_HARD_REG_SET (left, reg_class_contents[(int) rclass]); ++ if (hard_reg_set_intersect_p (left, reg_class_contents[(int) FCC_REGS])) ++ { ++ if (loongarch_hard_regno_mode_ok (FCC_REG_FIRST, mode)) ++ size = MIN (size, 4); + +- rounded_size = ROUND_DOWN (size, PROBE_INTERVAL); +- /* TEST_ADDR = SP + FIRST */ +- if (first != 0) +- { +- emit_move_insn (r14, GEN_INT (first)); +- emit_insn (gen_rtx_SET (r13, gen_rtx_MINUS (Pmode, stack_pointer_rtx, r14))); +- } +- else +- emit_move_insn (r13, stack_pointer_rtx); +- +- /* Step 2: compute initial and final value of the loop counter. */ +- +- emit_move_insn (r14, GEN_INT (PROBE_INTERVAL)); +- if (rounded_size == 0) +- emit_move_insn (r12, r13); +- /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */ +- else +- { +- emit_move_insn (r12, GEN_INT (rounded_size)); +- emit_insn (gen_rtx_SET (r12, gen_rtx_MINUS (Pmode, r13, r12))); +- /* Step 3: the loop +- +- do +- { +- TEST_ADDR = TEST_ADDR + PROBE_INTERVAL +- probe at TEST_ADDR +- } +- while (TEST_ADDR != LAST_ADDR) +- +- probes at FIRST + N * PROBE_INTERVAL for values of N from 1 +- until it is equal to ROUNDED_SIZE. */ +- +- emit_insn (PMODE_INSN (gen_probe_stack_range, (r13, r13, r12, r14))); +- } +- +- /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time +- that SIZE is equal to ROUNDED_SIZE. */ +- +- if (size != rounded_size) +- { +- if (TARGET_64BIT) +- emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size)); +- else +- { +- HOST_WIDE_INT i; +- for (i = 2048; i < (size - rounded_size); i += 2048 ) +- { +- emit_stack_probe (plus_constant (Pmode, r12, -i)); +- emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, r12, -2048))); +- } +- emit_stack_probe (plus_constant (Pmode, r12, -(size - rounded_size - i + 2048))); +- } +- } +- } +- +- /* Make sure nothing is scheduled before we are done. */ +- emit_insn (gen_blockage ()); +-} +- +-/* Probe a range of stack addresses from REG1 to REG2 inclusive. These are +- absolute addresses. */ +- +-const char * +-loongarch_output_probe_stack_range (rtx reg1, rtx reg2, rtx reg3) +-{ +- static int labelno = 0; +- char loop_lab[32], tmp[64]; +- rtx xops[3]; +- +- ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++); +- +- /* Loop. */ +- ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); +- +- /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */ +- xops[0] = reg1; +- xops[1] = GEN_INT (-PROBE_INTERVAL); +- xops[2] = reg3; +- if (TARGET_64BIT) +- output_asm_insn ("sub.d\t%0,%0,%2", xops); +- else +- output_asm_insn ("sub.w\t%0,%0,%2", xops); +- +- /* Probe at TEST_ADDR, test if TEST_ADDR == LAST_ADDR and branch. */ +- xops[1] = reg2; +- strcpy (tmp, "bne\t%0,%1,"); +- if (TARGET_64BIT) +- output_asm_insn ("st.d\t$r0,%0,0", xops); +- else +- output_asm_insn ("st.w\t$r0,%0,0", xops); +- output_asm_insn (strcat (tmp, &loop_lab[1]), xops); +- +- return ""; +-} +- +-/* Expand the "prologue" pattern. 
*/ +- +-void +-loongarch_expand_prologue (void) +-{ +- struct loongarch_frame_info *frame = &cfun->machine->frame; +- HOST_WIDE_INT size = frame->total_size; +- unsigned mask = frame->mask; +- rtx insn; +- +- if (flag_stack_usage_info) +- current_function_static_stack_size = size; +- +- if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK +- || flag_stack_clash_protection) +- { +- if (crtl->is_leaf && !cfun->calls_alloca) +- { +- if (size > PROBE_INTERVAL && size > get_stack_check_protect ()) +- loongarch_emit_probe_stack_range (get_stack_check_protect (), +- size - get_stack_check_protect ()); +- } +- else if (size > 0) +- loongarch_emit_probe_stack_range (get_stack_check_protect (), size); +- } +- +- /* When optimizing for size, call a subroutine to save the registers. */ +- if (loongarch_use_save_libcall (frame)) +- { +- rtx dwarf = NULL_RTX; +- dwarf = loongarch_adjust_libcall_cfi_prologue (); +- +- frame->mask = 0; /* Temporarily fib that we need not save GPRs. */ +- size -= frame->save_libcall_adjustment; +- insn = emit_insn (gen_gpr_save (GEN_INT (mask))); +- +- RTX_FRAME_RELATED_P (insn) = 1; +- REG_NOTES (insn) = dwarf; +- } +- +- /* Save the registers. */ +- if ((frame->mask | frame->fmask) != 0) +- { +- HOST_WIDE_INT step1 = MIN (size, loongarch_first_stack_step (frame)); +- +- insn = gen_add3_insn (stack_pointer_rtx, +- stack_pointer_rtx, +- GEN_INT (-step1)); +- RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; +- size -= step1; +- loongarch_for_each_saved_reg (size, loongarch_save_reg); +- } +- +- frame->mask = mask; /* Undo the above fib. */ +- +- /* Set up the frame pointer, if we're using one. */ +- if (frame_pointer_needed) +- { +- insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx, +- GEN_INT (frame->hard_frame_pointer_offset - size)); +- RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; +- +- loongarch_emit_stack_tie (); +- } +- +- /* Allocate the rest of the frame. */ +- if (size > 0) +- { +- if (SMALL_OPERAND (-size)) +- { +- insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, +- GEN_INT (-size)); +- RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; +- } +- else +- { +- loongarch_emit_move (N_LARCH_PROLOGUE_TEMP (Pmode), GEN_INT (-size)); +- emit_insn (gen_add3_insn (stack_pointer_rtx, +- stack_pointer_rtx, +- N_LARCH_PROLOGUE_TEMP (Pmode))); +- +- /* Describe the effect of the previous instructions. */ +- insn = plus_constant (Pmode, stack_pointer_rtx, -size); +- insn = gen_rtx_SET (stack_pointer_rtx, insn); +- loongarch_set_frame_expr (insn); +- } +- } +-} +- +- +-/* Return true if register REGNO can store a value of mode MODE. +- The result of this function is cached in loongarch_hard_regno_mode_ok. */ +- +-static bool +-loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode) +-{ +- unsigned int size; +- enum mode_class mclass; +- +- if (mode == FCCmode) +- return ST_REG_P (regno); +- +- size = GET_MODE_SIZE (mode); +- mclass = GET_MODE_CLASS (mode); +- +- if (GP_REG_P (regno) && !LSX_SUPPORTED_MODE_P (mode) && !LASX_SUPPORTED_MODE_P (mode)) +- return ((regno - GP_REG_FIRST) & 1) == 0 || size <= UNITS_PER_WORD; +- +- /* For LSX, allow TImode and 128-bit vector modes in all FPR. */ +- if (FP_REG_P (regno) && LSX_SUPPORTED_MODE_P (mode)) +- return true; +- +- /* For LASX, allow TImode and 256-bit vector modes in all FPR. 
FIXME: */ +- if (FP_REG_P (regno) && LASX_SUPPORTED_MODE_P (mode)) +- return true; +- +- if (FP_REG_P (regno) +- && (((regno - FP_REG_FIRST) % MAX_FPRS_PER_FMT) == 0 +- || (MIN_FPRS_PER_FMT == 1 && size <= UNITS_PER_FPREG))) +- { +- if (mclass == MODE_FLOAT +- || mclass == MODE_COMPLEX_FLOAT +- || mclass == MODE_VECTOR_FLOAT) +- return size <= UNITS_PER_FPVALUE; +- +- /* Allow integer modes that fit into a single register. We need +- to put integers into FPRs when using instructions like CVT +- and TRUNC. There's no point allowing sizes smaller than a word, +- because the FPU has no appropriate load/store instructions. */ +- if (mclass == MODE_INT) +- return size >= MIN_UNITS_PER_WORD && size <= UNITS_PER_FPREG; +- } +- +- return false; +-} +- +-/* Implement TARGET_HARD_REGNO_MODE_OK. */ +- +-static bool +-loongarch_hard_regno_mode_ok (unsigned int regno, machine_mode mode) +-{ +- return loongarch_hard_regno_mode_ok_p[mode][regno]; +-} +- +-/* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */ +- +-bool +-loongarch_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED, +- unsigned int new_reg) +-{ +- /* Interrupt functions can only use registers that have already been +- saved by the prologue, even if they would normally be call-clobbered. */ +- if (cfun->machine->interrupt_handler_p && !df_regs_ever_live_p (new_reg)) +- return false; +- +- return true; +-} +- +-/* Return nonzero if register REGNO can be used as a scratch register +- in peephole2. */ +- +-bool +-loongarch_hard_regno_scratch_ok (unsigned int regno) +-{ +- /* See loongarch_hard_regno_rename_ok. */ +- if (cfun->machine->interrupt_handler_p && !df_regs_ever_live_p (regno)) +- return false; +- +- return true; +-} +- +-static bool +-loongarch_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode) +-{ +- if (ISA_HAS_LSX && FP_REG_P (regno) && GET_MODE_SIZE (mode) > 8) +- return true; +- +- return false; +-} +- +-/* Implement TARGET_HARD_REGNO_NREGS. */ +- +-static unsigned int +-loongarch_hard_regno_nregs (unsigned int regno, machine_mode mode) +-{ +- if (ST_REG_P (regno)) +- /* The size of FP status registers is always 4, because they only hold +- FCCmode values, and FCCmode is always considered to be 4 bytes wide. */ +- return (GET_MODE_SIZE (mode) + 3) / 4; +- +- if (FP_REG_P (regno)) +- { +- if (LSX_SUPPORTED_MODE_P (mode)) +- return 1; +- +- if (LASX_SUPPORTED_MODE_P (mode)) +- return 1; +- +- return (GET_MODE_SIZE (mode) + UNITS_PER_FPREG - 1) / UNITS_PER_FPREG; +- } +- +- /* All other registers are word-sized. */ +- return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; +-} +- +-/* Implement CLASS_MAX_NREGS, taking the maximum of the cases +- in loongarch_hard_regno_nregs. */ +- +-int +-loongarch_class_max_nregs (enum reg_class rclass, machine_mode mode) +-{ +- int size; +- HARD_REG_SET left; +- +- size = 0x8000; +- COPY_HARD_REG_SET (left, reg_class_contents[(int) rclass]); +- if (hard_reg_set_intersect_p (left, reg_class_contents[(int) ST_REGS])) +- { +- if (loongarch_hard_regno_mode_ok (ST_REG_FIRST, mode)) +- size = MIN (size, 4); +- +- AND_COMPL_HARD_REG_SET (left, reg_class_contents[(int) ST_REGS]); ++ AND_COMPL_HARD_REG_SET (left, reg_class_contents[(int) FCC_REGS]); + } + if (hard_reg_set_intersect_p (left, reg_class_contents[(int) FP_REGS])) + { +@@ -6849,8 +6274,8 @@ loongarch_class_max_nregs (enum reg_class rclass, machine_mode mode) + /* Implement TARGET_CAN_CHANGE_MODE_CLASS. 
*/ + + static bool +-loongarch_can_change_mode_class (machine_mode from, +- machine_mode to, reg_class_t rclass) ++loongarch_can_change_mode_class (machine_mode from, machine_mode to, ++ reg_class_t rclass) + { + /* Allow conversions between different Loongson integer vectors, + and between those vectors and DImode. */ +@@ -6866,42 +6291,10 @@ loongarch_can_change_mode_class (machine_mode from, + if (LSX_SUPPORTED_MODE_P (from) && LSX_SUPPORTED_MODE_P (to)) + return true; + +- /* Otherwise, there are several problems with changing the modes of +- values in floating-point registers: +- +- - When a multi-word value is stored in paired floating-point +- registers, the first register always holds the low word. We +- therefore can't allow FPRs to change between single-word and +- multi-word modes on big-endian targets. +- +- - GCC assumes that each word of a multiword register can be +- accessed individually using SUBREGs. This is not true for +- floating-point registers if they are bigger than a word. +- +- - Loading a 32-bit value into a 64-bit floating-point register +- will not sign-extend the value, despite what LOAD_EXTEND_OP +- says. We can't allow FPRs to change from SImode to a wider +- mode on 64-bit targets. +- +- - If the FPU has already interpreted a value in one format, we +- must not ask it to treat the value as having a different +- format. +- +- We therefore disallow all mode changes involving FPRs. */ +- + return !reg_classes_intersect_p (FP_REGS, rclass); + } + +-/* Implement target hook small_register_classes_for_mode_p. */ +- +-static bool +-loongarch_small_register_classes_for_mode_p (machine_mode mode +- ATTRIBUTE_UNUSED) +-{ +- return 0; +-} +- +-/* Return true if moves in mode MODE can use the FPU's mov.fmt instruction, ++/* Return true if moves in mode MODE can use the FPU's fmov.fmt instruction, + or use the LSX's move.v instruction. */ + + static bool +@@ -6909,6 +6302,7 @@ loongarch_mode_ok_for_mov_fmt_p (machine_mode mode) + { + switch (mode) + { ++ case E_FCCmode: + case E_SFmode: + return TARGET_HARD_FLOAT; + +@@ -6976,7 +6370,7 @@ loongarch_move_to_gpr_cost (reg_class_t from) + return 2; + + case FP_REGS: +- /* MFC1, etc. */ ++ /* MOVFR2GR, etc. */ + return 4; + + default: +@@ -6998,7 +6392,7 @@ loongarch_move_from_gpr_cost (reg_class_t to) + return 2; + + case FP_REGS: +- /* MTC1, etc. */ ++ /* MOVGR2FR, etc. */ + return 4; + + default: +@@ -7011,8 +6405,8 @@ loongarch_move_from_gpr_cost (reg_class_t to) + the maximum for us. */ + + static int +-loongarch_register_move_cost (machine_mode mode, +- reg_class_t from, reg_class_t to) ++loongarch_register_move_cost (machine_mode mode, reg_class_t from, ++ reg_class_t to) + { + reg_class_t dregs; + int cost1, cost2; +@@ -7024,7 +6418,7 @@ loongarch_register_move_cost (machine_mode mode, + if (from == FP_REGS) + { + if (to == FP_REGS && loongarch_mode_ok_for_mov_fmt_p (mode)) +- /* MOV.FMT. */ ++ /* FMOV.FMT. */ + return 4; + } + +@@ -7054,28 +6448,6 @@ loongarch_memory_move_cost (machine_mode mode, reg_class_t rclass, bool in) + { + return (loongarch_cost->memory_latency + + memory_move_secondary_cost (mode, rclass, in)); +-} +- +-/* Implement TARGET_SECONDARY_MEMORY_NEEDED. +- +- When targeting the o32 FPXX ABI, all moves with a length of doubleword +- or greater must be performed by FR-mode-aware instructions. +- This can be achieved using MOVFRH2GR.S/MOVGR2FRH.W when these instructions are +- available but otherwise moves must go via memory. 
+- Using MOVGR2FR/MOVFR2GR to access the lower-half of these registers would require +- a forbidden single-precision access. We require all double-word moves to use +- memory because adding even and odd floating-point registers classes +- would have a significant impact on the backend. */ +- +-static bool +-loongarch_secondary_memory_needed (machine_mode mode, reg_class_t class1, +- reg_class_t class2) +-{ +- /* Ignore spilled pseudos. */ +- if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS)) +- return false; +- +- return false; + } + + /* Return the register class required for a secondary register when +@@ -7084,9 +6456,10 @@ loongarch_secondary_memory_needed (machine_mode mode, reg_class_t class1, + is the destination. Return NO_REGS if no secondary register is + needed. */ + +-enum reg_class +-loongarch_secondary_reload_class (enum reg_class rclass, +- machine_mode mode, rtx x, bool) ++static reg_class_t ++loongarch_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x, ++ reg_class_t rclass, machine_mode mode, ++ secondary_reload_info *sri ATTRIBUTE_UNUSED) + { + int regno; + +@@ -7094,15 +6467,12 @@ loongarch_secondary_reload_class (enum reg_class rclass, + + /* Copying from accumulator registers to anywhere other than a general + register requires a temporary general register. */ +-// if (reg_class_subset_p (rclass, ACC_REGS)) ?????? +-// return GP_REG_P (regno) ? NO_REGS : GR_REGS; + if (reg_class_subset_p (rclass, FP_REGS)) + { + if (regno < 0 + || (MEM_P (x) + && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8))) +- /* In this case we can use lwc1, swc1, ldc1 or sdc1. We'll use +- pairs of lwc1s and swc1s if ldc1 and sdc1 are not supported. */ ++ /* In this case we can use fld.s, fst.s, fld.d or fst.d. */ + return NO_REGS; + + if (MEM_P (x) && LSX_SUPPORTED_MODE_P (mode)) +@@ -7110,17 +6480,18 @@ loongarch_secondary_reload_class (enum reg_class rclass, + return NO_REGS; + + if (GP_REG_P (regno) || x == CONST0_RTX (mode)) +- /* In this case we can use movgr2fr.s, movfr2gr.s, movgr2fr.d or movfr2gr.d. */ ++ /* In this case we can use movgr2fr.s, movfr2gr.s, movgr2fr.d or ++ * movfr2gr.d. */ + return NO_REGS; + + if (CONSTANT_P (x) && !targetm.cannot_force_const_mem (mode, x)) +- /* We can force the constant to memory and use lwc1 +- and ldc1. As above, we will use pairs of lwc1s if ++ /* We can force the constant to memory and use fld.s ++ and fld.d. As above, we will use pairs of lwc1s if + ldc1 is not supported. */ + return NO_REGS; + + if (FP_REG_P (regno) && loongarch_mode_ok_for_mov_fmt_p (mode)) +- /* In this case we can use mov.fmt. */ ++ /* In this case we can use fmov.{s/d}. */ + return NO_REGS; + + /* Otherwise, we need to reload through an integer register. */ +@@ -7132,7 +6503,19 @@ loongarch_secondary_reload_class (enum reg_class rclass, + return NO_REGS; + } + +- ++/* Implement TARGET_MODE_REP_EXTENDED */ ++ ++static int ++loongarch_mode_rep_extended (scalar_int_mode mode, scalar_int_mode mode_rep) ++{ ++ /* On 64-bit targets, SImode register values are sign-extended to DImode. */ ++ if (TARGET_64BIT && mode == SImode && mode_rep == DImode) ++ return SIGN_EXTEND; ++ ++ return UNKNOWN; ++} ++ ++ + /* Implement TARGET_VALID_POINTER_MODE. */ + + static bool +@@ -7160,7 +6543,7 @@ loongarch_scalar_mode_supported_p (scalar_mode mode) + + return default_scalar_mode_supported_p (mode); + } +- ++ + /* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE. 
*/ + + static machine_mode +@@ -7233,17 +6616,15 @@ loongarch_adjust_insn_length (rtx_insn *insn, int length) + length += 4; + + /* See how many nops might be needed to avoid hardware hazards. */ +- if (!cfun->machine->ignore_hazard_length_p +- && INSN_P (insn) ++ if (INSN_P (insn) + && INSN_CODE (insn) >= 0) + switch (get_attr_hazard (insn)) + { + case HAZARD_NONE: + break; + +- case HAZARD_DELAY: + case HAZARD_FORBIDDEN_SLOT: +- length += NOP_INSN_LENGTH; ++ length += 4; + break; + } + +@@ -7258,8 +6639,8 @@ loongarch_adjust_insn_length (rtx_insn *insn, int length) + + const char * + loongarch_output_conditional_branch (rtx_insn *insn, rtx *operands, +- const char *branch_if_true, +- const char *branch_if_false) ++ const char *branch_if_true, ++ const char *branch_if_false) + { + unsigned int length; + rtx taken; +@@ -7272,8 +6653,7 @@ loongarch_output_conditional_branch (rtx_insn *insn, rtx *operands, + return branch_if_true; + } + +- /* Generate a reversed branch around a direct jump. This fallback does +- not use branch-likely instructions. */ ++ /* Generate a reversed branch around a direct jump. */ + rtx_code_label *not_taken = gen_label_rtx (); + taken = operands[0]; + +@@ -7281,37 +6661,7 @@ loongarch_output_conditional_branch (rtx_insn *insn, rtx *operands, + operands[0] = not_taken; + output_asm_insn (branch_if_false, operands); + +- /* If INSN has a delay slot, we must provide delay slots for both the +- branch to NOT_TAKEN and the conditional jump. We must also ensure +- that INSN's delay slot is executed in the appropriate cases. */ +- if (final_sequence) +- { +- /* This first delay slot will always be executed, so use INSN's +- delay slot if is not annulled. */ +- if (!INSN_ANNULLED_BRANCH_P (insn)) +- { +- final_scan_insn (final_sequence->insn (1), +- asm_out_file, optimize, 1, NULL); +- final_sequence->insn (1)->set_deleted (); +- } +- fprintf (asm_out_file, "\n"); +- } +- +- output_asm_insn (LARCH_ABSOLUTE_JUMP ("b\t%0"), &taken); +- +- /* Now deal with its delay slot; see above. */ +- if (final_sequence) +- { +- /* This delay slot will only be executed if the branch is taken. +- Use INSN's delay slot if is annulled. */ +- if (INSN_ANNULLED_BRANCH_P (insn)) +- { +- final_scan_insn (final_sequence->insn (1), +- asm_out_file, optimize, 1, NULL); +- final_sequence->insn (1)->set_deleted (); +- } +- fprintf (asm_out_file, "\n"); +- } ++ output_asm_insn ("b\t%0", &taken); + + /* Output NOT_TAKEN. */ + targetm.asm_out.internal_label (asm_out_file, "L", +@@ -7326,21 +6676,23 @@ loongarch_output_conditional_branch (rtx_insn *insn, rtx *operands, + OPERANDS[3] is the second operand and may be zero or a register. 
*/ + + const char * +-loongarch_output_equal_conditional_branch (rtx_insn* insn, rtx *operands, +- bool inverted_p) ++loongarch_output_equal_conditional_branch (rtx_insn *insn, rtx *operands, ++ bool inverted_p) + { + const char *branch[2]; + if (operands[3] == const0_rtx) + { + branch[!inverted_p] = LARCH_BRANCH ("b%C1z", "%2,%0"); + branch[inverted_p] = LARCH_BRANCH ("b%N1z", "%2,%0"); +- } else ++ } ++ else + { + branch[!inverted_p] = LARCH_BRANCH ("b%C1", "%2,%z3,%0"); + branch[inverted_p] = LARCH_BRANCH ("b%N1", "%2,%z3,%0"); + } + +- return loongarch_output_conditional_branch (insn, operands, branch[1], branch[0]); ++ return loongarch_output_conditional_branch (insn, operands, branch[1], ++ branch[0]); + } + + /* Return the assembly code for INSN, which branches to OPERANDS[0] +@@ -7351,7 +6703,7 @@ loongarch_output_equal_conditional_branch (rtx_insn* insn, rtx *operands, + + const char * + loongarch_output_order_conditional_branch (rtx_insn *insn, rtx *operands, +- bool inverted_p) ++ bool inverted_p) + { + const char *branch[2]; + +@@ -7377,7 +6729,7 @@ loongarch_output_order_conditional_branch (rtx_insn *insn, rtx *operands, + branch[!inverted_p] = LARCH_BRANCH ("b", "%0"); + branch[inverted_p] = "\t# branch never"; + break; +- default: ++ default: + gcc_unreachable (); + } + } +@@ -7385,31 +6737,19 @@ loongarch_output_order_conditional_branch (rtx_insn *insn, rtx *operands, + { + switch (GET_CODE (operands[1])) + { +- case LE: +- branch[!inverted_p] = LARCH_BRANCH ("bge", "%3,%2,%0"); +- branch[inverted_p] = LARCH_BRANCH ("blt", "%3,%2,%0"); +- break; +- case LEU: +- branch[!inverted_p] = LARCH_BRANCH ("bgeu", "%3,%2,%0"); +- branch[inverted_p] = LARCH_BRANCH ("bltu", "%3,%2,%0"); +- break; +- case GT: +- branch[!inverted_p] = LARCH_BRANCH ("blt", "%3,%2,%0"); +- branch[inverted_p] = LARCH_BRANCH ("bge", "%3,%2,%0"); +- break; +- case GTU: +- branch[!inverted_p] = LARCH_BRANCH ("bltu", "%3,%2,%0"); +- branch[inverted_p] = LARCH_BRANCH ("bgeu", "%3,%2,%0"); +- break; +- case LT: +- case LTU: +- case GE: +- case GEU: +- branch[!inverted_p] = LARCH_BRANCH ("b%C1", "%2,%3,%0"); +- branch[inverted_p] = LARCH_BRANCH ("b%N1", "%2,%3,%0"); +- break; +- default: +- gcc_unreachable (); ++ case LE: ++ case LEU: ++ case GT: ++ case GTU: ++ case LT: ++ case LTU: ++ case GE: ++ case GEU: ++ branch[!inverted_p] = LARCH_BRANCH ("b%C1", "%2,%3,%0"); ++ branch[inverted_p] = LARCH_BRANCH ("b%N1", "%2,%3,%0"); ++ break; ++ default: ++ gcc_unreachable (); + } + } + } +@@ -7419,30 +6759,11 @@ loongarch_output_order_conditional_branch (rtx_insn *insn, rtx *operands, + { + /* These cases are equivalent to comparisons against zero. */ + case LEU: +- inverted_p = !inverted_p; +- /* Fall through. */ + case GTU: +- branch[!inverted_p] = LARCH_BRANCH ("bne", "%2,%.,%0"); +- branch[inverted_p] = LARCH_BRANCH ("beq", "%2,%.,%0"); +- break; +- +- /* These cases are always true or always false. */ + case LTU: +- inverted_p = !inverted_p; +- /* Fall through. 
*/ + case GEU: +- branch[!inverted_p] = LARCH_BRANCH ("beq", "%.,%.,%0"); +- branch[inverted_p] = LARCH_BRANCH ("bne", "%.,%.,%0"); +- break; +- +- case LE: +- branch[!inverted_p] = LARCH_BRANCH ("bge", "$r0,%2,%0"); +- branch[inverted_p] = LARCH_BRANCH ("blt", "$r0,%2,%0"); +- break; ++ case LE: + case GT: +- branch[!inverted_p] = LARCH_BRANCH ("blt", "$r0,%2,%0"); +- branch[inverted_p] = LARCH_BRANCH ("bge", "$r0,%2,%0"); +- break; + case LT: + case GE: + branch[!inverted_p] = LARCH_BRANCH ("b%C1", "%2,$r0,%0"); +@@ -7451,98 +6772,14 @@ loongarch_output_order_conditional_branch (rtx_insn *insn, rtx *operands, + default: + gcc_unreachable (); + } +- } +- return loongarch_output_conditional_branch (insn, operands, branch[1], branch[0]); ++ } ++ return loongarch_output_conditional_branch (insn, operands, branch[1], ++ branch[0]); + } +- +-/* Return the assembly code for DIV or DDIV instruction DIVISION, which has +- the operands given by OPERANDS. Add in a divide-by-zero check if needed. + +- When working around R4000 and R4400 errata, we need to make sure that +- the division is not immediately followed by a shift[1][2]. We also +- need to stop the division from being put into a branch delay slot[3]. +- The easiest way to avoid both problems is to add a nop after the +- division. When a divide-by-zero check is needed, this nop can be +- used to fill the branch delay slot. +- +- [1] If a double-word or a variable shift executes immediately +- after starting an integer division, the shift may give an +- incorrect result. See quotations of errata #16 and #28 from +- "LARCH R4000PC/SC Errata, Processor Revision 2.2 and 3.0" +- in loongarch.md for details. +- +- [2] A similar bug to [1] exists for all revisions of the +- R4000 and the R4400 when run in an MC configuration. +- From "LARCH R4000MC Errata, Processor Revision 2.2 and 3.0": +- +- "19. In this following sequence: +- +- ddiv (or ddivu or div or divu) +- dsll32 (or dsrl32, dsra32) +- +- if an MPT stall occurs, while the divide is slipping the cpu +- pipeline, then the following double shift would end up with an +- incorrect result. +- +- Workaround: The compiler needs to avoid generating any +- sequence with divide followed by extended double shift." +- +- This erratum is also present in "LARCH R4400MC Errata, Processor +- Revision 1.0" and "LARCH R4400MC Errata, Processor Revision 2.0 +- & 3.0" as errata #10 and #4, respectively. +- +- [3] From "LARCH R4000PC/SC Errata, Processor Revision 2.2 and 3.0" +- (also valid for LARCH R4000MC processors): +- +- "52. R4000SC: This bug does not apply for the R4000PC. +- +- There are two flavors of this bug: +- +- 1) If the instruction just after divide takes an RF exception +- (tlb-refill, tlb-invalid) and gets an instruction cache +- miss (both primary and secondary) and the line which is +- currently in secondary cache at this index had the first +- data word, where the bits 5..2 are set, then R4000 would +- get a wrong result for the div. +- +- ##1 +- nop +- div r8, r9 +- ------------------- # end-of page. -tlb-refill +- nop +- ##2 +- nop +- div r8, r9 +- ------------------- # end-of page. -tlb-invalid +- nop +- +- 2) If the divide is in the taken branch delay slot, where the +- target takes RF exception and gets an I-cache miss for the +- exception vector or where I-cache miss occurs for the +- target address, under the above mentioned scenarios, the +- div would get wrong results. 
+- +- ##1 +- j r2 # to next page mapped or unmapped +- div r8,r9 # this bug would be there as long +- # as there is an ICache miss and +- nop # the "data pattern" is present +- +- ##2 +- beq r0, r0, NextPage # to Next page +- div r8,r9 +- nop +- +- This bug is present for div, divu, ddiv, and ddivu +- instructions. +- +- Workaround: For item 1), OS could make sure that the next page +- after the divide instruction is also mapped. For item 2), the +- compiler could make sure that the divide instruction is not in +- the branch delay slot." +- +- These processors have PRId values of 0x00004220 and 0x00004300 for +- the R4000 and 0x00004400, 0x00004500 and 0x00004600 for the R4400. */ ++/* Return the assembly code for DIV.{W/D} instruction DIVISION, which has ++ the operands given by OPERANDS. Add in a divide-by-zero check if needed. ++ */ + + const char * + loongarch_output_division (const char *division, rtx *operands) +@@ -7571,13 +6808,13 @@ loongarch_lsx_output_division (const char *division, rtx *operands) + s = division; + if (TARGET_CHECK_ZERO_DIV) + { +- if(ISA_HAS_LASX && GET_MODE_SIZE (mode) == 32) ++ if (ISA_HAS_LASX && GET_MODE_SIZE (mode) == 32) + { + output_asm_insn ("xvsetallnez.%v0\t$fcc7,%u2",operands); + output_asm_insn (s, operands); + output_asm_insn ("bcnez\t$fcc7,1f", operands); + } +- else if(ISA_HAS_LSX) ++ else if (ISA_HAS_LSX) + { + output_asm_insn ("vsetallnez.%v0\t$fcc7,%w2",operands); + output_asm_insn (s, operands); +@@ -7587,80 +6824,13 @@ loongarch_lsx_output_division (const char *division, rtx *operands) + } + return s; + } +- +-/* Return true if destination of IN_INSN is used as add source in +- OUT_INSN. Both IN_INSN and OUT_INSN are of type fmadd. Example: +- madd.s dst, x, y, z +- madd.s a, dst, b, c */ +- +-bool +-loongarch_fmadd_bypass (rtx_insn *out_insn, rtx_insn *in_insn) +-{ +- int dst_reg, src_reg; +- +- gcc_assert (get_attr_type (in_insn) == TYPE_FMADD); +- gcc_assert (get_attr_type (out_insn) == TYPE_FMADD); +- +- extract_insn (in_insn); +- dst_reg = REG_P (recog_data.operand[0]); +- +- extract_insn (out_insn); +- src_reg = REG_P (recog_data.operand[1]); +- +- if (dst_reg == src_reg) +- return true; +- +- return false; +-} +- +-/* Return true if IN_INSN is a multiply-add or multiply-subtract +- instruction and if OUT_INSN assigns to the accumulator operand. */ +- +-bool +-loongarch_linked_madd_p (rtx_insn *out_insn, rtx_insn *in_insn) +-{ +- enum attr_accum_in accum_in; +- int accum_in_opnum; +- rtx accum_in_op; +- +- if (recog_memoized (in_insn) < 0) +- return false; +- +- accum_in = get_attr_accum_in (in_insn); +- if (accum_in == ACCUM_IN_NONE) +- return false; +- +- accum_in_opnum = accum_in - ACCUM_IN_0; +- +- extract_insn (in_insn); +- gcc_assert (accum_in_opnum < recog_data.n_operands); +- accum_in_op = recog_data.operand[accum_in_opnum]; +- +- return reg_set_p (accum_in_op, out_insn); +-} +- +-/* True if the dependency between OUT_INSN and IN_INSN is on the store +- data rather than the address. We need this because the cprestore +- pattern is type "store", but is defined using an UNSPEC_VOLATILE, +- which causes the default routine to abort. We just return false +- for that case. */ +- +-bool +-loongarch_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn) +-{ +- if (GET_CODE (PATTERN (in_insn)) == UNSPEC_VOLATILE) +- return false; +- +- return store_data_bypass_p (out_insn, in_insn); +-} +- + + /* Implement TARGET_SCHED_ADJUST_COST. 
We assume that anti and output +- dependencies have no cost, except on the 20Kc where output-dependence +- is treated like input-dependence. */ ++ dependencies have no cost. */ + + static int +-loongarch_adjust_cost (rtx_insn *, int dep_type, rtx_insn *, int cost, unsigned int) ++loongarch_adjust_cost (rtx_insn *, int dep_type, rtx_insn *, int cost, ++ unsigned int) + { + if (dep_type != 0 && (dep_type != REG_DEP_OUTPUT)) + return 0; +@@ -7672,15 +6842,10 @@ loongarch_adjust_cost (rtx_insn *, int dep_type, rtx_insn *, int cost, unsigned + static int + loongarch_issue_rate (void) + { +- switch (loongarch_tune) +- { +- case PROCESSOR_LOONGARCH64: +- case PROCESSOR_LA464: +- return 4; +- +- default: +- return 1; +- } ++ if ((unsigned long) la_target.cpu_tune < N_TUNE_TYPES) ++ return loongarch_cpu_issue_rate[la_target.cpu_tune]; ++ else ++ return 1; + } + + /* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD. This should +@@ -7689,24 +6854,20 @@ loongarch_issue_rate (void) + static int + loongarch_multipass_dfa_lookahead (void) + { +- if (TUNE_LOONGARCH64 || TUNE_LA464) +- return 4; +- +- return 0; +-} +- +- +-static void +-loongarch_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, +- int max_ready ATTRIBUTE_UNUSED) +-{ ++ if ((unsigned long) la_target.cpu_tune < N_ARCH_TYPES) ++ return loongarch_cpu_multipass_dfa_lookahead[la_target.cpu_tune]; ++ else ++ return 0; + } + + /* Implement TARGET_SCHED_REORDER. */ + + static int +-loongarch_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, +- rtx_insn **ready, int *nreadyp, int cycle ATTRIBUTE_UNUSED) ++loongarch_sched_reorder (FILE *file ATTRIBUTE_UNUSED, ++ int verbose ATTRIBUTE_UNUSED, ++ rtx_insn **ready ATTRIBUTE_UNUSED, ++ int *nreadyp ATTRIBUTE_UNUSED, ++ int cycle ATTRIBUTE_UNUSED) + { + return loongarch_issue_rate (); + } +@@ -7714,17 +6875,29 @@ loongarch_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUS + /* Implement TARGET_SCHED_REORDER2. */ + + static int +-loongarch_sched_reorder2 (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, +- rtx_insn **ready, int *nreadyp, int cycle ATTRIBUTE_UNUSED) ++loongarch_sched_reorder2 (FILE *file ATTRIBUTE_UNUSED, ++ int verbose ATTRIBUTE_UNUSED, ++ rtx_insn **ready ATTRIBUTE_UNUSED, ++ int *nreadyp ATTRIBUTE_UNUSED, ++ int cycle ATTRIBUTE_UNUSED) + { + return cached_can_issue_more; + } + ++/* Implement TARGET_SCHED_INIT. */ ++ ++static void ++loongarch_sched_init (FILE *file ATTRIBUTE_UNUSED, ++ int verbose ATTRIBUTE_UNUSED, ++ int max_ready ATTRIBUTE_UNUSED) ++{} ++ + /* Implement TARGET_SCHED_VARIABLE_ISSUE. */ + + static int +-loongarch_variable_issue (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, +- rtx_insn *insn, int more) ++loongarch_variable_issue (FILE *file ATTRIBUTE_UNUSED, ++ int verbose ATTRIBUTE_UNUSED, rtx_insn *insn, ++ int more) + { + /* Ignore USEs and CLOBBERs; don't count them against the issue rate. */ + if (USEFUL_INSN_P (insn)) +@@ -7742,1243 +6915,2339 @@ loongarch_variable_issue (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNU + cached_can_issue_more = more; + return more; + } +- +-/* Given that we have an rtx of the form (prefetch ... WRITE LOCALITY), +- return the first operand of the associated PREF or PREFX insn. */ +- +-rtx +-loongarch_prefetch_cookie (rtx write, rtx locality) +-{ +- /* store_streamed / load_streamed. */ +- if (INTVAL (locality) <= 0) +- return GEN_INT (INTVAL (write) + 4); +- +- /* store / load. 
*/ +- if (INTVAL (locality) <= 2) +- return write; +- +- /* store_retained / load_retained. */ +- return GEN_INT (INTVAL (write) + 6); +-} +- +- +-/* Return whether CFG is used in loongarch_reorg. */ + +-static bool +-loongarch_cfg_in_reorg (void) +-{ +- return (TARGET_RELAX_PIC_CALLS); +-} +- +-/* If INSN is a call, return the underlying CALL expr. Return NULL_RTX +- otherwise. If INSN has two call rtx, then store the second one in +- SECOND_CALL. */ ++/* Implement TARGET_ASM_OUTPUT_MI_THUNK. Generate rtl rather than asm text ++ in order to avoid duplicating too much logic from elsewhere. */ + +-static rtx +-loongarch_call_expr_from_insn (rtx_insn *insn, rtx *second_call) ++static void ++loongarch_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, ++ HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, ++ tree function) + { +- rtx x; +- rtx x2; +- +- if (!CALL_P (insn)) +- return NULL_RTX; +- +- x = PATTERN (insn); +- if (GET_CODE (x) == PARALLEL) +- { +- /* Calls returning complex values have two CALL rtx. Look for the second +- one here, and return it via the SECOND_CALL arg. */ +- x2 = XVECEXP (x, 0, 1); +- if (GET_CODE (x2) == SET) +- x2 = XEXP (x2, 1); +- if (GET_CODE (x2) == CALL) +- *second_call = x2; +- +- x = XVECEXP (x, 0, 0); +- } +- if (GET_CODE (x) == SET) +- x = XEXP (x, 1); +- gcc_assert (GET_CODE (x) == CALL); ++ rtx this_rtx, temp1, temp2, fnaddr; ++ rtx_insn *insn; ++ bool use_sibcall_p; + +- return x; +-} ++ /* Pretend to be a post-reload pass while generating rtl. */ ++ reload_completed = 1; + +-/* REG is set in DEF. See if the definition is one of the ways we load a +- register with a symbol address for a loongarch_use_pic_fn_addr_reg_p call. +- If it is, return the symbol reference of the function, otherwise return +- NULL_RTX. ++ /* Mark the end of the (empty) prologue. */ ++ emit_note (NOTE_INSN_PROLOGUE_END); + +- If RECURSE_P is true, use loongarch_find_pic_call_symbol to interpret +- the values of source registers, otherwise treat such registers as +- having an unknown value. */ ++ /* Determine if we can use a sibcall to call FUNCTION directly. */ ++ fnaddr = XEXP (DECL_RTL (function), 0); ++ use_sibcall_p = const_call_insn_operand (fnaddr, Pmode); + +-static rtx +-loongarch_pic_call_symbol_from_set (df_ref def, rtx reg, bool recurse_p) +-{ +- rtx_insn *def_insn; +- rtx set; ++ /* We need two temporary registers in some cases. */ ++ temp1 = gen_rtx_REG (Pmode, 12); ++ temp2 = gen_rtx_REG (Pmode, 13); + +- if (DF_REF_IS_ARTIFICIAL (def)) +- return NULL_RTX; ++ /* Find out which register contains the "this" pointer. */ ++ if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) ++ this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST + 1); ++ else ++ this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST); + +- def_insn = DF_REF_INSN (def); +- set = single_set (def_insn); +- if (set && rtx_equal_p (SET_DEST (set), reg)) ++ /* Add DELTA to THIS_RTX. */ ++ if (delta != 0) + { +- rtx note, src, symbol; +- +- /* First see whether the source is a plain symbol. This is used +- when calling symbols that are not lazily bound. */ +- src = SET_SRC (set); +- if (GET_CODE (src) == SYMBOL_REF) +- return src; +- +- /* Handle %call16 references. 
*/ +- symbol = loongarch_strip_unspec_call (src); +- if (symbol) ++ rtx offset = GEN_INT (delta); ++ if (!IMM12_OPERAND (delta)) + { +- gcc_assert (GET_CODE (symbol) == SYMBOL_REF); +- return symbol; ++ loongarch_emit_move (temp1, offset); ++ offset = temp1; + } +- +- /* If we have something more complicated, look for a +- REG_EQUAL or REG_EQUIV note. */ +- note = find_reg_equal_equiv_note (def_insn); +- if (note && GET_CODE (XEXP (note, 0)) == SYMBOL_REF) +- return XEXP (note, 0); +- +- /* Follow at most one simple register copy. Such copies are +- interesting in cases like: +- +- for (...) +- { +- locally_binding_fn (...); +- } +- +- and: +- +- locally_binding_fn (...); +- ... +- locally_binding_fn (...); +- +- where the load of locally_binding_fn can legitimately be +- hoisted or shared. However, we do not expect to see complex +- chains of copies, so a full worklist solution to the problem +- would probably be overkill. */ +- if (recurse_p && REG_P (src)) +- return loongarch_find_pic_call_symbol (def_insn, src, false); ++ emit_insn (gen_add3_insn (this_rtx, this_rtx, offset)); + } + +- return NULL_RTX; +-} ++ /* If needed, add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */ ++ if (vcall_offset != 0) ++ { ++ rtx addr; + +-/* Find the definition of the use of REG in INSN. See if the definition +- is one of the ways we load a register with a symbol address for a +- loongarch_use_pic_fn_addr_reg_p call. If it is return the symbol reference +- of the function, otherwise return NULL_RTX. RECURSE_P is as for +- loongarch_pic_call_symbol_from_set. */ ++ /* Set TEMP1 to *THIS_RTX. */ ++ loongarch_emit_move (temp1, gen_rtx_MEM (Pmode, this_rtx)); + +-static rtx +-loongarch_find_pic_call_symbol (rtx_insn *insn, rtx reg, bool recurse_p) +-{ +- df_ref use; +- struct df_link *defs; +- rtx symbol; ++ /* Set ADDR to a legitimate address for *THIS_RTX + VCALL_OFFSET. */ ++ addr = loongarch_add_offset (temp2, temp1, vcall_offset); + +- use = df_find_use (insn, regno_reg_rtx[REGNO (reg)]); +- if (!use) +- return NULL_RTX; +- defs = DF_REF_CHAIN (use); +- if (!defs) +- return NULL_RTX; +- symbol = loongarch_pic_call_symbol_from_set (defs->ref, reg, recurse_p); +- if (!symbol) +- return NULL_RTX; ++ /* Load the offset and add it to THIS_RTX. */ ++ loongarch_emit_move (temp1, gen_rtx_MEM (Pmode, addr)); ++ emit_insn (gen_add3_insn (this_rtx, this_rtx, temp1)); ++ } + +- /* If we have more than one definition, they need to be identical. */ +- for (defs = defs->next; defs; defs = defs->next) ++ /* Jump to the target function. Use a sibcall if direct jumps are ++ allowed, otherwise load the address into a register first. */ ++ if (use_sibcall_p) + { +- rtx other; +- +- other = loongarch_pic_call_symbol_from_set (defs->ref, reg, recurse_p); +- if (!rtx_equal_p (symbol, other)) +- return NULL_RTX; ++ insn = emit_call_insn (gen_sibcall_internal (fnaddr, const0_rtx)); ++ SIBLING_CALL_P (insn) = 1; ++ } ++ else ++ { ++ loongarch_emit_move (temp1, fnaddr); ++ emit_jump_insn (gen_indirect_jump (temp1)); + } + +- return symbol; +-} +- +-/* Replace the args_size operand of the call expression CALL with the +- call-attribute UNSPEC and fill in SYMBOL as the function symbol. */ +- +-static void +-loongarch_annotate_pic_call_expr (rtx call, rtx symbol) +-{ +- rtx args_size; ++ /* Run just enough of rest_of_compilation. This sequence was ++ "borrowed" from alpha.c. 
*/ ++ insn = get_insns (); ++ split_all_insns_noflow (); ++ shorten_branches (insn); ++ final_start_function (insn, file, 1); ++ final (insn, file, 1); ++ final_end_function (); + +- args_size = XEXP (call, 1); +- XEXP (call, 1) = gen_rtx_UNSPEC (GET_MODE (args_size), +- gen_rtvec (2, args_size, symbol), +- UNSPEC_CALL_ATTR); ++ /* Stop pretending to be a post-reload pass. */ ++ reload_completed = 0; + } + +-/* OPERANDS[ARGS_SIZE_OPNO] is the arg_size operand of a CALL expression. See +- if instead of the arg_size argument it contains the call attributes. If +- yes return true along with setting OPERANDS[ARGS_SIZE_OPNO] to the function +- symbol from the call attributes. Also return false if ARGS_SIZE_OPNO is +- -1. */ ++/* Allocate a chunk of memory for per-function machine-dependent data. */ + +-bool +-loongarch_get_pic_call_symbol (rtx *operands, int args_size_opno) ++static struct machine_function * ++loongarch_init_machine_status (void) + { +- rtx args_size, symbol; +- +- if (!TARGET_RELAX_PIC_CALLS || args_size_opno == -1) +- return false; +- +- args_size = operands[args_size_opno]; +- if (GET_CODE (args_size) != UNSPEC) +- return false; +- gcc_assert (XINT (args_size, 1) == UNSPEC_CALL_ATTR); +- +- symbol = XVECEXP (args_size, 0, 1); +- gcc_assert (GET_CODE (symbol) == SYMBOL_REF); +- +- operands[args_size_opno] = symbol; +- return true; ++ return ggc_cleared_alloc (); + } + +-/* Use DF to annotate PIC indirect calls with the function symbol they +- dispatch to. */ +- + static void +-loongarch_annotate_pic_calls (void) ++loongarch_cpu_option_override (struct loongarch_target *target, ++ struct gcc_options *opts, ++ struct gcc_options *opts_set) + { +- basic_block bb; +- rtx_insn *insn; +- +- FOR_EACH_BB_FN (bb, cfun) +- FOR_BB_INSNS (bb, insn) ++ /* strict alignment */ ++ switch (target->cpu_arch) + { +- rtx call, reg, symbol, second_call; ++ case CPU_LA264: ++ /* Using -mstrict-align is recommended for la264 cores. */ ++ if (!opts_set->x_TARGET_STRICT_ALIGN) ++ { ++ opts->x_TARGET_STRICT_ALIGN = 1; ++ opts_set->x_TARGET_STRICT_ALIGN = 1; ++ } ++ break; ++ } + +- second_call = 0; +- call = loongarch_call_expr_from_insn (insn, &second_call); +- if (!call) +- continue; +- gcc_assert (MEM_P (XEXP (call, 0))); +- reg = XEXP (XEXP (call, 0), 0); +- if (!REG_P (reg)) +- continue; ++ /* software prefetching parameters (-fprefetch-loop-arrays) */ ++ maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, ++ loongarch_cpu_cache[target->cpu_tune].simultaneous_prefetches, ++ opts->x_param_values, opts_set->x_param_values); + +- symbol = loongarch_find_pic_call_symbol (insn, reg, true); +- if (symbol) +- { +- loongarch_annotate_pic_call_expr (call, symbol); +- if (second_call) +- loongarch_annotate_pic_call_expr (second_call, symbol); +- } +- } +-} +- ++ maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ++ loongarch_cpu_cache[target->cpu_tune].l1d_line_size, ++ opts->x_param_values, opts_set->x_param_values); + +-/* A structure representing the state of the processor pipeline. +- Used by the loongarch_sim_* family of functions. */ +-struct loongarch_sim { +- /* The maximum number of instructions that can be issued in a cycle. +- (Caches loongarch_issue_rate.) */ +- unsigned int issue_rate; +- +- /* The current simulation time. */ +- unsigned int time; +- +- /* How many more instructions can be issued in the current cycle. */ +- unsigned int insns_left; +- +- /* LAST_SET[X].INSN is the last instruction to set register X. +- LAST_SET[X].TIME is the time at which that instruction was issued. 
+- INSN is null if no instruction has yet set register X. */ +- struct { +- rtx_insn *insn; +- unsigned int time; +- } last_set[FIRST_PSEUDO_REGISTER]; +- +- /* The pipeline's current DFA state. */ +- state_t dfa_state; +-}; ++ maybe_set_param_value (PARAM_L1_CACHE_SIZE, ++ loongarch_cpu_cache[target->cpu_tune].l1d_size, ++ opts->x_param_values, opts_set->x_param_values); + +-/* Reset STATE to the initial simulation state. */ ++ maybe_set_param_value (PARAM_L2_CACHE_SIZE, ++ loongarch_cpu_cache[target->cpu_tune].l2d_size, ++ opts->x_param_values, opts_set->x_param_values); ++} + + static void +-loongarch_sim_reset (struct loongarch_sim *state) ++loongarch_option_override_internal (struct gcc_options *opts, ++ struct gcc_options *opts_set) + { +- curr_state = state->dfa_state; ++ int i, regno, mode; + +- state->time = 0; +- state->insns_left = state->issue_rate; +- memset (&state->last_set, 0, sizeof (state->last_set)); +- state_reset (curr_state); ++ if (flag_pic) ++ g_switch_value = 0; + +- targetm.sched.init (0, false, 0); +- advance_state (curr_state); +-} ++ loongarch_init_target (&la_target, ++ la_opt_cpu_arch, la_opt_cpu_tune, la_opt_fpu, ++ la_opt_simd, la_opt_abi_base, la_opt_abi_ext, ++ la_opt_cmodel); + +-/* Initialize STATE before its first use. DFA_STATE points to an +- allocated but uninitialized DFA state. */ ++ /* Handle target-specific options: compute defaults/conflicts etc. */ ++ loongarch_config_target (&la_target, NULL, 0); + +-static void +-loongarch_sim_init (struct loongarch_sim *state, state_t dfa_state) +-{ +- if (targetm.sched.init_dfa_pre_cycle_insn) +- targetm.sched.init_dfa_pre_cycle_insn (); ++ loongarch_update_gcc_opt_status (&la_target, opts, opts_set); ++ loongarch_cpu_option_override (&la_target, opts, opts_set); + +- if (targetm.sched.init_dfa_post_cycle_insn) +- targetm.sched.init_dfa_post_cycle_insn (); ++ if (TARGET_ABI_LP64) ++ flag_pcc_struct_return = 0; + +- state->issue_rate = loongarch_issue_rate (); +- state->dfa_state = dfa_state; +- loongarch_sim_reset (state); +-} ++ /* Decide which rtx_costs structure to use. */ ++ if (optimize_size) ++ loongarch_cost = &loongarch_rtx_cost_optimize_size; ++ else ++ loongarch_cost = &loongarch_cpu_rtx_cost_data[la_target.cpu_tune]; + +- ++ /* If the user hasn't specified a branch cost, use the processor's ++ default. */ ++ if (loongarch_branch_cost == 0) ++ loongarch_branch_cost = loongarch_cost->branch_cost; + +-/* Set up costs based on the current architecture and tuning settings. */ ++ if (loongarch_vector_access_cost == 0) ++ loongarch_vector_access_cost = 5; + +-static void +-loongarch_set_tuning_info (void) +-{ + +- loongarch_tuning_info.arch = loongarch_arch; +- loongarch_tuning_info.tune = loongarch_tune; +- loongarch_tuning_info.initialized_p = true; ++ /* Enable sw prefetching at -O3 and higher. */ ++ if (opts->x_flag_prefetch_loop_arrays < 0 ++ && (opts->x_optimize >= 3 || opts->x_flag_profile_use) ++ && !opts->x_optimize_size) ++ opts->x_flag_prefetch_loop_arrays = 1; + +- dfa_start (); ++ switch (la_target.cmodel) ++ { ++ case CMODEL_TINY_STATIC: ++ case CMODEL_EXTREME: ++ if (opts->x_flag_plt) ++ error ("code model %qs and %qs not support %s mode", ++ "tiny-static", "extreme", "plt"); ++ break; + +- struct loongarch_sim state; +- loongarch_sim_init (&state, alloca (state_size ())); ++ case CMODEL_NORMAL: ++ case CMODEL_TINY: ++ case CMODEL_LARGE: ++ break; + +- dfa_finish (); +-} ++ default: ++ gcc_unreachable (); ++ } + +-/* Implement TARGET_EXPAND_TO_RTL_HOOK. 
*/ ++ loongarch_init_print_operand_punct (); + +-static void +-loongarch_expand_to_rtl_hook (void) +-{ +- /* We need to call this at a point where we can safely create sequences +- of instructions, so TARGET_OVERRIDE_OPTIONS is too early. We also +- need to call it at a point where the DFA infrastructure is not +- already in use, so we can't just call it lazily on demand. +- +- At present, loongarch_tuning_info is only needed during post-expand +- RTL passes such as split_insns, so this hook should be early enough. +- We may need to move the call elsewhere if loongarch_tuning_info starts +- to be used for other things (such as rtx_costs, or expanders that +- could be called during gimple optimization). */ +- loongarch_set_tuning_info (); +-} +- +-/* This structure records that the current function has a LO_SUM +- involving SYMBOL_REF or LABEL_REF BASE and that MAX_OFFSET is +- the largest offset applied to BASE by all such LO_SUMs. */ +-struct loongarch_lo_sum_offset { +- rtx base; +- HOST_WIDE_INT offset; +-}; ++ /* Set up array to map GCC register number to debug register number. ++ Ignore the special purpose register numbers. */ + +-/* Return a hash value for SYMBOL_REF or LABEL_REF BASE. */ ++ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) ++ { ++ if (GP_REG_P (i) || FP_REG_P (i)) ++ loongarch_dwarf_regno[i] = i; ++ else ++ loongarch_dwarf_regno[i] = INVALID_REGNUM; ++ } + +-static hashval_t +-loongarch_hash_base (rtx base) +-{ +- int do_not_record_p; ++ /* Set up loongarch_hard_regno_mode_ok. */ ++ for (mode = 0; mode < MAX_MACHINE_MODE; mode++) ++ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) ++ loongarch_hard_regno_mode_ok_p[mode][regno] ++ = loongarch_hard_regno_mode_ok_uncached (regno, (machine_mode) mode); + +- return hash_rtx (base, GET_MODE (base), &do_not_record_p, NULL, false); +-} ++ /* Function to allocate machine-dependent function status. */ ++ init_machine_status = &loongarch_init_machine_status; + +-/* Hashtable helpers. */ ++ /* If not optimizing for size, set the default ++ alignment to what the target wants. */ ++ if (!opts->x_optimize_size) ++ { ++ if (opts->x_align_loops <= 0) ++ opts->x_align_loops = 16; ++ if (opts->x_align_jumps <= 0) ++ opts->x_align_jumps = 16; ++ if (opts->x_align_functions <= 0) ++ opts->x_align_functions = 16; ++ } + +-struct loongarch_lo_sum_offset_hasher : free_ptr_hash +-{ +- typedef rtx_def *compare_type; +- static inline hashval_t hash (const loongarch_lo_sum_offset *); +- static inline bool equal (const loongarch_lo_sum_offset *, const rtx_def *); +-}; ++ if (loongarch_veclibabi_name ++ && strcmp (loongarch_veclibabi_name, "sleef") != 0) ++ { ++ error ("unknown vectorization library ABI type (%qs) for " ++ "%qs", loongarch_veclibabi_name, "-mveclibabi="); ++ inform (input_location, ++ "valid arguments to %<-mveclibabi=%> are: %s", "sleef"); ++ } ++ if (!ISA_HAS_LASX) ++ loongarch_stack_realign = 0; + +-/* Hash-table callbacks for loongarch_lo_sum_offsets. */ ++ /* -mrecip options. 
*/ ++ static struct ++ { ++ const char *string; /* option name */ ++ unsigned int mask; /* mask bits to set */ ++ } ++ const recip_options[] = ++ { ++ { "all", RECIP_MASK_ALL }, ++ { "none", RECIP_MASK_NONE }, ++ { "div", RECIP_MASK_DIV }, ++ { "sqrt", RECIP_MASK_SQRT }, ++ { "rsqrt", RECIP_MASK_RSQRT }, ++ { "vec-div", RECIP_MASK_VEC_DIV }, ++ { "vec-sqrt", RECIP_MASK_VEC_SQRT }, ++ { "vec-rsqrt", RECIP_MASK_VEC_RSQRT }, ++ }; + +-inline hashval_t +-loongarch_lo_sum_offset_hasher::hash (const loongarch_lo_sum_offset *entry) +-{ +- return loongarch_hash_base (entry->base); +-} ++ if (loongarch_recip_name) ++ { ++ char *p = ASTRDUP (loongarch_recip_name); ++ char *q; ++ unsigned int mask, i; ++ bool invert; ++ ++ while ((q = strtok (p, ",")) != NULL) ++ { ++ p = NULL; ++ if (*q == '!') ++ { ++ invert = true; ++ q++; ++ } ++ else ++ invert = false; ++ ++ if (!strcmp (q, "default")) ++ mask = RECIP_MASK_ALL; ++ else ++ { ++ for (i = 0; i < ARRAY_SIZE (recip_options); i++) ++ if (!strcmp (q, recip_options[i].string)) ++ { ++ mask = recip_options[i].mask; ++ break; ++ } ++ ++ if (i == ARRAY_SIZE (recip_options)) ++ { ++ error ("unknown option for -mrecip=%s", q); ++ invert = false; ++ mask = RECIP_MASK_NONE; ++ } ++ } ++ ++ if (invert) ++ recip_mask &= ~mask; ++ else ++ recip_mask |= mask; ++ } ++ } ++ if (loongarch_recip) ++ recip_mask |= RECIP_MASK_ALL; + +-inline bool +-loongarch_lo_sum_offset_hasher::equal (const loongarch_lo_sum_offset *entry, +- const rtx_def *value) +-{ +- return rtx_equal_p (entry->base, value); + } + +-typedef hash_table loongarch_offset_table; +- + +-/* Subroutine of loongarch_reorg to manage passes that require DF. */ ++/* Implement TARGET_OPTION_OVERRIDE. */ + + static void +-loongarch_df_reorg (void) ++loongarch_option_override (void) + { +- /* Create def-use chains. */ +- df_set_flags (DF_EQ_NOTES); +- df_chain_add_problem (DF_UD_CHAIN); +- df_analyze (); +- +- if (TARGET_RELAX_PIC_CALLS) +- loongarch_annotate_pic_calls (); +- +- df_finish_pass (false); ++ loongarch_option_override_internal (&global_options, &global_options_set); + } + +- +-/* Implement TARGET_MACHINE_DEPENDENT_REORG. */ ++/* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */ + + static void +-loongarch_reorg (void) ++loongarch_conditional_register_usage (void) + { +- /* Restore the BLOCK_FOR_INSN pointers, which are needed by DF.DF insn info is only kept up +- to date if the CFG is available. */ +- if (loongarch_cfg_in_reorg ()) +- compute_bb_for_insn (); +- if (loongarch_cfg_in_reorg ()) ++ if (!TARGET_HARD_FLOAT) + { +- loongarch_df_reorg (); +- free_bb_for_insn (); ++ AND_COMPL_HARD_REG_SET (accessible_reg_set, ++ reg_class_contents[(int) FP_REGS]); ++ AND_COMPL_HARD_REG_SET (accessible_reg_set, ++ reg_class_contents[(int) FCC_REGS]); + } ++ + } + +-/* We use a machine specific pass to do a second machine dependent reorg +- pass after delay branch scheduling. */ ++/* Implement EH_USES. */ + +-static unsigned int +-loongarch_machine_reorg2 (void) ++bool ++loongarch_eh_uses (unsigned int regno ATTRIBUTE_UNUSED) + { +-// loongarch_insert_insn_pseudos (); +- return 0; ++ return false; + } + +-namespace { +- +-const pass_data pass_data_loongarch_machine_reorg2 = +-{ +- RTL_PASS, /* type */ +- "mach2", /* name */ +- OPTGROUP_NONE, /* optinfo_flags */ +- TV_MACH_DEP, /* tv_id */ +- 0, /* properties_required */ +- 0, /* properties_provided */ +- 0, /* properties_destroyed */ +- 0, /* todo_flags_start */ +- 0, /* todo_flags_finish */ +-}; ++/* Implement EPILOGUE_USES. 
*/ + +-class pass_loongarch_machine_reorg2 : public rtl_opt_pass ++bool ++loongarch_epilogue_uses (unsigned int regno) + { +-public: +- pass_loongarch_machine_reorg2(gcc::context *ctxt) +- : rtl_opt_pass(pass_data_loongarch_machine_reorg2, ctxt) +- {} +- +- /* opt_pass methods: */ +- virtual unsigned int execute (function *) { return loongarch_machine_reorg2 (); } +- +-}; // class pass_loongarch_machine_reorg2 +- +-} // anon namespace ++ /* Say that the epilogue uses the return address register. Note that ++ in the case of sibcalls, the values "used by the epilogue" are ++ considered live at the start of the called function. */ ++ if (regno == RETURN_ADDR_REGNUM) ++ return true; + +-rtl_opt_pass * +-make_pass_loongarch_machine_reorg2 (gcc::context *ctxt) +-{ +- return new pass_loongarch_machine_reorg2 (ctxt); ++ return false; + } + +- +-/* Implement TARGET_ASM_OUTPUT_MI_THUNK. Generate rtl rather than asm text +- in order to avoid duplicating too much logic from elsewhere. */ +- +-static void +-loongarch_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, +- HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, +- tree function) ++bool ++loongarch_load_store_bonding_p (rtx *operands, machine_mode mode, bool load_p) + { +- rtx this_rtx, temp1, temp2, fnaddr; +- rtx_insn *insn; +- bool use_sibcall_p; +- +- /* Pretend to be a post-reload pass while generating rtl. */ +- reload_completed = 1; +- +- /* Mark the end of the (empty) prologue. */ +- emit_note (NOTE_INSN_PROLOGUE_END); +- +- /* Determine if we can use a sibcall to call FUNCTION directly. */ +- fnaddr = XEXP (DECL_RTL (function), 0); +- use_sibcall_p = (loongarch_function_ok_for_sibcall (function, NULL) +- && const_call_insn_operand (fnaddr, Pmode)); +- +-// /* Determine if we need to load FNADDR from the GOT. */ +-// if (!use_sibcall_p +-// && (loongarch_got_symbol_type_p +-// (loongarch_classify_symbol (fnaddr, SYMBOL_CONTEXT_LEA)))) +-// { +-// /* Pick a global pointer. Use a call-clobbered register if +-// TARGET_CALL_SAVED_GP. */ +-// cfun->machine->global_pointer +-// = GLOBAL_POINTER_REGNUM; +-// cfun->machine->must_initialize_gp_p = true; +-// SET_REGNO (pic_offset_table_rtx, cfun->machine->global_pointer); +-// +-// /* Set up the global pointer for n32 or n64 abicalls. */ +-// loongarch_emit_loadgp (); +-// } +- +- /* We need two temporary registers in some cases. */ +- temp1 = gen_rtx_REG (Pmode, 12); +- temp2 = gen_rtx_REG (Pmode, 13); +- +- /* Find out which register contains the "this" pointer. */ +- if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) +- this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST + 1); +- else +- this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST); +- +- /* Add DELTA to THIS_RTX. */ +- if (delta != 0) +- { +- rtx offset = GEN_INT (delta); +- if (!SMALL_OPERAND (delta)) +- { +- loongarch_emit_move (temp1, offset); +- offset = temp1; +- } +- emit_insn (gen_add3_insn (this_rtx, this_rtx, offset)); +- } +- +- /* If needed, add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */ +- if (vcall_offset != 0) +- { +- rtx addr; +- +- /* Set TEMP1 to *THIS_RTX. */ +- loongarch_emit_move (temp1, gen_rtx_MEM (Pmode, this_rtx)); +- +- /* Set ADDR to a legitimate address for *THIS_RTX + VCALL_OFFSET. */ +- addr = loongarch_add_offset (temp2, temp1, vcall_offset); +- +- /* Load the offset and add it to THIS_RTX. 
*/ +- loongarch_emit_move (temp1, gen_rtx_MEM (Pmode, addr)); +- emit_insn (gen_add3_insn (this_rtx, this_rtx, temp1)); +- } ++ rtx reg1, reg2, mem1, mem2, base1, base2; ++ enum reg_class rc1, rc2; ++ HOST_WIDE_INT offset1, offset2; + +- /* Jump to the target function. Use a sibcall if direct jumps are +- allowed, otherwise load the address into a register first. */ +- if (use_sibcall_p) ++ if (load_p) + { +- insn = emit_call_insn (gen_sibcall_internal (fnaddr, const0_rtx)); +- SIBLING_CALL_P (insn) = 1; ++ reg1 = operands[0]; ++ reg2 = operands[2]; ++ mem1 = operands[1]; ++ mem2 = operands[3]; + } + else + { +- loongarch_emit_move (temp1, fnaddr); +- emit_jump_insn (gen_indirect_jump (temp1)); ++ reg1 = operands[1]; ++ reg2 = operands[3]; ++ mem1 = operands[0]; ++ mem2 = operands[2]; + } + +- /* Run just enough of rest_of_compilation. This sequence was +- "borrowed" from alpha.c. */ +- insn = get_insns (); +- split_all_insns_noflow (); +- shorten_branches (insn); +- final_start_function (insn, file, 1); +- final (insn, file, 1); +- final_end_function (); ++ if (loongarch_address_insns (XEXP (mem1, 0), mode, false) == 0 ++ || loongarch_address_insns (XEXP (mem2, 0), mode, false) == 0) ++ return false; + +- /* Clean up the vars set above. Note that final_end_function resets +- the global pointer for us. */ +- reload_completed = 0; +-} +- ++ loongarch_split_plus (XEXP (mem1, 0), &base1, &offset1); ++ loongarch_split_plus (XEXP (mem2, 0), &base2, &offset2); + +-/* Allocate a chunk of memory for per-function machine-dependent data. */ ++ /* Base regs do not match. */ ++ if (!REG_P (base1) || !rtx_equal_p (base1, base2)) ++ return false; + +-static struct machine_function * +-loongarch_init_machine_status (void) +-{ +- return ggc_cleared_alloc (); +-} ++ /* Either of the loads is clobbering base register. It is legitimate to bond ++ loads if second load clobbers base register. However, hardware does not ++ support such bonding. */ ++ if (load_p ++ && (REGNO (reg1) == REGNO (base1) || (REGNO (reg2) == REGNO (base1)))) ++ return false; + +-/* Return the processor associated with the given ISA level, or null +- if the ISA isn't valid. */ ++ /* Loading in same registers. */ ++ if (load_p && REGNO (reg1) == REGNO (reg2)) ++ return false; + +-static const struct loongarch_cpu_info * +-loongarch_cpu_info_from_isa (int isa) +-{ +- unsigned int i; ++ /* The loads/stores are not of same type. */ ++ rc1 = REGNO_REG_CLASS (REGNO (reg1)); ++ rc2 = REGNO_REG_CLASS (REGNO (reg2)); ++ if (rc1 != rc2 && !reg_class_subset_p (rc1, rc2) ++ && !reg_class_subset_p (rc2, rc1)) ++ return false; + +- for (i = 0; i < ARRAY_SIZE (loongarch_cpu_info_table); i++) +- if (loongarch_cpu_info_table[i].isa == isa) +- return loongarch_cpu_info_table + i; ++ if (abs (offset1 - offset2) != GET_MODE_SIZE (mode)) ++ return false; + +- return NULL; ++ return true; + } + +-/* Return a loongarch_cpu_info entry determined by an option valued +- OPT. */ ++/* Implement TARGET_TRAMPOLINE_INIT. 
*/ + +-static const struct loongarch_cpu_info * +-loongarch_cpu_info_from_opt (int opt) ++static void ++loongarch_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) + { +- switch (opt) +- { +- case LARCH_ARCH_OPTION_NATIVE: +- gcc_unreachable (); +- +- default: +- return &loongarch_cpu_info_table[opt]; +- } +-} ++ rtx addr, end_addr, mem; ++ rtx trampoline[8]; ++ unsigned int i, j; ++ HOST_WIDE_INT end_addr_offset, static_chain_offset, target_function_offset; + +-/* Return a default loongarch_cpu_info entry, given that no -march= option +- was explicitly specified. */ ++ /* Work out the offsets of the pointers from the start of the ++ trampoline code. */ ++ end_addr_offset = TRAMPOLINE_CODE_SIZE; ++ static_chain_offset = end_addr_offset; ++ target_function_offset = static_chain_offset + GET_MODE_SIZE (ptr_mode); + +-static const struct loongarch_cpu_info * +-loongarch_default_arch (void) +-{ +-#if defined (LARCH_CPU_STRING_DEFAULT) +- unsigned int i; +- for (i = 0; i < ARRAY_SIZE (loongarch_cpu_info_table); i++) +- if (strcmp (loongarch_cpu_info_table[i].name, LARCH_CPU_STRING_DEFAULT) == 0) +- return loongarch_cpu_info_table + i; +- gcc_unreachable (); +-#elif defined (LARCH_ISA_DEFAULT) +- return loongarch_cpu_info_from_isa (LARCH_ISA_DEFAULT); +-#else +- gcc_unreachable (); +-#endif +-} ++ /* Get pointers to the beginning and end of the code block. */ ++ addr = force_reg (Pmode, XEXP (m_tramp, 0)); ++ end_addr ++ = loongarch_force_binary (Pmode, PLUS, addr, GEN_INT (end_addr_offset)); + +-/* Set up globals to generate code for the ISA or processor +- described by INFO. */ ++#define OP(X) gen_int_mode (X, SImode) + +-static void +-loongarch_set_architecture (const struct loongarch_cpu_info *info) +-{ +- if (info != 0) +- { +- loongarch_arch_info = info; +- loongarch_arch = info->cpu; +- loongarch_isa = info->isa; +- if (loongarch_isa < 32) +- loongarch_isa_rev = 0; +- else +- loongarch_isa_rev = (loongarch_isa & 31) + 1; +- } +-} ++ /* Build up the code in TRAMPOLINE. */ ++ i = 0; ++ /*pcaddi $static_chain,0 ++ ld.[dw] $tmp,$static_chain,target_function_offset ++ ld.[dw] $static_chain,$static_chain,static_chain_offset ++ jirl $r0,$tmp,0 */ ++ trampoline[i++] = OP (0x18000000 | (STATIC_CHAIN_REGNUM - GP_REG_FIRST)); ++ trampoline[i++] = OP ((ptr_mode == DImode ? 0x28c00000 : 0x28800000) ++ | 19 /* $t7 */ ++ | ((STATIC_CHAIN_REGNUM - GP_REG_FIRST) << 5) ++ | ((target_function_offset & 0xfff) << 10)); ++ trampoline[i++] = OP ((ptr_mode == DImode ? 0x28c00000 : 0x28800000) ++ | (STATIC_CHAIN_REGNUM - GP_REG_FIRST) ++ | ((STATIC_CHAIN_REGNUM - GP_REG_FIRST) << 5) ++ | ((static_chain_offset & 0xfff) << 10)); ++ trampoline[i++] = OP (0x4c000000 | (19 << 5)); ++#undef OP + +-/* Likewise for tuning. */ ++ for (j = 0; j < i; j++) ++ { ++ mem = adjust_address (m_tramp, SImode, j * GET_MODE_SIZE (SImode)); ++ loongarch_emit_move (mem, trampoline[j]); ++ } + +-static void +-loongarch_set_tune (const struct loongarch_cpu_info *info) +-{ +- if (info != 0) +- { +- loongarch_tune_info = info; +- loongarch_tune = info->cpu; +- } +-} ++ /* Set up the static chain pointer field. */ ++ mem = adjust_address (m_tramp, ptr_mode, static_chain_offset); ++ loongarch_emit_move (mem, chain_value); + +-/* Implement TARGET_OPTION_OVERRIDE. */ ++ /* Set up the target function field. 
*/ ++ mem = adjust_address (m_tramp, ptr_mode, target_function_offset); ++ loongarch_emit_move (mem, XEXP (DECL_RTL (fndecl), 0)); + +-static void +-loongarch_option_override (void) +-{ +- int i, start, regno, mode; ++ /* Flush the code part of the trampoline. */ ++ emit_insn (gen_add3_insn (end_addr, addr, GEN_INT (TRAMPOLINE_SIZE))); ++ emit_insn (gen_clear_cache (addr, end_addr)); ++} + +-#ifdef SUBTARGET_OVERRIDE_OPTIONS +- SUBTARGET_OVERRIDE_OPTIONS; +-#endif ++/* Generate or test for an insn that supports a constant permutation. */ + ++#define MAX_VECT_LEN 32 + +- /* -mno-float overrides -mhard-float and -msoft-float. */ +- if (TARGET_NO_FLOAT) +- { +- target_flags |= MASK_SOFT_FLOAT_ABI; +- target_flags_explicit |= MASK_SOFT_FLOAT_ABI; +- } +- +- +- /* Set the small data limit. */ +- loongarch_small_data_threshold = (global_options_set.x_g_switch_value +- ? g_switch_value +- : LARCH_DEFAULT_GVALUE); +- +- /* The following code determines the architecture and register size. +- Similar code was added to GAS 2.14 (see tc-loongarch.c:md_after_parse_args()). +- The GAS and GCC code should be kept in sync as much as possible. */ +- +- if (global_options_set.x_loongarch_arch_option) +- loongarch_set_architecture (loongarch_cpu_info_from_opt (loongarch_arch_option)); ++struct expand_vec_perm_d ++{ ++ rtx target, op0, op1; ++ unsigned char perm[MAX_VECT_LEN]; ++ machine_mode vmode; ++ unsigned char nelt; ++ bool one_vector_p; ++ bool testing_p; ++}; + +- if (loongarch_arch_info == 0) +- loongarch_set_architecture (loongarch_default_arch ()); ++/* Construct (set target (vec_select op0 (parallel perm))) and ++ return true if that's a valid instruction in the active ISA. */ + +- /* Optimize for loongarch_arch, unless -mtune selects a different processor. */ +- if (global_options_set.x_loongarch_tune_option) +- loongarch_set_tune (loongarch_cpu_info_from_opt (loongarch_tune_option)); ++static bool ++loongarch_expand_vselect (rtx target, rtx op0, ++ const unsigned char *perm, unsigned nelt) ++{ ++ rtx rperm[MAX_VECT_LEN], x; ++ rtx_insn *insn; ++ unsigned i; + +- if (loongarch_tune_info == 0) +- loongarch_set_tune (loongarch_arch_info); ++ for (i = 0; i < nelt; ++i) ++ rperm[i] = GEN_INT (perm[i]); + +- if ((target_flags_explicit & MASK_64BIT) == 0) +- { +- /* Infer the integer register size from the ABI and processor. +- Restrict ourselves to 32-bit registers if that's all the +- processor has, or if the ABI cannot handle 64-bit registers. */ +- if (loongarch_abi == ABILP32) +- target_flags &= ~MASK_64BIT; +- else +- target_flags |= MASK_64BIT; +- } ++ x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm)); ++ x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x); ++ x = gen_rtx_SET (target, x); + +- if ((target_flags_explicit & MASK_FLOAT64) != 0) +- { +- if (TARGET_SINGLE_FLOAT && TARGET_FLOAT64) +- error ("unsupported combination: %s", "-mfp64 -msingle-float"); +- } +- else ++ insn = emit_insn (x); ++ if (recog_memoized (insn) < 0) + { +- /* -msingle-float selects 32-bit float registers. On r6 and later, +- -mdouble-float selects 64-bit float registers, since the old paired +- register model is not supported. In other cases the float registers +- should be the same size as the integer ones. 
*/ +- if (TARGET_64BIT && TARGET_DOUBLE_FLOAT) +- target_flags |= MASK_FLOAT64; +- else if (loongarch_abi == ABILP32 && ISA_HAS_LSX) +- target_flags |= MASK_FLOAT64; +- else +- target_flags &= ~MASK_FLOAT64; ++ remove_insn (insn); ++ return false; + } ++ return true; ++} + +- /* End of code shared with GAS. */ +- +- if (!TARGET_OLDABI) +- flag_pcc_struct_return = 0; ++/* Similar, but generate a vec_concat from op0 and op1 as well. */ + +- /* Decide which rtx_costs structure to use. */ +- if (optimize_size) +- loongarch_cost = &loongarch_rtx_cost_optimize_size; +- else +- loongarch_cost = &loongarch_rtx_cost_data[loongarch_tune]; ++static bool ++loongarch_expand_vselect_vconcat (rtx target, rtx op0, rtx op1, ++ const unsigned char *perm, unsigned nelt) ++{ ++ machine_mode v2mode; ++ rtx x; + +- /* If the user hasn't specified a branch cost, use the processor's +- default. */ +- if (loongarch_branch_cost == 0) +- loongarch_branch_cost = loongarch_cost->branch_cost; ++ if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode)) ++ return false; ++ x = gen_rtx_VEC_CONCAT (v2mode, op0, op1); ++ return loongarch_expand_vselect (target, x, perm, nelt); ++} + +- /* Prefer a call to memcpy over inline code when optimizing for size, +- though see MOVE_RATIO in loongarch.h. */ +- if (optimize_size && (target_flags_explicit & MASK_MEMCPY) == 0) +- target_flags |= MASK_MEMCPY; ++/* Construct (set target (vec_select op0 (parallel selector))) and ++ return true if that's a valid instruction in the active ISA. */ + +- /* If we have a nonzero small-data limit, check that the -mgpopt +- setting is consistent with the other target flags. */ +- if (loongarch_small_data_threshold > 0) +- { +- if (TARGET_VXWORKS_RTP) +- warning (0, "cannot use small-data accesses for %qs", "-mrtp"); +- } ++static bool ++loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d) ++{ ++ rtx x, elts[MAX_VECT_LEN]; ++ rtvec v; ++ rtx_insn *insn; ++ unsigned i; + +- /* Make sure that when ISA_HAS_LSX is true, TARGET_FLOAT64 and +- TARGET_HARD_FLOAT_ABI and both true. */ +- if (ISA_HAS_LSX && !(TARGET_FLOAT64 && TARGET_HARD_FLOAT_ABI)) +- error ("%<-mlsx%> must be used with %<-mfp64%> and %<-mhard-float%>"); ++ if (!ISA_HAS_LSX && !ISA_HAS_LASX) ++ return false; + +- /* If TARGET_LASX, enable TARGET_LSX. */ +- if (TARGET_LASX) +- target_flags |= MASK_LSX; ++ for (i = 0; i < d->nelt; i++) ++ elts[i] = GEN_INT (d->perm[i]); + +- /* .cfi_* directives generate a read-only section, so fall back on +- manual .eh_frame creation if we need the section to be writable. */ +- if (TARGET_WRITABLE_EH_FRAME) +- flag_dwarf2_cfi_asm = 0; ++ v = gen_rtvec_v (d->nelt, elts); ++ x = gen_rtx_PARALLEL (VOIDmode, v); + +- loongarch_init_print_operand_punct (); ++ if (!loongarch_const_vector_shuffle_set_p (x, d->vmode)) ++ return false; + +- /* Set up array to map GCC register number to debug register number. +- Ignore the special purpose register numbers. 
*/ ++ x = gen_rtx_VEC_SELECT (d->vmode, d->op0, x); ++ x = gen_rtx_SET (d->target, x); + +- for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) ++ insn = emit_insn (x); ++ if (recog_memoized (insn) < 0) + { +- loongarch_dbx_regno[i] = IGNORED_DWARF_REGNUM; +- if (GP_REG_P (i) || FP_REG_P (i)) +- loongarch_dwarf_regno[i] = i; +- else +- loongarch_dwarf_regno[i] = INVALID_REGNUM; ++ remove_insn (insn); ++ return false; + } ++ return true; ++} + +- start = GP_DBX_FIRST - GP_REG_FIRST; +- for (i = GP_REG_FIRST; i <= GP_REG_LAST; i++) +- loongarch_dbx_regno[i] = i + start; +- +- start = FP_DBX_FIRST - FP_REG_FIRST; +- for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++) +- loongarch_dbx_regno[i] = i + start; +- +- /* Set up loongarch_hard_regno_mode_ok. */ +- for (mode = 0; mode < MAX_MACHINE_MODE; mode++) +- for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) +- loongarch_hard_regno_mode_ok_p[mode][regno] +- = loongarch_hard_regno_mode_ok_uncached (regno, (machine_mode) mode); ++/* Try to simplify a two vector permutation using 2 intra-lane interleave ++ insns and cross-lane shuffle for 32-byte vectors. */ + +- /* Function to allocate machine-dependent function status. */ +- init_machine_status = &loongarch_init_machine_status; +- target_flags &= ~MASK_RELAX_PIC_CALLS; +- +- /* We register a second machine specific reorg pass after delay slot +- filling. Registering the pass must be done at start up. It's +- convenient to do it here. */ +- opt_pass *new_pass = make_pass_loongarch_machine_reorg2 (g); +- struct register_pass_info insert_pass_loongarch_machine_reorg2 = +- { +- new_pass, /* pass */ +- "dbr", /* reference_pass_name */ +- 1, /* ref_pass_instance_number */ +- PASS_POS_INSERT_AFTER /* po_op */ +- }; +- register_pass (&insert_pass_loongarch_machine_reorg2); ++static bool ++loongarch_expand_vec_perm_interleave (struct expand_vec_perm_d *d) ++{ ++ unsigned i, nelt; ++ rtx t1,t2,t3; ++ rtx (*gen_high) (rtx, rtx, rtx); ++ rtx (*gen_low) (rtx, rtx, rtx); ++ machine_mode mode = GET_MODE (d->target); + +- loongarch_register_frame_header_opt (); +-} ++ if (d->one_vector_p) ++ return false; ++ if (ISA_HAS_LASX && GET_MODE_SIZE (d->vmode) == 32) ++ ; ++ else ++ return false; + ++ nelt = d->nelt; ++ if (d->perm[0] != 0 && d->perm[0] != nelt / 2) ++ return false; ++ for (i = 0; i < nelt; i += 2) ++ if (d->perm[i] != d->perm[0] + i / 2 ++ || d->perm[i + 1] != d->perm[0] + i / 2 + nelt) ++ return false; + +-/* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */ ++ if (d->testing_p) ++ return true; + +-static void +-loongarch_conditional_register_usage (void) +-{ +- if (!TARGET_HARD_FLOAT) ++ switch (d->vmode) + { +- AND_COMPL_HARD_REG_SET (accessible_reg_set, +- reg_class_contents[(int) FP_REGS]); +- AND_COMPL_HARD_REG_SET (accessible_reg_set, +- reg_class_contents[(int) ST_REGS]); ++ case E_V32QImode: ++ gen_high = gen_lasx_xvilvh_b; ++ gen_low = gen_lasx_xvilvl_b; ++ break; ++ case E_V16HImode: ++ gen_high = gen_lasx_xvilvh_h; ++ gen_low = gen_lasx_xvilvl_h; ++ break; ++ case E_V8SImode: ++ gen_high = gen_lasx_xvilvh_w; ++ gen_low = gen_lasx_xvilvl_w; ++ break; ++ case E_V4DImode: ++ gen_high = gen_lasx_xvilvh_d; ++ gen_low = gen_lasx_xvilvl_d; ++ break; ++ case E_V8SFmode: ++ gen_high = gen_lasx_xvilvh_w_f; ++ gen_low = gen_lasx_xvilvl_w_f; ++ break; ++ case E_V4DFmode: ++ gen_high = gen_lasx_xvilvh_d_f; ++ gen_low = gen_lasx_xvilvl_d_f; ++ break; ++ default: ++ gcc_unreachable (); + } +-} + +-/* Implement EH_USES. 
*/ +- +-bool +-loongarch_eh_uses (unsigned int regno) +-{ +- return false; ++ t1 = gen_reg_rtx (mode); ++ t2 = gen_reg_rtx (mode); ++ emit_insn (gen_high (t1, d->op0, d->op1)); ++ emit_insn (gen_low (t2, d->op0, d->op1)); ++ if(mode == V4DFmode || mode == V8SFmode) ++ { ++ t3 = gen_reg_rtx (V4DFmode); ++ if (d->perm[0]) ++ emit_insn(gen_lasx_xvpermi_q_v4df (t3, gen_lowpart (V4DFmode, t1), ++ gen_lowpart (V4DFmode, t2),GEN_INT(0x31))); ++ else ++ emit_insn(gen_lasx_xvpermi_q_v4df (t3, gen_lowpart (V4DFmode, t1), ++ gen_lowpart (V4DFmode, t2),GEN_INT(0x20))); ++ } ++ else ++ { ++ t3 = gen_reg_rtx (V4DImode); ++ if (d->perm[0]) ++ emit_insn(gen_lasx_xvpermi_q_v4di (t3, gen_lowpart (V4DImode, t1), ++ gen_lowpart (V4DImode, t2),GEN_INT(0x31))); ++ else ++ emit_insn(gen_lasx_xvpermi_q_v4di (t3, gen_lowpart (V4DImode, t1), ++ gen_lowpart (V4DImode, t2),GEN_INT(0x20))); ++ } ++ emit_move_insn (d->target, gen_lowpart (mode, t3)); ++ return true; + } + +-/* Implement EPILOGUE_USES. */ ++/* Implement extract-even and extract-odd permutations.*/ + +-bool +-loongarch_epilogue_uses (unsigned int regno) ++static bool ++loongarch_expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd) + { +- /* Say that the epilogue uses the return address register. Note that +- in the case of sibcalls, the values "used by the epilogue" are +- considered live at the start of the called function. */ +- if (regno == RETURN_ADDR_REGNUM) +- return true; ++ rtx t1; ++ machine_mode mode = GET_MODE (d->target); ++ t1 = gen_reg_rtx (mode); + +- /* An interrupt handler must preserve some registers that are +- ordinarily call-clobbered. */ +- if (cfun->machine->interrupt_handler_p +- && loongarch_interrupt_extra_call_saved_reg_p (regno)) ++ if (d->testing_p) + return true; + +- return false; +-} ++ switch (d->vmode) ++ { ++ case E_V4DFmode: ++ /* Shuffle the lanes around into { 0 4 2 6 } and { 1 5 3 7 }. */ ++ if (odd) ++ emit_insn (gen_lasx_xvilvh_d_f (t1, d->op0, d->op1)); ++ else ++ emit_insn (gen_lasx_xvilvl_d_f (t1, d->op0, d->op1)); + +-/* Return true if MEM1 and MEM2 use the same base register, and the +- offset of MEM2 equals the offset of MEM1 plus 4. FIRST_REG is the +- register into (from) which the contents of MEM1 will be loaded +- (stored), depending on the value of LOAD_P. +- SWAP_P is true when the 1st and 2nd instructions are swapped. */ ++ /* Shuffle within the 256-bit lanes to produce the result required. ++ { 0 2 4 6 } | { 1 3 5 7 }. */ ++ emit_insn (gen_lasx_xvpermi_d_v4df (d->target, t1, GEN_INT (0xd8))); ++ break; + +-static bool +-loongarch_load_store_pair_p_1 (bool load_p, bool swap_p, +- rtx first_reg, rtx mem1, rtx mem2) +-{ +- rtx base1, base2; +- HOST_WIDE_INT offset1, offset2; ++ case E_V4DImode: ++ if (odd) ++ emit_insn (gen_lasx_xvilvh_d (t1, d->op0, d->op1)); ++ else ++ emit_insn (gen_lasx_xvilvl_d (t1, d->op0, d->op1)); + +- if (!MEM_P (mem1) || !MEM_P (mem2)) +- return false; ++ emit_insn (gen_lasx_xvpermi_d_v4di (d->target, t1, GEN_INT (0xd8))); ++ break; + +- loongarch_split_plus (XEXP (mem1, 0), &base1, &offset1); +- loongarch_split_plus (XEXP (mem2, 0), &base2, &offset2); ++ case E_V8SFmode: ++ /* Shuffle the lanes around into: ++ { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */ ++ if (odd) ++ emit_insn (gen_lasx_xvpickod_w_f (t1, d->op0, d->op1)); ++ else ++ emit_insn (gen_lasx_xvpickev_w_f (t1, d->op0, d->op1)); + +- if (!REG_P (base1) || !rtx_equal_p (base1, base2)) +- return false; ++ /* Shuffle within the 256-bit lanes to produce the result required. 
++ { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */ ++ emit_insn (gen_lasx_xvpermi_d_v8sf (d->target, t1, GEN_INT (0xd8))); ++ break; + +- /* Avoid invalid load pair instructions. */ +- if (load_p && REGNO (first_reg) == REGNO (base1)) +- return false; ++ case E_V8SImode: ++ if (odd) ++ emit_insn (gen_lasx_xvpickod_w (t1, d->op0, d->op1)); ++ else ++ emit_insn (gen_lasx_xvpickev_w (t1, d->op0, d->op1)); + +- /* We must avoid this case for anti-dependence. +- Ex: lw $3, 4($3) +- lw $2, 0($3) +- first_reg is $2, but the base is $3. */ +- if (load_p +- && swap_p +- && REGNO (first_reg) + 1 == REGNO (base1)) +- return false; ++ emit_insn (gen_lasx_xvpermi_d_v8si (d->target, t1, GEN_INT (0xd8))); ++ break; + +- if (offset2 != offset1 + 4) +- return false; ++ case E_V16HImode: ++ if (odd) ++ emit_insn (gen_lasx_xvpickod_h (t1, d->op0, d->op1)); ++ else ++ emit_insn (gen_lasx_xvpickev_h (t1, d->op0, d->op1)); + +- if (!ULARCH_12BIT_OFFSET_P (offset1)) +- return false; ++ emit_insn (gen_lasx_xvpermi_d_v16hi (d->target, t1, GEN_INT (0xd8))); ++ break; + +- return true; +-} ++ case E_V32QImode: ++ if (odd) ++ emit_insn (gen_lasx_xvpickod_b (t1, d->op0, d->op1)); ++ else ++ emit_insn (gen_lasx_xvpickev_b (t1, d->op0, d->op1)); + +-bool +-loongarch_load_store_bonding_p (rtx *operands, machine_mode mode, bool load_p) +-{ +- rtx reg1, reg2, mem1, mem2, base1, base2; +- enum reg_class rc1, rc2; +- HOST_WIDE_INT offset1, offset2; ++ emit_insn (gen_lasx_xvpermi_d_v32qi (d->target, t1, GEN_INT (0xd8))); ++ break; + +- if (load_p) +- { +- reg1 = operands[0]; +- reg2 = operands[2]; +- mem1 = operands[1]; +- mem2 = operands[3]; +- } +- else +- { +- reg1 = operands[1]; +- reg2 = operands[3]; +- mem1 = operands[0]; +- mem2 = operands[2]; ++ default: ++ gcc_unreachable (); + } + +- if (loongarch_address_insns (XEXP (mem1, 0), mode, false) == 0 +- || loongarch_address_insns (XEXP (mem2, 0), mode, false) == 0) +- return false; +- +- loongarch_split_plus (XEXP (mem1, 0), &base1, &offset1); +- loongarch_split_plus (XEXP (mem2, 0), &base2, &offset2); +- +- /* Base regs do not match. */ +- if (!REG_P (base1) || !rtx_equal_p (base1, base2)) +- return false; ++ return true; ++} + +- /* Either of the loads is clobbering base register. It is legitimate to bond +- loads if second load clobbers base register. However, hardware does not +- support such bonding. */ +- if (load_p +- && (REGNO (reg1) == REGNO (base1) +- || (REGNO (reg2) == REGNO (base1)))) +- return false; ++/* Pattern match extract-even and extract-odd permutations. */ + +- /* Loading in same registers. */ +- if (load_p +- && REGNO (reg1) == REGNO (reg2)) ++static bool ++loongarch_expand_vec_perm_even_odd (struct expand_vec_perm_d *d) ++{ ++ unsigned i, odd, nelt = d->nelt; ++ if(!ISA_HAS_LASX) + return false; + +- /* The loads/stores are not of same type. */ +- rc1 = REGNO_REG_CLASS (REGNO (reg1)); +- rc2 = REGNO_REG_CLASS (REGNO (reg2)); +- if (rc1 != rc2 +- && !reg_class_subset_p (rc1, rc2) +- && !reg_class_subset_p (rc2, rc1)) ++ odd = d->perm[0]; ++ if (odd != 0 && odd != 1) + return false; + +- if (abs (offset1 - offset2) != GET_MODE_SIZE (mode)) +- return false; ++ for (i = 1; i < nelt; ++i) ++ if (d->perm[i] != 2 * i + odd) ++ return false; + +- return true; ++ return loongarch_expand_vec_perm_even_odd_1 (d, odd); + } + +-/* OPERANDS describes the operands to a pair of SETs, in the order +- dest1, src1, dest2, src2. Return true if the operands can be used +- in an LWP or SWP instruction; LOAD_P says which. 
*/ ++/* Expand a variable vector permutation for LASX. */ + +-bool +-loongarch_load_store_pair_p (bool load_p, rtx *operands) ++void ++loongarch_expand_vec_perm_1 (rtx operands[]) + { +- rtx reg1, reg2, mem1, mem2; ++ rtx target = operands[0]; ++ rtx op0 = operands[1]; ++ rtx op1 = operands[2]; ++ rtx mask = operands[3]; ++ bool one_operand_shuffle = rtx_equal_p (op0, op1); ++ rtx t1, t2, t3, t4, t5, t6, vt, vec[32]; ++ machine_mode mode = GET_MODE (op0); ++ machine_mode maskmode = GET_MODE (mask); ++ int w, i; ++ ++ /* Number of elements in the vector. */ ++ w = GET_MODE_NUNITS (mode); ++ ++ if (mode == V4DImode || mode == V4DFmode) ++ { ++ maskmode = mode = V8SImode; ++ w = 8; ++ t1 = gen_reg_rtx (maskmode); ++ ++ /* Replicate the low bits of the V4DImode mask into V8SImode: ++ mask = { A B C D } ++ t1 = { A A B B C C D D }. */ ++ for (i = 0; i < w / 2; ++i) ++ vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2); ++ vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec)); ++ vt = force_reg (maskmode, vt); ++ mask = gen_lowpart (maskmode, mask); ++ emit_insn (gen_lasx_xvperm_w (t1, mask, vt)); ++ ++ /* Multiply the shuffle indicies by two. */ ++ t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1, ++ OPTAB_DIRECT); ++ ++ /* Add one to the odd shuffle indicies: ++ t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */ ++ for (i = 0; i < w / 2; ++i) ++ { ++ vec[i * 2] = const0_rtx; ++ vec[i * 2 + 1] = const1_rtx; ++ } ++ vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec)); ++ vt = validize_mem (force_const_mem (maskmode, vt)); ++ t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1, ++ OPTAB_DIRECT); + +- if (load_p) +- { +- reg1 = operands[0]; +- reg2 = operands[2]; +- mem1 = operands[1]; +- mem2 = operands[3]; ++ /* Continue as if V8SImode (resp. V32QImode) was used initially. 
*/ ++ operands[3] = mask = t1; ++ target = gen_reg_rtx (mode); ++ op0 = gen_lowpart (mode, op0); ++ op1 = gen_lowpart (mode, op1); + } +- else ++ switch (mode) + { +- reg1 = operands[1]; +- reg2 = operands[3]; +- mem1 = operands[0]; +- mem2 = operands[2]; ++ case E_V8SImode: ++ if (one_operand_shuffle) ++ { ++ emit_insn (gen_lasx_xvperm_w (target, op0, mask)); ++ if (target != operands[0]) ++ emit_move_insn (operands[0], ++ gen_lowpart (GET_MODE (operands[0]), target)); ++ } ++ else ++ { ++ t1 = gen_reg_rtx (V8SImode); ++ t2 = gen_reg_rtx (V8SImode); ++ emit_insn (gen_lasx_xvperm_w (t1, op0, mask)); ++ emit_insn (gen_lasx_xvperm_w (t2, op1, mask)); ++ goto merge_two; ++ } ++ return; ++ ++ case E_V8SFmode: ++ mask = gen_lowpart (V8SImode, mask); ++ if (one_operand_shuffle) ++ emit_insn (gen_lasx_xvperm_w_f (target, op0, mask)); ++ else ++ { ++ t1 = gen_reg_rtx (V8SFmode); ++ t2 = gen_reg_rtx (V8SFmode); ++ emit_insn (gen_lasx_xvperm_w_f (t1, op0, mask)); ++ emit_insn (gen_lasx_xvperm_w_f (t2, op1, mask)); ++ goto merge_two; ++ } ++ return; ++ ++ case E_V16HImode: ++ if (one_operand_shuffle) ++ { ++ t1 = gen_reg_rtx (V16HImode); ++ t2 = gen_reg_rtx (V16HImode); ++ emit_insn (gen_lasx_xvpermi_d_v16hi (t1, op0, GEN_INT(0x44))); ++ emit_insn (gen_lasx_xvpermi_d_v16hi (t2, op0, GEN_INT(0xee))); ++ emit_insn (gen_lasx_xvshuf_h (target, mask, t2, t1)); ++ } ++ else ++ { ++ t1 = gen_reg_rtx (V16HImode); ++ t2 = gen_reg_rtx (V16HImode); ++ t3 = gen_reg_rtx (V16HImode); ++ t4 = gen_reg_rtx (V16HImode); ++ t5 = gen_reg_rtx (V16HImode); ++ t6 = gen_reg_rtx (V16HImode); ++ emit_insn (gen_lasx_xvpermi_d_v16hi (t3, op0, GEN_INT(0x44))); ++ emit_insn (gen_lasx_xvpermi_d_v16hi (t4, op0, GEN_INT(0xee))); ++ emit_insn (gen_lasx_xvshuf_h (t1, mask, t4, t3)); ++ emit_insn (gen_lasx_xvpermi_d_v16hi (t5, op1, GEN_INT(0x44))); ++ emit_insn (gen_lasx_xvpermi_d_v16hi (t6, op1, GEN_INT(0xee))); ++ emit_insn (gen_lasx_xvshuf_h (t2, mask, t6, t5)); ++ goto merge_two; ++ } ++ return; ++ ++ case E_V32QImode: ++ if (one_operand_shuffle) ++ { ++ t1 = gen_reg_rtx (V32QImode); ++ t2 = gen_reg_rtx (V32QImode); ++ emit_insn (gen_lasx_xvpermi_d_v32qi (t1, op0, GEN_INT(0x44))); ++ emit_insn (gen_lasx_xvpermi_d_v32qi (t2, op0, GEN_INT(0xee))); ++ emit_insn (gen_lasx_xvshuf_b (target, t2, t1, mask)); ++ } ++ else ++ { ++ t1 = gen_reg_rtx (V32QImode); ++ t2 = gen_reg_rtx (V32QImode); ++ t3 = gen_reg_rtx (V32QImode); ++ t4 = gen_reg_rtx (V32QImode); ++ t5 = gen_reg_rtx (V32QImode); ++ t6 = gen_reg_rtx (V32QImode); ++ emit_insn (gen_lasx_xvpermi_d_v32qi (t3, op0, GEN_INT(0x44))); ++ emit_insn (gen_lasx_xvpermi_d_v32qi (t4, op0, GEN_INT(0xee))); ++ emit_insn (gen_lasx_xvshuf_b (t1, t4, t3, mask)); ++ emit_insn (gen_lasx_xvpermi_d_v32qi (t5, op1, GEN_INT(0x44))); ++ emit_insn (gen_lasx_xvpermi_d_v32qi (t6, op1, GEN_INT(0xee))); ++ emit_insn (gen_lasx_xvshuf_b (t2, t6, t5, mask)); ++ goto merge_two; ++ } ++ return; ++ ++ default: ++ gcc_assert (GET_MODE_SIZE (mode) == 32); ++ break; + } + +- if (REGNO (reg2) == REGNO (reg1) + 1) +- return loongarch_load_store_pair_p_1 (load_p, false, reg1, mem1, mem2); ++merge_two: ++ /* Then merge them together. The key is whether any given control ++ element contained a bit set that indicates the second word. 
*/ ++ rtx xops[6]; ++ mask = operands[3]; ++ vt = GEN_INT (w); ++ vt = gen_const_vec_duplicate (maskmode, vt); ++ vt = force_reg (maskmode, vt); ++ if (GET_MODE (target) != mode) ++ target = gen_reg_rtx (mode); ++ xops[0] = target; ++ xops[1] = gen_lowpart (mode, t2); ++ xops[2] = gen_lowpart (mode, t1); ++ xops[3] = gen_rtx_GE (maskmode, mask, vt); ++ xops[4] = mask; ++ xops[5] = vt; ++ ++ loongarch_expand_vec_cond_expr (mode, maskmode, xops); ++ if (target != operands[0]) ++ emit_move_insn (operands[0], ++ gen_lowpart (GET_MODE (operands[0]), target)); ++} + +- if (REGNO (reg1) == REGNO (reg2) + 1) +- return loongarch_load_store_pair_p_1 (load_p, true, reg2, mem2, mem1); ++void ++loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) ++{ ++ machine_mode vmode = GET_MODE (target); + +- return false; ++ gcc_checking_assert (vmode == E_V16QImode ++ || vmode == E_V2DImode || vmode == E_V2DFmode ++ || vmode == E_V4SImode || vmode == E_V4SFmode ++ || vmode == E_V8HImode); ++ gcc_checking_assert (GET_MODE (op0) == vmode); ++ gcc_checking_assert (GET_MODE (op1) == vmode); ++ gcc_checking_assert (GET_MODE (sel) == vmode); ++ gcc_checking_assert (ISA_HAS_LSX); ++ ++ switch (vmode) ++ { ++ case E_V16QImode: ++ emit_insn (gen_lsx_vshuf_b (target, op1, op0, sel)); ++ break; ++ case E_V2DFmode: ++ emit_insn (gen_lsx_vshuf_d_f (target, sel, op1, op0)); ++ break; ++ case E_V2DImode: ++ emit_insn (gen_lsx_vshuf_d (target, sel, op1, op0)); ++ break; ++ case E_V4SFmode: ++ emit_insn (gen_lsx_vshuf_w_f (target, sel, op1, op0)); ++ break; ++ case E_V4SImode: ++ emit_insn (gen_lsx_vshuf_w (target, sel, op1, op0)); ++ break; ++ case E_V8HImode: ++ emit_insn (gen_lsx_vshuf_h (target, sel, op1, op0)); ++ break; ++ default: ++ break; ++ } + } + +-/* Return true if REG1 and REG2 match the criteria for a movep insn. */ ++static bool ++loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d) ++{ ++ int i; ++ rtx target, op0, op1, sel, tmp; ++ rtx rperm[MAX_VECT_LEN]; + +-bool +-loongarch_movep_target_p (rtx reg1, rtx reg2) +-{ +- int regno1, regno2, pair; +- unsigned int i; +- static const int match[8] = { +- 0x00000060, /* 5, 6 */ +- 0x000000a0, /* 5, 7 */ +- 0x000000c0, /* 6, 7 */ +- 0x00200010, /* 4, 21 */ +- 0x00400010, /* 4, 22 */ +- 0x00000030, /* 4, 5 */ +- 0x00000050, /* 4, 6 */ +- 0x00000090 /* 4, 7 */ +- }; +- +- if (!REG_P (reg1) || !REG_P (reg2)) +- return false; ++ if (d->vmode == E_V2DImode || d->vmode == E_V2DFmode ++ || d->vmode == E_V4SImode || d->vmode == E_V4SFmode ++ || d->vmode == E_V8HImode || d->vmode == E_V16QImode) ++ { ++ target = d->target; ++ op0 = d->op0; ++ op1 = d->one_vector_p ? 
d->op0 : d->op1; + +- regno1 = REGNO (reg1); +- regno2 = REGNO (reg2); ++ if (GET_MODE (op0) != GET_MODE (op1) ++ || GET_MODE (op0) != GET_MODE (target)) ++ return false; + +- if (!GP_REG_P (regno1) || !GP_REG_P (regno2)) +- return false; ++ if (d->testing_p) ++ return true; + +- pair = (1 << regno1) | (1 << regno2); ++ for (i = 0; i < d->nelt; i += 1) ++ { ++ rperm[i] = GEN_INT (d->perm[i]); ++ } + +- for (i = 0; i < ARRAY_SIZE (match); i++) +- if (pair == match[i]) +- return true; ++ if (d->vmode == E_V2DFmode) ++ { ++ sel = gen_rtx_CONST_VECTOR (E_V2DImode, gen_rtvec_v (d->nelt, rperm)); ++ tmp = gen_rtx_SUBREG (E_V2DImode, d->target, 0); ++ emit_move_insn (tmp, sel); ++ } ++ else if (d->vmode == E_V4SFmode) ++ { ++ sel = gen_rtx_CONST_VECTOR (E_V4SImode, gen_rtvec_v (d->nelt, rperm)); ++ tmp = gen_rtx_SUBREG (E_V4SImode, d->target, 0); ++ emit_move_insn (tmp, sel); ++ } ++ else ++ { ++ sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt, rperm)); ++ emit_move_insn (d->target, sel); ++ } ++ ++ switch (d->vmode) ++ { ++ case E_V2DFmode: ++ emit_insn (gen_lsx_vshuf_d_f (target, target, op1, op0)); ++ break; ++ case E_V2DImode: ++ emit_insn (gen_lsx_vshuf_d (target, target, op1, op0)); ++ break; ++ case E_V4SFmode: ++ emit_insn (gen_lsx_vshuf_w_f (target, target, op1, op0)); ++ break; ++ case E_V4SImode: ++ emit_insn (gen_lsx_vshuf_w (target, target, op1, op0)); ++ break; ++ case E_V8HImode: ++ emit_insn (gen_lsx_vshuf_h (target, target, op1, op0)); ++ break; ++ case E_V16QImode: ++ emit_insn (gen_lsx_vshuf_b (target, op1, op0, target)); ++ break; ++ default: ++ break; ++ } + ++ return true; ++ } + return false; + } +- +-/* Return the size in bytes of the trampoline code, padded to +- TRAMPOLINE_ALIGNMENT bits. The static chain pointer and target +- function address immediately follow. */ + +-int +-loongarch_trampoline_code_size (void) ++static bool ++loongarch_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) + { +- return 4 * 4; +-} ++ unsigned int i, nelt = d->nelt; ++ unsigned char perm2[MAX_VECT_LEN]; + +-/* Implement TARGET_TRAMPOLINE_INIT. */ ++ if (d->one_vector_p) ++ { ++ /* Try interleave with alternating operands. */ ++ memcpy (perm2, d->perm, sizeof(perm2)); ++ for (i = 1; i < nelt; i += 2) ++ perm2[i] += nelt; ++ if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, perm2, nelt)) ++ return true; ++ } ++ else ++ { ++ if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, ++ d->perm, nelt)) ++ return true; + +-static void +-loongarch_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) +-{ +- rtx addr, end_addr, high, low, opcode, mem; +- rtx trampoline[8]; +- unsigned int i, j; +- HOST_WIDE_INT end_addr_offset, static_chain_offset, target_function_offset; ++ /* Try again with swapped operands. */ ++ for (i = 0; i < nelt; ++i) ++ perm2[i] = (d->perm[i] + nelt) & (2 * nelt - 1); ++ if (loongarch_expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt)) ++ return true; ++ } + +- /* Work out the offsets of the pointers from the start of the +- trampoline code. */ +- end_addr_offset = loongarch_trampoline_code_size (); +- static_chain_offset = end_addr_offset; +- target_function_offset = static_chain_offset + GET_MODE_SIZE (ptr_mode); ++ if (loongarch_expand_lsx_shuffle (d)) ++ return true; ++ if (loongarch_expand_vec_perm_even_odd(d)) ++ return true; ++ if (loongarch_expand_vec_perm_interleave(d)) ++ return true; ++ return false; ++} + +- /* Get pointers to the beginning and end of the code block. 
*/ +- addr = force_reg (Pmode, XEXP (m_tramp, 0)); +- end_addr = loongarch_force_binary (Pmode, PLUS, addr, GEN_INT (end_addr_offset)); ++// Following are the assist function for const vector permutation support. ++static bool ++loongarch_is_quad_duplicate (struct expand_vec_perm_d *d) ++{ ++ if (d->perm[0] >= d->nelt / 2) ++ return false; + +-#define OP(X) gen_int_mode (X, SImode) ++ bool result = true; ++ unsigned char lhs = d->perm[0]; ++ unsigned char rhs = d->perm[d->nelt / 2]; + +- /* Build up the code in TRAMPOLINE. */ +- i = 0; +- /* +- pcaddi $static_chain,0 +- ld.[dw] $tmp,$static_chain,target_function_offset +- ld.[dw] $static_chain,$static_chain,static_chain_offset +- jirl $r0,$tmp,0 +- */ +- trampoline[i++] = OP (0x18000000 | (STATIC_CHAIN_REGNUM - GP_REG_FIRST)); +- trampoline[i++] = OP ((ptr_mode == DImode ? 0x28c00000 : 0x28800000) +- | 19 /* $t7 */ +- | ((STATIC_CHAIN_REGNUM - GP_REG_FIRST) << 5) +- | ((target_function_offset & 0xfff) << 10)); +- trampoline[i++] = OP ((ptr_mode == DImode ? 0x28c00000 : 0x28800000) +- | (STATIC_CHAIN_REGNUM - GP_REG_FIRST) +- | ((STATIC_CHAIN_REGNUM - GP_REG_FIRST) << 5) +- | ((static_chain_offset & 0xfff) << 10)); +- trampoline[i++] = OP (0x4c000000 | (19 << 5)); +-#undef OP ++ if ((rhs - lhs) != d->nelt / 2) ++ return false; + +- for (j = 0; j < i; j++) ++ for (int i = 1; i < d->nelt; i += 1) + { +- mem = adjust_address (m_tramp, SImode, j * GET_MODE_SIZE (SImode)); +- loongarch_emit_move (mem, trampoline[j]); ++ if ((i < d->nelt / 2) && (d->perm[i] != lhs)) ++ { ++ result = false; ++ break; ++ } ++ if ((i > d->nelt / 2) && (d->perm[i] != rhs)) ++ { ++ result = false; ++ break; ++ } + } + +- /* Set up the static chain pointer field. */ +- mem = adjust_address (m_tramp, ptr_mode, static_chain_offset); +- loongarch_emit_move (mem, chain_value); +- +- /* Set up the target function field. */ +- mem = adjust_address (m_tramp, ptr_mode, target_function_offset); +- loongarch_emit_move (mem, XEXP (DECL_RTL (fndecl), 0)); +- +- /* Flush the code part of the trampoline. */ +- emit_insn (gen_add3_insn (end_addr, addr, GEN_INT (TRAMPOLINE_SIZE))); +- emit_insn (gen_clear_cache (addr, end_addr)); ++ return result; + } + +- +-/* Implement TARGET_SHIFT_TRUNCATION_MASK. We want to keep the default +- behavior of TARGET_SHIFT_TRUNCATION_MASK for non-vector modes even +- when TARGET_LOONGSON_MMI is true. */ +- +-static unsigned HOST_WIDE_INT +-loongarch_shift_truncation_mask (machine_mode mode) ++static bool ++loongarch_is_double_duplicate (struct expand_vec_perm_d *d) + { +- return GET_MODE_BITSIZE (mode) - 1; +-} ++ if (!d->one_vector_p) ++ return false; + +- +-/* Generate or test for an insn that supports a constant permutation. */ ++ if (d->nelt < 8) ++ return false; + +-#define MAX_VECT_LEN 32 ++ bool result = true; ++ unsigned char buf = d->perm[0]; + +-struct expand_vec_perm_d +-{ +- rtx target, op0, op1; +- unsigned char perm[MAX_VECT_LEN]; +- machine_mode vmode; +- unsigned char nelt; +- bool one_vector_p; +- bool testing_p; +-}; ++ for (int i = 1; i < d->nelt; i += 2) ++ { ++ if (d->perm[i] != buf) ++ { ++ result = false; ++ break; ++ } ++ if (d->perm[i - 1] != d->perm[i]) ++ { ++ result = false; ++ break; ++ } ++ buf += d->nelt / 4; ++ } + +-/* Construct (set target (vec_select op0 (parallel perm))) and +- return true if that's a valid instruction in the active ISA. 
*/ ++ return result; ++} + + static bool +-loongarch_expand_vselect (rtx target, rtx op0, +- const unsigned char *perm, unsigned nelt) ++loongarch_is_odd_extraction (struct expand_vec_perm_d *d) + { +- rtx rperm[MAX_VECT_LEN], x; +- rtx_insn *insn; +- unsigned i; ++ bool result = true; ++ unsigned char buf = 1; + +- for (i = 0; i < nelt; ++i) +- rperm[i] = GEN_INT (perm[i]); ++ for (int i = 0; i < d->nelt; i += 1) ++ { ++ if (buf != d->perm[i]) ++ { ++ result = false; ++ break; ++ } ++ buf += 2; ++ } + +- x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm)); +- x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x); +- x = gen_rtx_SET (target, x); ++ return result; ++} + +- insn = emit_insn (x); +- if (recog_memoized (insn) < 0) ++static bool ++loongarch_is_even_extraction (struct expand_vec_perm_d *d) ++{ ++ bool result = true; ++ unsigned char buf = 0; ++ ++ for (int i = 0; i < d->nelt; i += 1) + { +- remove_insn (insn); +- return false; ++ if (buf != d->perm[i]) ++ { ++ result = false; ++ break; ++ } ++ buf += 2; + } +- return true; +-} + +-/* Similar, but generate a vec_concat from op0 and op1 as well. */ ++ return result; ++} + + static bool +-loongarch_expand_vselect_vconcat (rtx target, rtx op0, rtx op1, +- const unsigned char *perm, unsigned nelt) ++loongarch_is_extraction_permutation (struct expand_vec_perm_d *d) + { +- machine_mode v2mode; +- rtx x; ++ bool result = true; ++ unsigned char buf = d->perm[0]; + +- if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode)) ++ if (buf != 0 || buf != d->nelt) + return false; +- x = gen_rtx_VEC_CONCAT (v2mode, op0, op1); +- return loongarch_expand_vselect (target, x, perm, nelt); +-} + +-/* Construct (set target (vec_select op0 (parallel selector))) and +- return true if that's a valid instruction in the active ISA. 
*/ ++ for (int i = 0; i < d->nelt; i += 1) ++ { ++ if (buf != d->perm[i]) ++ { ++ result = false; ++ break; ++ } ++ buf += 1; ++ } ++ ++ return result; ++} + + static bool +-loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d) ++loongarch_is_center_extraction (struct expand_vec_perm_d *d) + { +- rtx x, elts[MAX_VECT_LEN]; +- rtvec v; +- rtx_insn *insn; +- unsigned i; ++ bool result = true; ++ unsigned buf = d->nelt / 2; + +- if (!ISA_HAS_LSX && !ISA_HAS_LASX) ++ for (int i = 0; i < d->nelt; i += 1) ++ { ++ if (buf != d->perm[i]) ++ { ++ result = false; ++ break; ++ } ++ buf += 1; ++ } ++ ++ return result; ++} ++ ++static bool ++loongarch_is_reversing_permutation (struct expand_vec_perm_d *d) ++{ ++ if (!d->one_vector_p) + return false; + +- for (i = 0; i < d->nelt; i++) +- elts[i] = GEN_INT (d->perm[i]); ++ bool result = true; ++ unsigned char buf = d->nelt - 1; + +- v = gen_rtvec_v (d->nelt, elts); +- x = gen_rtx_PARALLEL (VOIDmode, v); ++ for (int i = 0; i < d->nelt; i += 1) ++ { ++ if (d->perm[i] != buf) ++ { ++ result = false; ++ break; ++ } + +- if (!loongarch_const_vector_shuffle_set_p (x, d->vmode)) ++ buf -= 1; ++ } ++ ++ return result; ++} ++ ++static bool ++loongarch_is_di_misalign_extract (struct expand_vec_perm_d *d) ++{ ++ if (d->nelt != 4 && d->nelt != 8) + return false; + +- x = gen_rtx_VEC_SELECT (d->vmode, d->op0, x); +- x = gen_rtx_SET (d->target, x); ++ bool result = true; ++ unsigned char buf; + +- insn = emit_insn (x); +- if (recog_memoized (insn) < 0) ++ if (d->nelt == 4) + { +- remove_insn (insn); +- return false; ++ buf = 1; ++ for (int i = 0; i < d->nelt; i += 1) ++ { ++ if (buf != d->perm[i]) ++ { ++ result = false; ++ break; ++ } ++ ++ buf += 1; ++ } ++ } ++ else if (d->nelt == 8) ++ { ++ buf = 2; ++ for (int i = 0; i < d->nelt; i += 1) ++ { ++ if (buf != d->perm[i]) ++ { ++ result = false; ++ break; ++ } ++ ++ buf += 1; ++ } ++ } ++ ++ return result; ++} ++ ++static bool ++loongarch_is_si_misalign_extract (struct expand_vec_perm_d *d) ++{ ++ if (d->vmode != E_V8SImode && d->vmode != E_V8SFmode) ++ return false; ++ bool result = true; ++ unsigned char buf = 1; ++ ++ for (int i = 0; i < d->nelt; i += 1) ++ { ++ if (buf != d->perm[i]) ++ { ++ result = false; ++ break; ++ } ++ buf += 1; ++ } ++ ++ return result; ++} ++ ++static bool ++loongarch_is_lasx_lowpart_interleave (struct expand_vec_perm_d *d) ++{ ++ bool result = true; ++ unsigned char buf = 0; ++ ++ for (int i = 0;i < d->nelt; i += 2) ++ { ++ if (buf != d->perm[i]) ++ { ++ result = false; ++ break; ++ } ++ buf += 1; ++ } ++ ++ if (result) ++ { ++ buf = d->nelt; ++ for (int i = 1; i < d->nelt; i += 2) ++ { ++ if (buf != d->perm[i]) ++ { ++ result = false; ++ break; ++ } ++ buf += 1; ++ } ++ } ++ ++ return result; ++} ++ ++static bool ++loongarch_is_lasx_lowpart_interleave_2 (struct expand_vec_perm_d *d) ++{ ++ if (d->vmode != E_V32QImode) ++ return false; ++ bool result = true; ++ unsigned char buf = 0; ++ ++#define COMPARE_SELECTOR(INIT, BEGIN, END) \ ++ buf = INIT; \ ++ for (int i = BEGIN; i < END && result; i += 1) \ ++ { \ ++ if (buf != d->perm[i]) \ ++ { \ ++ result = false; \ ++ break; \ ++ } \ ++ buf += 1; \ ++ } ++ ++ COMPARE_SELECTOR (0, 0, 8); ++ COMPARE_SELECTOR (32, 8, 16); ++ COMPARE_SELECTOR (8, 16, 24); ++ COMPARE_SELECTOR (40, 24, 32); ++ ++#undef COMPARE_SELECTOR ++ return result; ++} ++ ++static bool ++loongarch_is_lasx_lowpart_extract (struct expand_vec_perm_d *d) ++{ ++ bool result = true; ++ unsigned char buf = 0; ++ ++ for (int i = 0; i < d->nelt / 2; i += 1) ++ { ++ if (buf != 
d->perm[i]) ++ { ++ result = false; ++ break; ++ } ++ buf += 1; ++ } ++ ++ if (result) ++ { ++ buf = d->nelt; ++ for (int i = d->nelt / 2; i < d->nelt; i += 1) ++ { ++ if (buf != d->perm[i]) ++ { ++ result = false; ++ break; ++ } ++ buf += 1; ++ } ++ } ++ ++ return result; ++} ++ ++static bool ++loongarch_is_lasx_highpart_interleave (expand_vec_perm_d *d) ++{ ++ bool result = true; ++ unsigned char buf = d->nelt / 2; ++ ++ for (int i = 0; i < d->nelt; i += 2) ++ { ++ if (buf != d->perm[i]) ++ { ++ result = false; ++ break; ++ } ++ buf += 1; ++ } ++ ++ if (result) ++ { ++ buf = d->nelt + d->nelt / 2; ++ for (int i = 1; i < d->nelt;i += 2) ++ { ++ if (buf != d->perm[i]) ++ { ++ result = false; ++ break; ++ } ++ buf += 1; ++ } ++ } ++ ++ return result; ++} ++ ++static bool ++loongarch_is_lasx_highpart_interleave_2 (struct expand_vec_perm_d *d) ++{ ++ if (d->vmode != E_V32QImode) ++ return false; ++ ++ bool result = true; ++ unsigned char buf = 0; ++ ++#define COMPARE_SELECTOR(INIT, BEGIN, END) \ ++ buf = INIT; \ ++ for (int i = BEGIN; i < END && result; i += 1) \ ++ { \ ++ if (buf != d->perm[i]) \ ++ { \ ++ result = false; \ ++ break; \ ++ } \ ++ buf += 1; \ ++ } ++ ++ COMPARE_SELECTOR (16, 0, 8); ++ COMPARE_SELECTOR (48, 8, 16); ++ COMPARE_SELECTOR (24, 16, 24); ++ COMPARE_SELECTOR (56, 24, 32); ++ ++#undef COMPARE_SELECTOR ++ return result; ++} ++ ++static bool ++loongarch_is_elem_duplicate (struct expand_vec_perm_d *d) ++{ ++ bool result = true; ++ unsigned char buf = d->perm[0]; ++ ++ for (int i = 0; i < d->nelt; i += 1) ++ { ++ if (buf != d->perm[i]) ++ { ++ result = false; ++ break; ++ } ++ } ++ ++ return result; ++} ++ ++inline bool ++loongarch_is_op_reverse_perm (struct expand_vec_perm_d *d) ++{ ++ return (d->vmode == E_V4DFmode) ++ && d->perm[0] == 2 && d->perm[1] == 3 ++ && d->perm[2] == 0 && d->perm[3] == 1; ++} ++ ++static bool ++loongarch_is_single_op_perm (struct expand_vec_perm_d *d) ++{ ++ bool result = true; ++ ++ for (int i = 0; i < d->nelt; i += 1) ++ { ++ if (d->perm[i] >= d->nelt) ++ { ++ result = false; ++ break; ++ } ++ } ++ ++ return result; ++} ++ ++static bool ++loongarch_is_divisible_perm (struct expand_vec_perm_d *d) ++{ ++ bool result = true; ++ ++ for (int i = 0; i < d->nelt / 2; i += 1) ++ { ++ if (d->perm[i] >= d->nelt) ++ { ++ result = false; ++ break; ++ } ++ } ++ ++ if (result) ++ { ++ for (int i = d->nelt / 2; i < d->nelt; i += 1) ++ { ++ if (d->perm[i] < d->nelt) ++ { ++ result = false; ++ break; ++ } ++ } ++ } ++ ++ return result; ++} ++ ++inline bool ++loongarch_is_triple_stride_extract (struct expand_vec_perm_d *d) ++{ ++ return (d->vmode == E_V4DImode || d->vmode == E_V4DFmode) ++ && d->perm[0] == 1 && d->perm[1] == 4 ++ && d->perm[2] == 7 && d->perm[3] == 0; ++} ++ ++/* In LASX, xvshuf.* insn does not have the behavior that gcc expects when ++ * compiler wants to emit a vector permutation. ++ * ++ * 1. What GCC provides via vectorize_vec_perm_const()'s paramater: ++ * When GCC wants to performs a vector permutation, it provides two op ++ * reigster, one target register, and a selector. ++ * In const vector permutation case, GCC provides selector as a char array ++ * that contains original value; in variable vector permuatation ++ * (performs via vec_perm insn template), it provides a vector register. ++ * We assume that nelt is the elements numbers inside single vector in current ++ * 256bit vector mode. ++ * ++ * 2. 
What GCC expects to perform: ++ * Two op registers(op0, op1) will "combine" into a 512bit temp vector storage ++ * that has 2*nelt elements inside it; the low 256bit is op0, and high 256bit ++ * is op1, then the elements are indexed as below: ++ * 0 ~ nelt - 1 nelt ~ 2 * nelt - 1 ++ * |-------------------------|-------------------------| ++ * Low 256bit (op0) High 256bit(op1) ++ * For example, the second element in op1(V8SImode) will be indexed with 9. ++ * Selector is a vector that has the same mode and number of elements with ++ * op0,op1 and target, it's look like this: ++ * 0 ~ nelt - 1 ++ * |-------------------------| ++ * 256bit (selector) ++ * It describes which element from 512bit temp vector storage will fit into ++ * target's every element slot. ++ * GCC expects that every element in selector can be ANY indices of 512bit ++ * vector storage(Selector can pick literally any element from op0 and op1, and ++ * then fits into any place of target register). This is also what LSX 128bit ++ * vshuf.* instruction do similarly, so we can handle 128bit vector permutation ++ * by single instruction easily. ++ * ++ * 3. What xvshuf.* instruction does: ++ * In short, it just do TWO 128bit vector permuatation, it's the reason that we ++ * need to do these jobs. We will explain it. ++ * op0, op1, target, and selector will be separate into high 128bit and low ++ * 128bit, and do permutation as the description below: ++ * ++ * a) op0's low 128bit and op1's low 128bit "combines" into a 256bit temp ++ * vector storage(TVS1), elements are indexed as below: ++ * 0 ~ nelt / 2 - 1 nelt / 2 ~ nelt - 1 ++ * |---------------------|---------------------| TVS1 ++ * op0's low 128bit op1's low 128bit ++ * op0's high 128bit and op1's high 128bit are "combined" into TVS2 in the ++ * same way. ++ * 0 ~ nelt / 2 - 1 nelt / 2 ~ nelt - 1 ++ * |---------------------|---------------------| TVS2 ++ * op0's high 128bit op1's high 128bit ++ * b) Selector's low 128bit describes which elements from TVS1 will fit into ++ * target vector's low 128bit. No TVS2 elements are allowed. ++ * c) Selector's high 128bit describes which elements from TVS2 will fit into ++ * target vector's high 128bit. No TVS1 elements are allowed. ++ * ++ * As we can see, if we want to handle vector permutation correctly, we can ++ * achieve it in three ways: ++ * a) Modify selector's elements, to make sure that every elements can inform ++ * correct value that will put into target vector. ++ b) Generate extra instruction before/after xvshuf.* instruction, for ++ adjusting op vector or target vector, to make sure target vector's value is ++ what GCC expects. ++ c) Use other instructions to process op and put correct result into target. ++ */ ++ ++/* Implementation of constant vector permuatation. This function identifies ++ * recognized pattern of permuation selector argument, and use one or more ++ * instruction(s) to finish the permutation job correctly. For unsupported ++ * patterns, it will return false. */ ++ ++static bool ++loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) ++{ ++ // Although we have the LSX vec_perm template, there's still some ++ // 128bit vector permuatation operations send to vectorize_vec_perm_const. ++ // In this case, we just simpliy wrap them by single vshuf.* instruction, ++ // because LSX vshuf.* instruction just have the same behavior that GCC ++ // expects. 
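++  // Worked example of strategy a) above for a 256bit mode (illustrative
++  // values, not taken from the checks below): for E_V8SImode even
++  // extraction GCC passes the selector { 0, 2, 4, 6, 8, 10, 12, 14 },
++  // indexing the 512bit concatenation of op0 and op1.  Since xvshuf.w
++  // builds the result's low half only from TVS1 and its high half only
++  // from TVS2, the code below remaps the selector to
++  // { 0, 2, 4, 6, 0, 2, 4, 6 } and sets extract_ev_od so that the extra
++  // fix-up insn is emitted later.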
++ if (d->vmode != E_V32QImode && d->vmode != E_V16HImode ++ && d->vmode != E_V4DImode && d->vmode != E_V4DFmode ++ && d->vmode != E_V8SImode && d->vmode != E_V8SFmode) ++ return loongarch_try_expand_lsx_vshuf_const (d); ++ ++ bool ok = false, reverse_hi_lo = false, extract_ev_od = false, ++ use_alt_op = false; ++ unsigned char idx; ++ int i; ++ rtx target, op0, op1, sel, tmp; ++ rtx op0_alt = NULL_RTX, op1_alt = NULL_RTX; ++ rtx rperm[MAX_VECT_LEN]; ++ unsigned char remapped[MAX_VECT_LEN]; ++ ++ // Try to figure out whether is a recognized permutation selector pattern, if ++ // yes, we will reassign some elements with new value in selector argument, ++ // and in some cases we will generate some assist insn to complete the ++ // permutation. (Even in some cases, we use other insn to impl permutation ++ // instead of xvshuf!) ++ ++ // Make sure to check d->testing_p is false everytime if you want to emit new ++ // insn, unless you want to crash into ICE directly. ++ if (loongarch_is_quad_duplicate (d)) ++ { ++ // Selector example: E_V8SImode, { 0, 0, 0, 0, 4, 4, 4, 4 } ++ // copy first elem from original selector to all elem in new selector. ++ idx = d->perm[0]; ++ for (i = 0; i < d->nelt; i += 1) ++ { ++ remapped[i] = idx; ++ } ++ // Selector after: { 0, 0, 0, 0, 0, 0, 0, 0 } ++ } ++ else if (loongarch_is_double_duplicate (d)) ++ { ++ // Selector example: E_V8SImode, { 1, 1, 3, 3, 5, 5, 7, 7 } ++ // one_vector_p == true ++ for (i = 0; i < d->nelt / 2; i += 1) ++ { ++ idx = d->perm[i]; ++ remapped[i] = idx; ++ remapped[i + d->nelt / 2] = idx; ++ } ++ // Selector after: { 1, 1, 3, 3, 1, 1, 3, 3 } ++ } ++ else if (loongarch_is_odd_extraction (d) ++ || loongarch_is_even_extraction (d)) ++ { ++ // Odd extraction selector sample: E_V4DImode, { 1, 3, 5, 7 } ++ // Selector after: { 1, 3, 1, 3 } ++ // Even extraction selector sample: E_V4DImode, { 0, 2, 4, 6 } ++ // Selector after: { 0, 2, 0, 2 } ++ for (i = 0; i < d->nelt / 2; i += 1) ++ { ++ idx = d->perm[i]; ++ remapped[i] = idx; ++ remapped[i + d->nelt / 2] = idx; ++ } ++ // Additional insn is required for correct result. See codes below. ++ extract_ev_od = true; ++ } ++ else if (loongarch_is_extraction_permutation (d)) ++ { ++ // Selector sample: E_V8SImode, { 0, 1, 2, 3, 4, 5, 6, 7 } ++ if (d->perm[0] == 0) ++ { ++ for (i = 0; i < d->nelt / 2; i += 1) ++ { ++ remapped[i] = i; ++ remapped[i + d->nelt / 2] = i; ++ } ++ } ++ else ++ { ++ // { 8, 9, 10, 11, 12, 13, 14, 15 } ++ for (i = 0; i < d->nelt / 2; i += 1) ++ { ++ idx = i + d->nelt / 2; ++ remapped[i] = idx; ++ remapped[i + d->nelt / 2] = idx; ++ } ++ } ++ // Selector after: { 0, 1, 2, 3, 0, 1, 2, 3 } ++ // { 8, 9, 10, 11, 8, 9, 10, 11 } ++ } ++ else if (loongarch_is_center_extraction (d)) ++ { ++ // sample: E_V4DImode, { 2, 3, 4, 5 } ++ // In this condition, we can just copy high 128bit of op0 and low 128bit ++ // of op1 to the target register by using xvpermi.q insn. 
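++      // e.g. E_V4DImode, selector { 2, 3, 4, 5 }: elements 2 and 3 are
++      // op0's high 128bit and elements 4 and 5 are op1's low 128bit, so a
++      // single xvpermi.q placing op0's high half in the target's low half
++      // and op1's low half in the target's high half already gives the
++      // expected result (illustrative restatement of the case above).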
++ if (!d->testing_p) ++ { ++ emit_move_insn (d->target, d->op1); ++ switch (d->vmode) ++ { ++ case E_V4DImode: ++ emit_insn (gen_lasx_xvpermi_q_v4di (d->target, d->target, d->op0, GEN_INT (0x21))); ++ break; ++ case E_V4DFmode: ++ emit_insn (gen_lasx_xvpermi_q_v4df (d->target, d->target, d->op0, GEN_INT (0x21))); ++ break; ++ case E_V8SImode: ++ emit_insn (gen_lasx_xvpermi_q_v8si (d->target, d->target, d->op0, GEN_INT (0x21))); ++ break; ++ case E_V8SFmode: ++ emit_insn (gen_lasx_xvpermi_q_v8sf (d->target, d->target, d->op0, GEN_INT (0x21))); ++ break; ++ case E_V16HImode: ++ emit_insn (gen_lasx_xvpermi_q_v16hi (d->target, d->target, d->op0, GEN_INT (0x21))); ++ break; ++ case E_V32QImode: ++ emit_insn (gen_lasx_xvpermi_q_v32qi (d->target, d->target, d->op0, GEN_INT (0x21))); ++ break; ++ default: ++ break; ++ } ++ } ++ ok = true; ++ // Finish the funtion directly. ++ goto expand_perm_const_2_end; ++ } ++ else if (loongarch_is_reversing_permutation (d)) ++ { ++ // Selector sample: E_V8SImode, { 7, 6, 5, 4, 3, 2, 1, 0 } ++ // one_vector_p == true ++ idx = d->nelt / 2 - 1; ++ for (i = 0; i < d->nelt / 2; i += 1) ++ { ++ remapped[i] = idx; ++ remapped[i + d->nelt / 2] = idx; ++ idx -= 1; ++ } ++ // Selector after: { 3, 2, 1, 0, 3, 2, 1, 0 } ++ // Additional insn will be generated to swap hi and lo 128bit of target ++ // register. ++ reverse_hi_lo = true; ++ } ++ else if (loongarch_is_di_misalign_extract (d) ++ || loongarch_is_si_misalign_extract (d)) ++ { ++ // Selector Sample: ++ // DI misalign: E_V4DImode, { 1, 2, 3, 4 } ++ // SI misalign: E_V8SImode, { 1, 2, 3, 4, 5, 6, 7, 8 } ++ if (!d->testing_p) ++ { ++ // Copy original op0/op1 value to new temp register. ++ // In some cases, operand register may be used in multiple place, so ++ // we need new regiter instead modify original one, to avoid runtime ++ // crashing or wrong value after execution. ++ use_alt_op = true; ++ op1_alt = gen_reg_rtx (d->vmode); ++ emit_move_insn (op1_alt, d->op1); ++ ++ // Adjust op1 for selecting correct value in high 128bit of target ++ // register. ++ // op1: E_V4DImode, { 4, 5, 6, 7 } -> { 2, 3, 4, 5 } ++ rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); ++ rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0); ++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, ++ conv_op0, GEN_INT (0x21))); ++ ++ for (i = 0; i < d->nelt / 2; i += 1) ++ { ++ remapped[i] = d->perm[i]; ++ remapped[i + d->nelt / 2] = d->perm[i]; ++ } ++ // Selector after: ++ // DI misalign: { 1, 2, 1, 2 } ++ // SI misalign: { 1, 2, 3, 4, 1, 2, 3, 4 } ++ } ++ } ++ else if (loongarch_is_lasx_lowpart_interleave (d)) ++ { ++ // Elements from op0's low 18bit and op1's 128bit are inserted into ++ // target register alternately. ++ //sample: E_V4DImode, { 0, 4, 1, 5 } ++ if (!d->testing_p) ++ { ++ // Prepare temp register instead of modify original op. ++ use_alt_op = true; ++ op1_alt = gen_reg_rtx (d->vmode); ++ op0_alt = gen_reg_rtx (d->vmode); ++ emit_move_insn (op1_alt, d->op1); ++ emit_move_insn (op0_alt, d->op0); ++ ++ // Generate subreg for fitting into insn gen function. ++ rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); ++ rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); ++ ++ // Adjust op value in temp register. 
++ // op0 = {0,1,2,3}, op1 = {4,5,0,1} ++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, ++ conv_op0, GEN_INT (0x02))); ++ // op0 = {0,1,4,5}, op1 = {4,5,0,1} ++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op0, conv_op0, ++ conv_op1, GEN_INT (0x01))); ++ ++ // Remap indices in selector based on the location of index inside ++ // selector, and vector element numbers in current vector mode. ++ ++ // Filling low 128bit of new selector. ++ for (i = 0; i < d->nelt / 2; i += 1) ++ { ++ // value in odd-indexed slot of low 128bit part of selector ++ // vector. ++ remapped[i] = i % 2 != 0 ? d->perm[i] - d->nelt / 2 : d->perm[i]; ++ } ++ // Then filling the high 128bit. ++ for (i = d->nelt / 2; i < d->nelt; i += 1) ++ { ++ // value in even-indexed slot of high 128bit part of ++ // selector vector. ++ remapped[i] = i % 2 == 0 ? d->perm[i] + (d->nelt / 2) * 3 : d->perm[i]; ++ } ++ } ++ } ++ else if (loongarch_is_lasx_lowpart_interleave_2 (d)) ++ { ++ // Special lowpart interleave case in V32QI vector mode. It does the same ++ // thing as we can see in if branch that above this line. ++ // Selector sample: E_V32QImode, ++ // {0, 1, 2, 3, 4, 5, 6, 7, 32, 33, 34, 35, 36, 37, 38, 39, 8, 9, 10, ++ // 11, 12, 13, 14, 15, 40, 41, 42, 43, 44, 45, 46, 47} ++ if (!d->testing_p) ++ { ++ // Solution for this case in very simple - covert op into V4DI mode, ++ // and do same thing as previous if branch. ++ op1_alt = gen_reg_rtx (d->vmode); ++ op0_alt = gen_reg_rtx (d->vmode); ++ emit_move_insn (op1_alt, d->op1); ++ emit_move_insn (op0_alt, d->op0); ++ ++ rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); ++ rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); ++ rtx conv_target = gen_rtx_SUBREG (E_V4DImode, d->target, 0); ++ ++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, ++ conv_op0, GEN_INT (0x02))); ++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op0, conv_op0, ++ conv_op1, GEN_INT (0x01))); ++ remapped[0] = 0; ++ remapped[1] = 4; ++ remapped[2] = 1; ++ remapped[3] = 5; ++ ++ for (i = 0; i < d->nelt; i += 1) ++ { ++ rperm[i] = GEN_INT (remapped[i]); ++ } ++ ++ sel = gen_rtx_CONST_VECTOR (E_V4DImode, gen_rtvec_v(4, rperm)); ++ sel = force_reg (E_V4DImode, sel); ++ emit_insn (gen_lasx_xvshuf_d (conv_target, sel, ++ conv_op1, conv_op0)); ++ } ++ ++ ok = true; ++ goto expand_perm_const_2_end; ++ } ++ else if (loongarch_is_lasx_lowpart_extract (d)) ++ { ++ // Copy op0's low 128bit to target's low 128bit, and copy op1's low ++ // 128bit to target's high 128bit. ++ // Selector sample: E_V4DImode, { 0, 1, 4 ,5 } ++ if (!d->testing_p) ++ { ++ rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, d->op1, 0); ++ rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0); ++ rtx conv_target = gen_rtx_SUBREG (E_V4DImode, d->target, 0); ++ ++ // We can achieve the expectation by using sinple xvpermi.q insn. ++ emit_move_insn (conv_target, conv_op1); ++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_target, conv_target, ++ conv_op0, GEN_INT(0x20))); ++ } ++ ++ ok = true; ++ goto expand_perm_const_2_end; ++ } ++ else if (loongarch_is_lasx_highpart_interleave (d)) ++ { ++ // Similar to lowpart interleave, elements from op0's high 128bit and ++ // op1's high 128bit are inserted into target regiter alternately. ++ // Selector sample: E_V8SImode, { 4, 12, 5, 13, 6, 14, 7, 15 } ++ if (!d->testing_p) ++ { ++ // Prepare temp op register. 
++ use_alt_op = true; ++ op1_alt = gen_reg_rtx (d->vmode); ++ op0_alt = gen_reg_rtx (d->vmode); ++ emit_move_insn (op1_alt, d->op1); ++ emit_move_insn (op0_alt, d->op0); ++ ++ rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); ++ rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); ++ // Adjust op value in temp regiter. ++ // op0 = { 0, 1, 2, 3 }, op1 = { 6, 7, 2, 3 } ++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, ++ conv_op0, GEN_INT (0x13))); ++ // op0 = { 2, 3, 6, 7 }, op1 = { 6, 7, 2, 3 } ++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op0, conv_op0, ++ conv_op1, GEN_INT (0x01))); ++ // Remap indices in selector based on the location of index inside ++ // selector, and vector element numbers in current vector mode. ++ ++ // Filling low 128bit of new selector. ++ for (i = 0; i < d->nelt / 2; i += 1) ++ { ++ // value in even-indexed slot of low 128bit part of selector ++ // vector. ++ remapped[i] = i % 2 == 0 ? d->perm[i] - d->nelt / 2 : d->perm[i]; ++ } ++ // Then filling the high 128bit. ++ for (i = d->nelt / 2; i < d->nelt; i += 1) ++ { ++ // value in odd-indexed slot of high 128bit part of selector ++ // vector. ++ remapped[i] = i % 2 != 0 ? d->perm[i] - (d->nelt / 2) * 3 : d->perm[i]; ++ } ++ } ++ } ++ else if (loongarch_is_lasx_highpart_interleave_2 (d)) ++ { ++ // Special highpart interleave case in V32QI vector mode. It does the ++ // same thing as the normal version above. ++ // Selector sample: E_V32QImode, ++ // {16, 17, 18, 19, 20, 21, 22, 23, 48, 49, 50, 51, 52, 53, 54, 55, 24, ++ // 25, 26, 27, 28, 29, 30, 31, 56, 57, 58, 59, 60, 61, 62, 63} ++ if (!d->testing_p) ++ { ++ // Convert op into V4DImode and do the things. ++ op1_alt = gen_reg_rtx (d->vmode); ++ op0_alt = gen_reg_rtx (d->vmode); ++ emit_move_insn (op1_alt, d->op1); ++ emit_move_insn (op0_alt, d->op0); ++ ++ rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); ++ rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); ++ rtx conv_target = gen_rtx_SUBREG (E_V4DImode, d->target, 0); ++ ++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, ++ conv_op0, GEN_INT (0x13))); ++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op0, conv_op0, ++ conv_op1, GEN_INT (0x01))); ++ remapped[0] = 2; ++ remapped[1] = 6; ++ remapped[2] = 3; ++ remapped[3] = 7; ++ ++ for (i = 0; i < d->nelt; i += 1) ++ { ++ rperm[i] = GEN_INT (remapped[i]); ++ } ++ ++ sel = gen_rtx_CONST_VECTOR (E_V4DImode, gen_rtvec_v(4, rperm)); ++ sel = force_reg (E_V4DImode, sel); ++ emit_insn (gen_lasx_xvshuf_d (conv_target, sel, ++ conv_op1, conv_op0)); ++ } ++ ++ ok = true; ++ goto expand_perm_const_2_end; ++ } ++ else if (loongarch_is_elem_duplicate (d)) ++ { ++ // Brocast single element (from op0 or op1) to all slot of target ++ // register. ++ // Selector sample:E_V8SImode, { 2, 2, 2, 2, 2, 2, 2, 2 } ++ if (!d->testing_p) ++ { ++ rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, d->op1, 0); ++ rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0); ++ rtx temp_reg = gen_reg_rtx (d->vmode); ++ rtx conv_temp = gen_rtx_SUBREG (E_V4DImode, temp_reg, 0); ++ ++ emit_move_insn (temp_reg, d->op0); ++ ++ idx = d->perm[0]; ++ // We will use xvrepl128vei.* insn to achieve the result, but we need ++ // to make the high/low 128bit has the same contents that contain the ++ // value that we need to broardcast, because xvrepl128vei does the ++ // broardcast job from every 128bit of source register to ++ // corresponded part of target register! (A deep sigh.) 
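++	  // e.g. E_V8SImode, selector { 5, 5, 5, 5, 5, 5, 5, 5 }: element 5
++	  // sits in op0's high 128bit lane at position 1, so both halves of
++	  // temp_reg are filled with op0's high half and xvrepl128vei.w then
++	  // broadcasts lane element 1 into every slot of target (illustrative).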
++ if (/*idx >= 0 &&*/ idx < d->nelt / 2)
++ {
++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_temp, conv_temp,
++ conv_op0, GEN_INT (0x0)));
++ }
++ else if (idx >= d->nelt / 2 && idx < d->nelt)
++ {
++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_temp, conv_temp,
++ conv_op0, GEN_INT (0x11)));
++ idx -= d->nelt / 2;
++ }
++ else if (idx >= d->nelt && idx < (d->nelt + d->nelt / 2))
++ {
++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_temp, conv_temp,
++ conv_op1, GEN_INT (0x0)));
++ }
++ else if (idx >= (d->nelt + d->nelt / 2) && idx < d->nelt * 2)
++ {
++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_temp, conv_temp,
++ conv_op1, GEN_INT (0x11)));
++ idx -= d->nelt / 2;
++ }
++
++ // Then we can finally generate this insn.
++ switch (d->vmode)
++ {
++ case E_V4DImode:
++ emit_insn (gen_lasx_xvrepl128vei_d (d->target, temp_reg, GEN_INT (idx)));
++ break;
++ case E_V4DFmode:
++ emit_insn (gen_lasx_xvrepl128vei_d_f (d->target, temp_reg, GEN_INT (idx)));
++ break;
++ case E_V8SImode:
++ emit_insn (gen_lasx_xvrepl128vei_w (d->target, temp_reg, GEN_INT (idx)));
++ break;
++ case E_V8SFmode:
++ emit_insn (gen_lasx_xvrepl128vei_w_f (d->target, temp_reg, GEN_INT (idx)));
++ break;
++ case E_V16HImode:
++ emit_insn (gen_lasx_xvrepl128vei_h (d->target, temp_reg, GEN_INT (idx)));
++ break;
++ case E_V32QImode:
++ emit_insn (gen_lasx_xvrepl128vei_b (d->target, temp_reg, GEN_INT (idx)));
++ break;
++ default:
++ gcc_unreachable ();
++ break;
++ }
++
++ // Finish the function directly.
++ ok = true;
++ goto expand_perm_const_2_end;
++ }
++ }
++ else if (loongarch_is_op_reverse_perm (d))
++ {
++ // Reverse the high 128bit and low 128bit in op0.
++ // Selector sample: E_V4DFmode, { 2, 3, 0, 1 }
++ // Use xvpermi.q to do this job.
++ if (!d->testing_p)
++ {
++ if (d->vmode == E_V4DImode)
++ {
++ emit_insn (gen_lasx_xvpermi_q_v4di (d->target, d->target, d->op0,
++ GEN_INT (0x01)));
++ }
++ else if (d->vmode == E_V4DFmode)
++ {
++ emit_insn (gen_lasx_xvpermi_q_v4df (d->target, d->target, d->op0,
++ GEN_INT (0x01)));
++ }
++ else
++ {
++ gcc_unreachable ();
++ }
++ }
++
++ ok = true;
++ goto expand_perm_const_2_end;
++ }
++ else if (loongarch_is_single_op_perm (d))
++ {
++ // Permutation that only selects elements from op0.
++ if (!d->testing_p)
++ {
++ // Prepare temp registers instead of modifying the original ops.
++ use_alt_op = true;
++ op0_alt = gen_reg_rtx (d->vmode);
++ op1_alt = gen_reg_rtx (d->vmode);
++
++ emit_move_insn (op0_alt, d->op0);
++ emit_move_insn (op1_alt, d->op1);
++
++ rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0);
++ rtx conv_op0a = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0);
++ rtx conv_op1a = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0);
++
++ // Duplicate op0's low 128bit in op0_alt, then duplicate op0's high
++ // 128bit in op1_alt. After this, xvshuf.* insn's selector argument can
++ // access all elements we need for the correct permutation result.
++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op0a, conv_op0a, conv_op0,
++ GEN_INT (0x00)));
++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1a, conv_op1a, conv_op0,
++ GEN_INT (0x11)));
++
++ // In this case, there's no need to remap the selector's indices.
++ for (i = 0; i < d->nelt; i += 1)
++ {
++ remapped[i] = d->perm[i];
++ }
++ }
++ }
++ else if (loongarch_is_divisible_perm (d))
++ {
++ // Divisible perm:
++ // Low 128bit of selector only selects elements of op0,
++ // and high 128bit of selector only selects elements of op1.
++
++ if (!d->testing_p)
++ {
++ // Prepare temp registers instead of modifying the original ops.
++ use_alt_op = true;
++ op0_alt = gen_reg_rtx (d->vmode);
++ op1_alt = gen_reg_rtx (d->vmode);
++
++ emit_move_insn (op0_alt, d->op0);
++ emit_move_insn (op1_alt, d->op1);
++
++ rtx conv_op0a = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0);
++ rtx conv_op1a = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0);
++ rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0);
++ rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, d->op1, 0);
++
++ // Reorganize op0's hi/lo 128bit and op1's hi/lo 128bit, to make sure
++ // that the selector's low 128bit can access all of op0's elements, and
++ // the selector's high 128bit can access all of op1's elements.
++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op0a, conv_op0a, conv_op1,
++ GEN_INT (0x02)));
++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1a, conv_op1a, conv_op0,
++ GEN_INT (0x31)));
++
++ // No need to modify indices.
++ for (i = 0; i < d->nelt; i += 1)
++ {
++ remapped[i] = d->perm[i];
++ }
++ }
++ }
++ else if (loongarch_is_triple_stride_extract (d))
++ {
++ // Selector sample: E_V4DFmode, { 1, 4, 7, 0 }
++ if (!d->testing_p)
++ {
++ // Resolve it with a brute-force modification.
++ remapped[0] = 1;
++ remapped[1] = 2;
++ remapped[2] = 3;
++ remapped[3] = 0;
++ }
++ }
++ else
++ {
++ // When all of the detections above have failed, we will try one last
++ // strategy.
++ // The for loop tries to detect the following rules based on each index's
++ // value, its position inside the selector vector, and the strange behavior
++ // of the xvshuf.* insn; then we take the corresponding action (replace
++ // with a new value, or give up on the whole permutation expansion).
++ for (i = 0; i < d->nelt; i += 1)
++ {
++ idx = d->perm[i]/* % (2 * d->nelt)*/;
++
++ // If the index is located in the low 128bit of the selector vector.
++ if (i < d->nelt / 2)
++ {
++ // Fail case 1: the index tries to reach an element located in op0's
++ // high 128bit.
++ if (idx >= d->nelt / 2 && idx < d->nelt)
++ {
++ goto expand_perm_const_2_end;
++ }
++ // Fail case 2: the index tries to reach an element located in
++ // op1's high 128bit.
++ if (idx >= (d->nelt + d->nelt / 2))
++ {
++ goto expand_perm_const_2_end;
++ }
++
++ // Success case: the index reaches an element located in
++ // op1's low 128bit. Apply a - (nelt / 2) offset to the original value.
++ if (idx >= d->nelt && idx < (d->nelt + d->nelt / 2))
++ {
++ idx -= d->nelt / 2;
++ }
++ }
++ // If the index is located in the high 128bit of the selector vector.
++ else
++ {
++ // Fail case 1: the index tries to reach an element located in
++ // op1's low 128bit.
++ if (idx >= d->nelt && idx < (d->nelt + d->nelt / 2))
++ {
++ goto expand_perm_const_2_end;
++ }
++ // Fail case 2: the index tries to reach an element located in
++ // op0's low 128bit.
++ if (idx < (d->nelt / 2))
++ {
++ goto expand_perm_const_2_end;
++ }
++ // Success case: the index reaches an element located in
++ // op0's high 128bit.
++ if (idx >= d->nelt / 2 && idx < d->nelt)
++ {
++ idx -= d->nelt / 2;
++ }
++ }
++ // No need to process other cases not mentioned above.
++
++ // Assign the original or processed value.
++ remapped[i] = idx;
++ }
++ }
++
++ ok = true;
++ // If testing_p is true, the compiler is only asking whether the backend can
++ // handle this permutation and does not want actual insns to be generated.
++ // So if it is true, exit directly.
++ if (d->testing_p)
++ {
++ goto expand_perm_const_2_end;
++ }
++
++ // Convert the remapped selector array to an RTL array.
++ for (i = 0; i < d->nelt; i += 1)
++ {
++ rperm[i] = GEN_INT (remapped[i]);
++ }
++
++ // Copy the selector vector from memory to a vector register for the later
++ // insn gen function.
++ // if vector's element in floating point value, we cannot fit selector ++ // argument into insn gen function directly, because of the insn template ++ // definition. As a solution, generate a integral mode subreg of target, ++ // then copy selector vector(that is in integral mode) to this subreg. ++ switch (d->vmode) ++ { ++ case E_V4DFmode: ++ sel = gen_rtx_CONST_VECTOR (E_V4DImode, gen_rtvec_v (d->nelt, rperm)); ++ tmp = gen_rtx_SUBREG (E_V4DImode, d->target, 0); ++ emit_move_insn (tmp, sel); ++ break; ++ case E_V8SFmode: ++ sel = gen_rtx_CONST_VECTOR (E_V8SImode, gen_rtvec_v (d->nelt, rperm)); ++ tmp = gen_rtx_SUBREG (E_V8SImode, d->target, 0); ++ emit_move_insn (tmp, sel); ++ break; ++ default: ++ sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt, rperm)); ++ emit_move_insn (d->target, sel); ++ break; + } +- return true; +-} +- +-static bool +-loongarch_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) +-{ +- unsigned int i, nelt = d->nelt; +- unsigned char perm2[MAX_VECT_LEN]; + +- if (d->one_vector_p) ++ target = d->target; ++ // If temp op registers are requested in previous if branch, then use temp ++ // register intead of original one. ++ if (use_alt_op) + { +- /* Try interleave with alternating operands. */ +- memcpy (perm2, d->perm, sizeof(perm2)); +- for (i = 1; i < nelt; i += 2) +- perm2[i] += nelt; +- if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, perm2, nelt)) +- return true; ++ op0 = op0_alt != NULL_RTX ? op0_alt : d->op0; ++ op1 = op1_alt != NULL_RTX ? op1_alt : d->op1; + } + else + { +- if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, +- d->perm, nelt)) +- return true; ++ op0 = d->op0; ++ op1 = d->one_vector_p ? d->op0 : d->op1; ++ } + +- /* Try again with swapped operands. */ +- for (i = 0; i < nelt; ++i) +- perm2[i] = (d->perm[i] + nelt) & (2 * nelt - 1); +- if (loongarch_expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt)) +- return true; ++ // We FINALLY can generate xvshuf.* insn. ++ switch (d->vmode) ++ { ++ case E_V4DFmode: ++ emit_insn (gen_lasx_xvshuf_d_f (target, target, op1, op0)); ++ break; ++ case E_V4DImode: ++ emit_insn (gen_lasx_xvshuf_d (target, target, op1, op0)); ++ break; ++ case E_V8SFmode: ++ emit_insn (gen_lasx_xvshuf_w_f (target, target, op1, op0)); ++ break; ++ case E_V8SImode: ++ emit_insn (gen_lasx_xvshuf_w (target, target, op1, op0)); ++ break; ++ case E_V16HImode: ++ emit_insn (gen_lasx_xvshuf_h (target, target, op1, op0)); ++ break; ++ case E_V32QImode: ++ emit_insn (gen_lasx_xvshuf_b (target, op1, op0, target)); ++ break; ++ default: ++ gcc_unreachable (); ++ break; + } + +- if (loongarch_expand_lsx_shuffle (d)) +- return true; +- return false; ++ // extra insn for swapping the hi/lo 128bit of target vector register. 
++ if (reverse_hi_lo) ++ { ++ switch (d->vmode) ++ { ++ case E_V4DFmode: ++ emit_insn (gen_lasx_xvpermi_q_v4df (d->target, d->target, d->target, GEN_INT (0x1))); ++ break; ++ case E_V4DImode: ++ emit_insn (gen_lasx_xvpermi_q_v4di (d->target, d->target, d->target, GEN_INT (0x1))); ++ break; ++ case E_V8SFmode: ++ emit_insn (gen_lasx_xvpermi_q_v8sf (d->target, d->target, d->target, GEN_INT (0x1))); ++ break; ++ case E_V8SImode: ++ emit_insn (gen_lasx_xvpermi_q_v8si (d->target, d->target, d->target, GEN_INT (0x1))); ++ break; ++ case E_V16HImode: ++ emit_insn (gen_lasx_xvpermi_q_v16hi (d->target, d->target, d->target, GEN_INT (0x1))); ++ break; ++ case E_V32QImode: ++ emit_insn (gen_lasx_xvpermi_q_v32qi (d->target, d->target, d->target, GEN_INT (0x1))); ++ break; ++ default: ++ break; ++ } ++ } ++ // extra insn required by odd/even extraction. Swapping the second and third ++ // 64bit in target vector register. ++ else if (extract_ev_od) ++ { ++ rtx converted = gen_rtx_SUBREG (E_V4DImode, d->target, 0); ++ emit_insn (gen_lasx_xvpermi_d_v4di (converted, converted, GEN_INT (0xD8))); ++ } ++ ++expand_perm_const_2_end: ++ return ok; + } + + /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */ +@@ -9043,13 +9312,19 @@ loongarch_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, + if (!d.one_vector_p) + d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); + ++ ok = loongarch_expand_vec_perm_const_2 (&d); ++ if (ok) ++ return ok; ++ + start_sequence (); + ok = loongarch_expand_vec_perm_const_1 (&d); + end_sequence (); + return ok; + } + +- ok = loongarch_expand_vec_perm_const_1 (&d); ++ ok = loongarch_expand_vec_perm_const_2 (&d); ++ if (!ok) ++ ok = loongarch_expand_vec_perm_const_1 (&d); + + /* If we were given a two-vector permutation which just happened to + have both input vectors equal, we folded this into a one-vector +@@ -9070,16 +9345,18 @@ loongarch_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, + return ok; + } + +-/* Implement TARGET_SCHED_REASSOCIATION_WIDTH. */ +- + static int +-loongarch_sched_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED, +- machine_mode mode) ++loongarch_cpu_sched_reassociation_width (struct loongarch_target *target, ++ unsigned int opc, machine_mode mode) + { +- switch (loongarch_tune) ++ /* unreferenced argument */ ++ (void) opc; ++ ++ switch (target->cpu_tune) + { +- case PROCESSOR_LOONGARCH64: +- case PROCESSOR_LA464: ++ case CPU_LOONGARCH64: ++ case CPU_LA464: ++ case CPU_LA664: + /* Vector part. */ + if (LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode)) + { +@@ -9094,10 +9371,164 @@ loongarch_sched_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED, + else if (FLOAT_MODE_P (mode)) + return 4; + break; ++ } ++ ++ /* default is 1 */ ++ return 1; ++} ++ ++/* Implement TARGET_SCHED_REASSOCIATION_WIDTH. 
*/ ++ ++static int ++loongarch_sched_reassociation_width (unsigned int opc, machine_mode mode) ++{ ++ return loongarch_cpu_sched_reassociation_width (&la_target, opc, mode); ++} ++ ++/* Implement extract a scalar element from vecotr register */ ++ ++void ++loongarch_expand_vector_extract (rtx target, rtx vec, int elt) ++{ ++ machine_mode mode = GET_MODE (vec); ++ machine_mode inner_mode = GET_MODE_INNER (mode); ++ rtx tmp; ++ ++ switch (mode) ++ { ++ case E_V8HImode: ++ case E_V16QImode: ++ break; ++ ++ case E_V32QImode: ++ if (ISA_HAS_LASX) ++ { ++ if (elt >= 16) ++ { ++ tmp = gen_reg_rtx (V32QImode); ++ emit_insn (gen_lasx_xvpermi_d_v32qi (tmp, vec, GEN_INT (0xe))); ++ loongarch_expand_vector_extract (target, gen_lowpart (V16QImode, tmp), elt & 15); ++ } ++ else ++ loongarch_expand_vector_extract (target, gen_lowpart (V16QImode, vec), elt & 15); ++ return; ++ } ++ break; ++ ++ case E_V16HImode: ++ if (ISA_HAS_LASX) ++ { ++ if (elt >= 8) ++ { ++ tmp = gen_reg_rtx (V16HImode); ++ emit_insn (gen_lasx_xvpermi_d_v16hi (tmp, vec, GEN_INT (0xe))); ++ loongarch_expand_vector_extract (target, gen_lowpart (V8HImode, tmp), elt & 7); ++ } ++ else ++ loongarch_expand_vector_extract (target, gen_lowpart (V8HImode, vec), elt & 7); ++ return; ++ } ++ break; ++ + default: + break; + } +- return 1; ++ ++ tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt))); ++ tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp); ++ ++ /* Let the rtl optimizers know about the zero extension performed. */ ++ if (inner_mode == QImode || inner_mode == HImode) ++ { ++ tmp = gen_rtx_ZERO_EXTEND (SImode, tmp); ++ target = gen_lowpart (SImode, target); ++ } ++ if (inner_mode == SImode || inner_mode == DImode) ++ { ++ tmp = gen_rtx_SIGN_EXTEND (inner_mode, tmp); ++ } ++ ++ emit_insn (gen_rtx_SET (target, tmp)); ++} ++ ++/* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC ++ to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode. ++ The upper bits of DEST are undefined, though they shouldn't cause ++ exceptions (some bits from src or all zeros are ok). */ ++ ++static void ++emit_reduc_half (rtx dest, rtx src, int i) ++{ ++ rtx tem, d = dest; ++ switch (GET_MODE (src)) ++ { ++ case E_V4SFmode: ++ tem = gen_lsx_vbsrl_w_f (dest, src, GEN_INT (i == 128 ? 8 : 4)); ++ break; ++ case E_V2DFmode: ++ tem = gen_lsx_vbsrl_d_f (dest, src, GEN_INT (8)); ++ break; ++ case E_V8SFmode: ++ if (i == 256) ++ tem = gen_lasx_xvpermi_d_v8sf (dest, src, GEN_INT (0xe)); ++ else ++ tem = gen_lasx_xvshuf4i_w_f (dest, src, ++ GEN_INT (i == 128 ? 2 + (3 << 2) : 1)); ++ break; ++ case E_V4DFmode: ++ if (i == 256) ++ tem = gen_lasx_xvpermi_d_v4df (dest, src, GEN_INT (0xe)); ++ else ++ tem = gen_lasx_xvpermi_d_v4df (dest, src, const1_rtx); ++ break; ++ case E_V32QImode: ++ case E_V16HImode: ++ case E_V8SImode: ++ case E_V4DImode: ++ d = gen_reg_rtx (V4DImode); ++ if (i == 256) ++ tem = gen_lasx_xvpermi_d_v4di (d, gen_lowpart (V4DImode, src), GEN_INT (0xe)); ++ else ++ tem = gen_lasx_xvbsrl_d (d, gen_lowpart (V4DImode, src), GEN_INT (i/16)); ++ break; ++ case E_V16QImode: ++ case E_V8HImode: ++ case E_V4SImode: ++ case E_V2DImode: ++ d = gen_reg_rtx (V2DImode); ++ tem = gen_lsx_vbsrl_d (d, gen_lowpart (V2DImode, src), GEN_INT (i/16)); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ emit_insn (tem); ++ if (d != dest) ++ emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d)); ++} ++ ++/* Expand a vector reduction. FN is the binary pattern to reduce; ++ DEST is the destination; IN is the input vector. 
*/ ++ ++void ++loongarch_expand_vector_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in) ++{ ++ rtx half, dst, vec = in; ++ machine_mode mode = GET_MODE (in); ++ int i; ++ ++ for (i = GET_MODE_BITSIZE (mode); ++ i > GET_MODE_UNIT_BITSIZE (mode); ++ i >>= 1) ++ { ++ half = gen_reg_rtx (mode); ++ emit_reduc_half (half, vec, i); ++ if (i == GET_MODE_UNIT_BITSIZE (mode) * 2) ++ dst = dest; ++ else ++ dst = gen_reg_rtx (mode); ++ emit_insn (fn (dst, half, vec)); ++ vec = dst; ++ } + } + + /* Expand an integral vector unpack operation. */ +@@ -9110,14 +9541,14 @@ loongarch_expand_vec_unpack (rtx operands[2], bool unsigned_p, bool high_p) + rtx (*extend) (rtx, rtx); + rtx (*cmpFunc) (rtx, rtx, rtx); + rtx (*swap_hi_lo) (rtx, rtx, rtx, rtx); +- rtx tmp, dest, zero; +- machine_mode halfmode = BLKmode; ++ rtx tmp, dest /*, zero */; ++ /* machine_mode halfmode = BLKmode; */ + + if (ISA_HAS_LASX && GET_MODE_SIZE (imode) == 32) + { + switch (imode) + { +- ++ + case E_V8SImode: + if (unsigned_p) + extend = gen_lasx_vext2xv_du_wu; +@@ -9125,7 +9556,7 @@ loongarch_expand_vec_unpack (rtx operands[2], bool unsigned_p, bool high_p) + extend = gen_lasx_vext2xv_d_w; + swap_hi_lo = gen_lasx_xvpermi_q_v8si; + break; +- ++ + case E_V16HImode: + if (unsigned_p) + extend = gen_lasx_vext2xv_wu_hu; +@@ -9133,7 +9564,7 @@ loongarch_expand_vec_unpack (rtx operands[2], bool unsigned_p, bool high_p) + extend = gen_lasx_vext2xv_w_h; + swap_hi_lo = gen_lasx_xvpermi_q_v16hi; + break; +- ++ + case E_V32QImode: + if (unsigned_p) + extend = gen_lasx_vext2xv_hu_bu; +@@ -9141,7 +9572,7 @@ loongarch_expand_vec_unpack (rtx operands[2], bool unsigned_p, bool high_p) + extend = gen_lasx_vext2xv_h_b; + swap_hi_lo = gen_lasx_xvpermi_q_v32qi; + break; +- ++ + default: + gcc_unreachable (); + break; +@@ -9268,7 +9699,7 @@ loongarch_expand_vector_init (rtx target, rtx vals) + machine_mode vmode = GET_MODE (target); + machine_mode imode = GET_MODE_INNER (vmode); + unsigned i, nelt = GET_MODE_NUNITS (vmode); +- unsigned nvar = 0, one_var = -1u; ++ unsigned nvar = 0 /*, one_var = -1u*/ ; + bool all_same = true; + rtx x; + +@@ -9276,7 +9707,7 @@ loongarch_expand_vector_init (rtx target, rtx vals) + { + x = XVECEXP (vals, 0, i); + if (!loongarch_constant_elt_p (x)) +- nvar++, one_var = i; ++ nvar++ /*, one_var = i */ ; + if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) + all_same = false; + } +@@ -9311,7 +9742,7 @@ loongarch_expand_vector_init (rtx target, rtx vals) + temp2 = same; + else if (GET_MODE_SIZE (imode) >= UNITS_PER_WORD) + { +- if(GET_CODE (same) == MEM) ++ if (GET_CODE (same) == MEM) + { + rtx reg_tmp = gen_reg_rtx (GET_MODE (same)); + loongarch_emit_move (reg_tmp, same); +@@ -9322,7 +9753,7 @@ loongarch_expand_vector_init (rtx target, rtx vals) + } + else + { +- if(GET_CODE (same) == MEM) ++ if (GET_CODE (same) == MEM) + { + rtx reg_tmp = gen_reg_rtx (GET_MODE (same)); + loongarch_emit_move (reg_tmp, same); +@@ -9505,7 +9936,7 @@ loongarch_expand_vector_init (rtx target, rtx vals) + temp2 = same; + else if (GET_MODE_SIZE (imode) >= UNITS_PER_WORD) + { +- if(GET_CODE (same) == MEM) ++ if (GET_CODE (same) == MEM) + { + rtx reg_tmp = gen_reg_rtx (GET_MODE (same)); + loongarch_emit_move (reg_tmp, same); +@@ -9516,7 +9947,7 @@ loongarch_expand_vector_init (rtx target, rtx vals) + } + else + { +- if(GET_CODE (same) == MEM) ++ if (GET_CODE (same) == MEM) + { + rtx reg_tmp = gen_reg_rtx (GET_MODE (same)); + loongarch_emit_move (reg_tmp, same); +@@ -9614,9 +10045,8 @@ loongarch_expand_vector_init (rtx target, rtx vals) + /* 
Implement HARD_REGNO_CALLER_SAVE_MODE. */ + + machine_mode +-loongarch_hard_regno_caller_save_mode (unsigned int regno, +- unsigned int nregs, +- machine_mode mode) ++loongarch_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs, ++ machine_mode mode) + { + /* For performance, avoid saving/restoring upper parts of a register + by returning MODE as save mode when the mode is known. */ +@@ -9785,7 +10215,8 @@ loongarch_expand_vec_cond_expr (machine_mode mode, machine_mode vimode, + if (mode != vimode) + { + xop1 = gen_reg_rtx (vimode); +- emit_move_insn (xop1, gen_rtx_SUBREG (vimode, operands[1], 0)); ++ emit_move_insn (xop1, simplify_gen_subreg (vimode, operands[1], ++ GET_MODE (operands[1]), 0)); + } + emit_move_insn (src1, xop1); + } +@@ -9802,7 +10233,8 @@ loongarch_expand_vec_cond_expr (machine_mode mode, machine_mode vimode, + if (mode != vimode) + { + xop2 = gen_reg_rtx (vimode); +- emit_move_insn (xop2, gen_rtx_SUBREG (vimode, operands[2], 0)); ++ emit_move_insn (xop2, simplify_gen_subreg (vimode, operands[2], ++ GET_MODE (operands[2]), 0)); + } + emit_move_insn (src2, xop2); + } +@@ -9821,13 +10253,14 @@ loongarch_expand_vec_cond_expr (machine_mode mode, machine_mode vimode, + gen_rtx_AND (vimode, mask, src1)); + /* The result is placed back to a register with the mask. */ + emit_insn (gen_rtx_SET (mask, bsel)); +- emit_move_insn (operands[0], gen_rtx_SUBREG (mode, mask, 0)); ++ emit_move_insn (operands[0], simplify_gen_subreg (mode, mask, ++ GET_MODE (mask), 0)); + } + } + + /* Expand integer vector comparison */ + bool +-loongarch_expand_int_vec_cmp(rtx operands[]) ++loongarch_expand_int_vec_cmp (rtx operands[]) + { + + rtx_code code = GET_CODE (operands[1]); +@@ -9837,7 +10270,7 @@ loongarch_expand_int_vec_cmp(rtx operands[]) + + /* Expand integer vector comparison */ + bool +-loongarch_expand_fp_vec_cmp(rtx operands[]) ++loongarch_expand_fp_vec_cmp (rtx operands[]) + { + rtx_code code = GET_CODE (operands[1]); + loongarch_expand_lsx_cmp (operands[0], code, operands[2], operands[3]); +@@ -9845,61 +10278,16 @@ loongarch_expand_fp_vec_cmp(rtx operands[]) + } + + +-/* Implement TARGET_CASE_VALUES_THRESHOLD. */ +- +-unsigned int +-loongarch_case_values_threshold (void) +-{ +- return default_case_values_threshold (); +-} +- +- + /* Implement TARGET_SPILL_CLASS. */ + + static reg_class_t + loongarch_spill_class (reg_class_t rclass ATTRIBUTE_UNUSED, +- machine_mode mode ATTRIBUTE_UNUSED) ++ machine_mode mode ATTRIBUTE_UNUSED) + { + return NO_REGS; + } + +-/* Implement TARGET_LRA_P. */ +- +-static bool +-loongarch_lra_p (void) +-{ +- return loongarch_lra_flag; +-} +- +-/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS. */ +- +-static reg_class_t +-loongarch_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class, +- reg_class_t best_class ATTRIBUTE_UNUSED) +-{ +- /* LRA will allocate an FPR for an integer mode pseudo instead of spilling +- to memory if an FPR is present in the allocno class. It is rare that +- we actually need to place an integer mode value in an FPR so where +- possible limit the allocation to GR_REGS. This will slightly pessimize +- code that involves integer to/from float conversions as these will have +- to reload into FPRs in LRA. Such reloads are sometimes eliminated and +- sometimes only partially eliminated. We choose to take this penalty +- in order to eliminate usage of FPRs in code that does not use floating +- point data. 
+- +- This change has a similar effect to increasing the cost of FPR->GPR +- register moves for integer modes so that they are higher than the cost +- of memory but changing the allocno class is more reliable. +- +- This is also similar to forbidding integer mode values in FPRs entirely +- but this would lead to an inconsistency in the integer to/from float +- instructions that say integer mode values must be placed in FPRs. */ +- if (INTEGRAL_MODE_P (PSEUDO_REGNO_MODE (regno)) && allocno_class == ALL_REGS) +- return GR_REGS; +- return allocno_class; +-} +- +-/* Implement TARGET_PROMOTE_FUNCTION_MODE */ ++/* Implement TARGET_PROMOTE_FUNCTION_MODE. */ + + /* This function is equivalent to default_promote_function_mode_always_promote + except that it returns a promoted mode even if type is NULL_TREE. This is +@@ -9909,10 +10297,10 @@ loongarch_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class, + + static machine_mode + loongarch_promote_function_mode (const_tree type ATTRIBUTE_UNUSED, +- machine_mode mode, +- int *punsignedp ATTRIBUTE_UNUSED, +- const_tree fntype ATTRIBUTE_UNUSED, +- int for_return ATTRIBUTE_UNUSED) ++ machine_mode mode, ++ int *punsignedp ATTRIBUTE_UNUSED, ++ const_tree fntype ATTRIBUTE_UNUSED, ++ int for_return ATTRIBUTE_UNUSED) + { + int unsignedp; + +@@ -9933,16 +10321,6 @@ loongarch_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec) + return !TARGET_64BIT || inprec <= 32 || outprec > 32; + } + +-/* Implement TARGET_CONSTANT_ALIGNMENT. */ +- +-static HOST_WIDE_INT +-loongarch_constant_alignment (const_tree exp, HOST_WIDE_INT align) +-{ +- if (TREE_CODE (exp) == STRING_CST || TREE_CODE (exp) == CONSTRUCTOR) +- return MAX (align, BITS_PER_WORD); +- return align; +-} +- + /* Implement TARGET_STARTING_FRAME_OFFSET. See loongarch_compute_frame_info + for details about the frame layout. */ + +@@ -9963,8 +10341,10 @@ loongarch_la464_128_store_p (rtx operands[]) + int offset1; + rtx dst0 = operands[0]; + rtx dst1 = operands[2]; ++ /* + rtx src0 = operands[1]; + rtx src1 = operands[3]; ++ */ + int base_reg0; + int base_reg1; + +@@ -10030,13 +10410,15 @@ loongarch_la464_128_load_p (rtx operands[]) + int offset0; + int offset1; + rtx dst0 = operands[0]; ++ /* + rtx dst1 = operands[2]; ++ */ + rtx src0 = operands[1]; + rtx src1 = operands[3]; + int base_reg0; + int base_reg1; + int dst_reg0; +- ++ + dst_reg0 = REGNO (dst0); + + if (GET_CODE (XEXP (src0, 0)) == PLUS) +@@ -10209,6 +10591,138 @@ loongarch_build_signbit_mask (machine_mode mode, bool vect, bool invert) + return force_reg (vec_mode, v); + } + ++/* Use rsqrte instruction and Newton-Rhapson to compute the approximation of ++ a single precision floating point [reciprocal] square root. */ ++ ++void loongarch_emit_swrsqrtsf (rtx res, rtx a, machine_mode mode, bool recip) ++{ ++ rtx x0, e0, e1, e2, mhalf, monehalf; ++ REAL_VALUE_TYPE r; ++ machine_mode imode; ++ int unspec; ++ ++ x0 = gen_reg_rtx (mode); ++ e0 = gen_reg_rtx (mode); ++ e1 = gen_reg_rtx (mode); ++ e2 = gen_reg_rtx (mode); ++ ++ real_arithmetic (&r, ABS_EXPR, &dconsthalf, NULL); ++ mhalf = const_double_from_real_value (r, SFmode); ++ ++ real_arithmetic (&r, PLUS_EXPR, &dconsthalf, &dconst1); ++ monehalf = const_double_from_real_value (r, SFmode); ++ unspec = UNSPEC_RSQRTE; ++ ++ if (VECTOR_MODE_P (mode)) ++ { ++ mhalf = loongarch_build_const_vector (mode, true, mhalf); ++ monehalf = loongarch_build_const_vector (mode, true, monehalf); ++ if (GET_MODE_SIZE (mode) == 32) ++ imode = mode == V4DFmode ? 
V4DImode : V8SImode; ++ if (GET_MODE_SIZE (mode) == 16) ++ imode = mode == V2DFmode ? V2DImode : V4SImode; ++ } ++ ++ /* rsqrt(a) = rsqrte(a) * (1.5 - 0.5 * a * rsqrte(a) * rsqrte(a)) ++ sqrt(a) = a * rsqrte(a) * (1.5 - 0.5 * a * rsqrte(a) * rsqrte(a))*/ ++ ++ a = force_reg (mode, a); ++ ++ /* x0 = rsqrt(a) estimate */ ++ emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a), ++ unspec))); ++ ++ /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */ ++ if (!recip) ++ { ++ rtx zero = force_reg (mode, CONST0_RTX(mode)); ++ ++ if (VECTOR_MODE_P (mode)) ++ { ++ rtx mask = gen_reg_rtx (imode); ++ emit_insn (gen_rtx_SET (mask, gen_rtx_NE (imode, a, zero))); ++ emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, gen_lowpart(mode, mask)))); ++ } ++ else ++ { ++ rtx target = emit_conditional_move (x0, GT, a, zero, mode, ++ x0, zero, mode, 0); ++ if (target != x0) ++ emit_move_insn (x0, target); ++ } ++ } ++ ++ /* e0 = x0 * a */ ++ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a))); ++ /* e1 = e0 * x0 */ ++ emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0))); ++ ++ /* e2 = 1.5 - e1 * 0.5 */ ++ mhalf = force_reg (mode, mhalf); ++ monehalf = force_reg (mode, monehalf); ++ emit_insn (gen_rtx_SET (e2, gen_rtx_FMA (mode, gen_rtx_NEG(mode, e1), mhalf, monehalf))); ++ ++ if (recip) ++ /* res = e2 * x0 */ ++ emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, x0, e2))); ++ else ++ /* res = e2 * e0 */ ++ emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e0))); ++} ++ ++/* Use recipe instruction and Newton-Rhapson to compute the approximation of ++ a single precision floating point divide. */ ++ ++void loongarch_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode) ++{ ++ rtx x0, x1, e0, mtwo; ++ REAL_VALUE_TYPE r; ++ x0 = gen_reg_rtx (mode); ++ e0 = gen_reg_rtx (mode); ++ x1 = gen_reg_rtx (mode); ++ ++ real_arithmetic (&r, ABS_EXPR, &dconst2, NULL); ++ mtwo = const_double_from_real_value (r, SFmode); ++ ++ if (VECTOR_MODE_P (mode)) ++ mtwo = loongarch_build_const_vector (mode, true, mtwo); ++ ++ mtwo = force_reg (mode, mtwo); ++ ++ /* a / b = a * recipe(b) * (2.0 - b * recipe(b)) */ ++ ++ /* x0 = 1./b estimate */ ++ emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b), ++ UNSPEC_RECIPE))); ++ /* 2.0 - b * x0; */ ++ emit_insn (gen_rtx_SET (e0, gen_rtx_FMA (mode,gen_rtx_NEG(mode, b), x0, mtwo))); ++ ++ /* x1 = x0 * e0 */ ++ emit_insn (gen_rtx_SET (x1, gen_rtx_MULT (mode, x0, e0))); ++ ++ /* res = a * x1 */ ++ emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1))); ++} ++ ++/* LoongArch only implements preld hint=0 (prefetch for load) and hint=8 ++ (prefetch for store), other hint just scale to hint = 0 and hint = 1. */ ++ ++rtx ++loongarch_prefetch_cookie (rtx write, rtx locality) ++{ ++ if (INTVAL (locality) == 1 && INTVAL (write) == 0) ++ return GEN_INT (INTVAL (write) + 2); ++ ++ /* store. */ ++ if (INTVAL (write) == 1) ++ return GEN_INT (INTVAL (write) + 7); ++ ++ /* load. */ ++ if (INTVAL (write) == 0) ++ return GEN_INT (INTVAL (write)); ++ ++ gcc_unreachable (); ++} + + + /* Initialize the GCC target structure. 
*/ +@@ -10225,10 +10739,6 @@ loongarch_build_signbit_mask (machine_mode mode, bool vect, bool invert) + #undef TARGET_LEGITIMIZE_ADDRESS + #define TARGET_LEGITIMIZE_ADDRESS loongarch_legitimize_address + +-#undef TARGET_ASM_FUNCTION_PROLOGUE +-#define TARGET_ASM_FUNCTION_PROLOGUE loongarch_output_function_prologue +-#undef TARGET_ASM_FUNCTION_EPILOGUE +-#define TARGET_ASM_FUNCTION_EPILOGUE loongarch_output_function_epilogue + #undef TARGET_ASM_SELECT_RTX_SECTION + #define TARGET_ASM_SELECT_RTX_SECTION loongarch_select_rtx_section + #undef TARGET_ASM_FUNCTION_RODATA_SECTION +@@ -10249,19 +10759,12 @@ loongarch_build_signbit_mask (machine_mode mode, bool vect, bool invert) + #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD + #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ + loongarch_multipass_dfa_lookahead +-#undef TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P +-#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P \ +- loongarch_small_register_classes_for_mode_p + + #undef TARGET_FUNCTION_OK_FOR_SIBCALL + #define TARGET_FUNCTION_OK_FOR_SIBCALL loongarch_function_ok_for_sibcall + +-#undef TARGET_INSERT_ATTRIBUTES +-#define TARGET_INSERT_ATTRIBUTES loongarch_insert_attributes +-#undef TARGET_MERGE_DECL_ATTRIBUTES +-#define TARGET_MERGE_DECL_ATTRIBUTES loongarch_merge_decl_attributes +-#undef TARGET_CAN_INLINE_P +-#define TARGET_CAN_INLINE_P loongarch_can_inline_p ++#undef TARGET_GET_DRAP_RTX ++#define TARGET_GET_DRAP_RTX loongarch_get_drap_rtx + + #undef TARGET_VALID_POINTER_MODE + #define TARGET_VALID_POINTER_MODE loongarch_valid_pointer_mode +@@ -10276,43 +10779,49 @@ loongarch_build_signbit_mask (machine_mode mode, bool vect, bool invert) + #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST + #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ + loongarch_builtin_vectorization_cost ++#undef TARGET_VECTORIZE_ADD_STMT_COST ++#define TARGET_VECTORIZE_ADD_STMT_COST loongarch_add_stmt_cost + ++#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT ++#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT loongarch_builtin_support_vector_misalignment ++#undef TARGET_MODE_REP_EXTENDED ++#define TARGET_MODE_REP_EXTENDED loongarch_mode_rep_extended + + #undef TARGET_IN_SMALL_DATA_P + #define TARGET_IN_SMALL_DATA_P loongarch_in_small_data_p + +-#undef TARGET_MACHINE_DEPENDENT_REORG +-#define TARGET_MACHINE_DEPENDENT_REORG loongarch_reorg +- +-#undef TARGET_PREFERRED_RELOAD_CLASS ++#undef TARGET_PREFERRED_RELOAD_CLASS + #define TARGET_PREFERRED_RELOAD_CLASS loongarch_preferred_reload_class + +-#undef TARGET_EXPAND_TO_RTL_HOOK +-#define TARGET_EXPAND_TO_RTL_HOOK loongarch_expand_to_rtl_hook +-#undef TARGET_ASM_FILE_START +-#define TARGET_ASM_FILE_START loongarch_file_start + #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE + #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true + + #undef TARGET_EXPAND_BUILTIN_VA_START + #define TARGET_EXPAND_BUILTIN_VA_START loongarch_va_start + +-#undef TARGET_PROMOTE_FUNCTION_MODE ++#undef TARGET_PROMOTE_FUNCTION_MODE + #define TARGET_PROMOTE_FUNCTION_MODE loongarch_promote_function_mode + #undef TARGET_RETURN_IN_MEMORY + #define TARGET_RETURN_IN_MEMORY loongarch_return_in_memory + ++#undef TARGET_FUNCTION_VALUE ++#define TARGET_FUNCTION_VALUE loongarch_function_value ++#undef TARGET_LIBCALL_VALUE ++#define TARGET_LIBCALL_VALUE loongarch_libcall_value ++ + #undef TARGET_ASM_OUTPUT_MI_THUNK + #define TARGET_ASM_OUTPUT_MI_THUNK loongarch_output_mi_thunk + #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +-#define TARGET_ASM_CAN_OUTPUT_MI_THUNK 
hook_bool_const_tree_hwi_hwi_const_tree_true ++#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \ ++ hook_bool_const_tree_hwi_hwi_const_tree_true + + #undef TARGET_PRINT_OPERAND + #define TARGET_PRINT_OPERAND loongarch_print_operand + #undef TARGET_PRINT_OPERAND_ADDRESS + #define TARGET_PRINT_OPERAND_ADDRESS loongarch_print_operand_address + #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P +-#define TARGET_PRINT_OPERAND_PUNCT_VALID_P loongarch_print_operand_punct_valid_p ++#define TARGET_PRINT_OPERAND_PUNCT_VALID_P \ ++ loongarch_print_operand_punct_valid_p + + #undef TARGET_SETUP_INCOMING_VARARGS + #define TARGET_SETUP_INCOMING_VARARGS loongarch_setup_incoming_varargs +@@ -10344,6 +10853,10 @@ loongarch_build_signbit_mask (machine_mode mode, bool vect, bool invert) + #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \ + loongarch_autovectorize_vector_sizes + ++#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ++#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \ ++ loongarch_builtin_vectorized_function ++ + #undef TARGET_INIT_BUILTINS + #define TARGET_INIT_BUILTINS loongarch_init_builtins + #undef TARGET_BUILTIN_DECL +@@ -10351,8 +10864,11 @@ loongarch_build_signbit_mask (machine_mode mode, bool vect, bool invert) + #undef TARGET_EXPAND_BUILTIN + #define TARGET_EXPAND_BUILTIN loongarch_expand_builtin + ++/* The generic ELF target does not always have TLS support. */ ++#ifdef HAVE_AS_TLS + #undef TARGET_HAVE_TLS + #define TARGET_HAVE_TLS HAVE_AS_TLS ++#endif + + #undef TARGET_CANNOT_FORCE_CONST_MEM + #define TARGET_CANNOT_FORCE_CONST_MEM loongarch_cannot_force_const_mem +@@ -10360,35 +10876,24 @@ loongarch_build_signbit_mask (machine_mode mode, bool vect, bool invert) + #undef TARGET_LEGITIMATE_CONSTANT_P + #define TARGET_LEGITIMATE_CONSTANT_P loongarch_legitimate_constant_p + +-#undef TARGET_ENCODE_SECTION_INFO +-#define TARGET_ENCODE_SECTION_INFO loongarch_encode_section_info +- +-#undef TARGET_ATTRIBUTE_TABLE +-#define TARGET_ATTRIBUTE_TABLE loongarch_attribute_table + /* All our function attributes are related to how out-of-line copies should + be compiled or called. They don't in themselves prevent inlining. 
*/ + #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P + #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true + + #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P +-#define TARGET_USE_BLOCKS_FOR_CONSTANT_P loongarch_use_blocks_for_constant_p +-#undef TARGET_USE_ANCHORS_FOR_SYMBOL_P +-#define TARGET_USE_ANCHORS_FOR_SYMBOL_P loongarch_use_anchors_for_symbol_p +- +-#undef TARGET_COMP_TYPE_ATTRIBUTES +-#define TARGET_COMP_TYPE_ATTRIBUTES loongarch_comp_type_attributes ++#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true + + #ifdef HAVE_AS_DTPRELWORD + #undef TARGET_ASM_OUTPUT_DWARF_DTPREL + #define TARGET_ASM_OUTPUT_DWARF_DTPREL loongarch_output_dwarf_dtprel + #endif +-#undef TARGET_DWARF_REGISTER_SPAN +-#define TARGET_DWARF_REGISTER_SPAN loongarch_dwarf_register_span +-#undef TARGET_DWARF_FRAME_REG_MODE +-#define TARGET_DWARF_FRAME_REG_MODE loongarch_dwarf_frame_reg_mode + + #undef TARGET_LEGITIMATE_ADDRESS_P +-#define TARGET_LEGITIMATE_ADDRESS_P loongarch_legitimate_address_p ++#define TARGET_LEGITIMATE_ADDRESS_P loongarch_legitimate_address_p ++ ++#undef TARGET_COMPUTE_FRAME_LAYOUT ++#define TARGET_COMPUTE_FRAME_LAYOUT loongarch_compute_frame_info + + #undef TARGET_FRAME_POINTER_REQUIRED + #define TARGET_FRAME_POINTER_REQUIRED loongarch_frame_pointer_required +@@ -10402,18 +10907,12 @@ loongarch_build_signbit_mask (machine_mode mode, bool vect, bool invert) + #undef TARGET_TRAMPOLINE_INIT + #define TARGET_TRAMPOLINE_INIT loongarch_trampoline_init + +-#undef TARGET_SHIFT_TRUNCATION_MASK +-#define TARGET_SHIFT_TRUNCATION_MASK loongarch_shift_truncation_mask +- + #undef TARGET_VECTORIZE_VEC_PERM_CONST + #define TARGET_VECTORIZE_VEC_PERM_CONST loongarch_vectorize_vec_perm_const + + #undef TARGET_SCHED_REASSOCIATION_WIDTH + #define TARGET_SCHED_REASSOCIATION_WIDTH loongarch_sched_reassociation_width + +-#undef TARGET_CASE_VALUES_THRESHOLD +-#define TARGET_CASE_VALUES_THRESHOLD loongarch_case_values_threshold +- + #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV + #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV loongarch_atomic_assign_expand_fenv + +@@ -10422,13 +10921,6 @@ loongarch_build_signbit_mask (machine_mode mode, bool vect, bool invert) + + #undef TARGET_SPILL_CLASS + #define TARGET_SPILL_CLASS loongarch_spill_class +-#undef TARGET_LRA_P +-#define TARGET_LRA_P loongarch_lra_p +-#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS +-#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS loongarch_ira_change_pseudo_allocno_class +- +-#undef TARGET_HARD_REGNO_SCRATCH_OK +-#define TARGET_HARD_REGNO_SCRATCH_OK loongarch_hard_regno_scratch_ok + + #undef TARGET_HARD_REGNO_NREGS + #define TARGET_HARD_REGNO_NREGS loongarch_hard_regno_nregs +@@ -10445,9 +10937,6 @@ loongarch_build_signbit_mask (machine_mode mode, bool vect, bool invert) + #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS + #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2 + +-#undef TARGET_SECONDARY_MEMORY_NEEDED +-#define TARGET_SECONDARY_MEMORY_NEEDED loongarch_secondary_memory_needed +- + #undef TARGET_CAN_CHANGE_MODE_CLASS + #define TARGET_CAN_CHANGE_MODE_CLASS loongarch_can_change_mode_class + +@@ -10460,6 +10949,9 @@ loongarch_build_signbit_mask (machine_mode mode, bool vect, bool invert) + #undef TARGET_STARTING_FRAME_OFFSET + #define TARGET_STARTING_FRAME_OFFSET loongarch_starting_frame_offset + ++#undef TARGET_SECONDARY_RELOAD ++#define TARGET_SECONDARY_RELOAD loongarch_secondary_reload ++ + struct gcc_target targetm = TARGET_INITIALIZER; +- ++ + #include "gt-loongarch.h" +diff --git a/gcc/config/loongarch/loongarch.h 
b/gcc/config/loongarch/loongarch.h +index 18d17afb8..1b26230cb 100644 +--- a/gcc/config/loongarch/loongarch.h ++++ b/gcc/config/loongarch/loongarch.h +@@ -1,9 +1,7 @@ +-/* Definitions of target machine for GNU compiler. LARCH version. +- Copyright (C) 1989-2018 Free Software Foundation, Inc. +- Contributed by A. Lichnewsky (lich@inria.inria.fr). +- Changed by Michael Meissner (meissner@osf.org). +- 64-bit r4000 support by Ian Lance Taylor (ian@cygnus.com) and +- Brendan Eich (brendan@microunity.com). ++/* Definitions of target machine for GNU compiler. LoongArch version. ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. ++ Contributed by Loongson Technology Co. Ltd. ++ Based on MIPS and RISC-V target for GNU compiler. + + This file is part of GCC. + +@@ -21,318 +19,36 @@ You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +- +-#include "config/vxworks-dummy.h" +- +-#ifdef GENERATOR_FILE +-/* This is used in some insn conditions, so needs to be declared, but +- does not need to be defined. */ +-extern int target_flags_explicit; +-#endif +- +-/* LARCH external variables defined in loongarch.c. */ +- +-/* Which ABI to use. ABILP32 (original 32, or o32), ABILPX32 (n32), +- ABILP64 (n64) are all defined by SGI. */ +- +-#define ABILP32 0 +-#define ABILPX32 1 +-#define ABILP64 2 +- +-/* Information about one recognized processor. Defined here for the +- benefit of TARGET_CPU_CPP_BUILTINS. */ +-struct loongarch_cpu_info { +- /* The 'canonical' name of the processor as far as GCC is concerned. +- It's typically a manufacturer's prefix followed by a numerical +- designation. It should be lowercase. */ +- const char *name; +- +- /* The internal processor number that most closely matches this +- entry. Several processors can have the same value, if there's no +- difference between them from GCC's point of view. */ +- enum processor cpu; +- +- /* The ISA level that the processor implements. */ +- int isa; +- +- /* A mask of PTF_* values. */ +- unsigned int tune_flags; +-}; ++/* LoongArch external variables defined in loongarch.c. */ + + #include "config/loongarch/loongarch-opts.h" + + /* Macros to silence warnings about numbers being signed in traditional + C and unsigned in ISO C when compiled on 32-bit hosts. */ + +-#define BITMASK_HIGH (((unsigned long)1) << 31) /* 0x80000000 */ +-#define BITMASK_UPPER16 ((unsigned long)0xffff << 16) /* 0xffff0000 */ +-#define BITMASK_LOWER16 ((unsigned long)0xffff) /* 0x0000ffff */ ++#define BITMASK_HIGH (((unsigned long) 1) << 31) /* 0x80000000 */ + +- + /* Run-time compilation parameters selecting different hardware subsets. */ + +-/* True if we are generating position-independent VxWorks RTP code. */ +-#define TARGET_RTP_PIC (TARGET_VXWORKS_RTP && flag_pic) +- +-/* True if we can optimize sibling calls. For simplicity, we only +- handle cases in which call_insn_operand will reject invalid +- sibcall addresses. There are two cases in which this isn't true: +- +- - TARGET_USE_GOT && !TARGET_EXPLICIT_RELOCS. call_insn_operand +- accepts global constants, but all sibcalls must be indirect. */ +-#define TARGET_SIBCALLS (1) +- +-/* True if we can use the J and JAL instructions. */ +-#define TARGET_ABSOLUTE_JUMPS (!flag_pic) +- +-/* True if the output must have a writable .eh_frame. +- See ASM_PREFERRED_EH_DATA_FORMAT for details. 
*/ +-#ifdef HAVE_LD_PERSONALITY_RELAXATION +-#define TARGET_WRITABLE_EH_FRAME 0 +-#else +-#define TARGET_WRITABLE_EH_FRAME (flag_pic && TARGET_SHARED) +-#endif +- +- +-/* ISA has LSA available. */ +-#define ISA_HAS_LSA (1) +- +-/* ISA has DLSA available. */ +-#define ISA_HAS_DLSA (TARGET_64BIT) +- +-/* Architecture target defines. */ +-#define TARGET_LOONGARCH64 (loongarch_arch == PROCESSOR_LOONGARCH64) +-#define TUNE_LOONGARCH64 (loongarch_tune == PROCESSOR_LOONGARCH64) +-#define TARGET_LA464 (loongarch_arch == PROCESSOR_LA464) +-#define TUNE_LA464 (loongarch_tune == PROCESSOR_LA464) +-/* True if the pre-reload scheduler should try to create chains of +- multiply-add or multiply-subtract instructions. For example, +- suppose we have: +- +- t1 = a * b +- t2 = t1 + c * d +- t3 = e * f +- t4 = t3 - g * h +- +- t1 will have a higher priority than t2 and t3 will have a higher +- priority than t4. However, before reload, there is no dependence +- between t1 and t3, and they can often have similar priorities. +- The scheduler will then tend to prefer: +- +- t1 = a * b +- t3 = e * f +- t2 = t1 + c * d +- t4 = t3 - g * h +- +- which stops us from making full use of macc/madd-style instructions. +- This sort of situation occurs frequently in Fourier transforms and +- in unrolled loops. +- +- To counter this, the TUNE_MACC_CHAINS code will reorder the ready +- queue so that chained multiply-add and multiply-subtract instructions +- appear ahead of any other instruction that is likely to clobber lo. +- In the example above, if t2 and t3 become ready at the same time, +- the code ensures that t2 is scheduled first. +- +- Multiply-accumulate instructions are a bigger win for some targets +- than others, so this macro is defined on an opt-in basis. */ +-#define TUNE_MACC_CHAINS 0 +- +-#define TARGET_OLDABI (loongarch_abi == ABILP32) +-#define TARGET_NEWABI (loongarch_abi == ABILPX32 || loongarch_abi == ABILP64) +- +-/* TARGET_HARD_FLOAT and TARGET_SOFT_FLOAT reflect whether the FPU is +- directly accessible, while the command-line options select +- TARGET_HARD_FLOAT_ABI and TARGET_SOFT_FLOAT_ABI to reflect the ABI +- in use. */ +-#define TARGET_HARD_FLOAT (TARGET_HARD_FLOAT_ABI) +-#define TARGET_SOFT_FLOAT (TARGET_SOFT_FLOAT_ABI) +- +-/* False if SC acts as a memory barrier with respect to itself, +- otherwise a SYNC will be emitted after SC for atomic operations +- that require ordering between the SC and following loads and +- stores. It does not tell anything about ordering of loads and +- stores prior to and following the SC, only about the SC itself and +- those loads and stores follow it. */ +-#define TARGET_SYNC_AFTER_SC (1) +- +-/* Define preprocessor macros for the -march and -mtune options. +- PREFIX is either _LARCH_ARCH or _LARCH_TUNE, INFO is the selected +- processor. If INFO's canonical name is "foo", define PREFIX to +- be "foo", and define an additional macro PREFIX_FOO. */ +-#define LARCH_CPP_SET_PROCESSOR(PREFIX, INFO) \ +- do \ +- { \ +- char *macro, *p; \ +- \ +- macro = concat ((PREFIX), "_", (INFO)->name, NULL); \ +- for (p = macro; *p != 0; p++) \ +- if (*p == '+') \ +- *p = 'P'; \ +- else \ +- *p = TOUPPER (*p); \ +- \ +- builtin_define (macro); \ +- builtin_define_with_value ((PREFIX), (INFO)->name, 1); \ +- free (macro); \ +- } \ +- while (0) +- + /* Target CPU builtins. */ +-#define TARGET_CPU_CPP_BUILTINS() loongarch_cpu_cpp_builtins (pfile) +- +-/* Target CPU versions for D. 
*/ +-#define TARGET_D_CPU_VERSIONS loongarch_d_target_versions ++#define TARGET_CPU_CPP_BUILTINS() loongarch_cpu_cpp_builtins (pfile) + +-/* Default target_flags if no switches are specified */ +- +-#ifndef TARGET_DEFAULT +-#define TARGET_DEFAULT 0 +-#endif +- +-#ifndef TARGET_CPU_DEFAULT +-#define TARGET_CPU_DEFAULT 0 +-#endif ++/* Default target_flags if no switches are specified. */ + + #ifdef IN_LIBGCC2 + #undef TARGET_64BIT +-/* Make this compile time constant for libgcc2 */ ++/* Make this compile time constant for libgcc2. */ + #ifdef __loongarch64 +-#define TARGET_64BIT 1 ++#define TARGET_64BIT 1 + #else +-#define TARGET_64BIT 0 ++#define TARGET_64BIT 0 + #endif +-#endif /* IN_LIBGCC2 */ ++#endif /* IN_LIBGCC2 */ + + #define TARGET_LIBGCC_SDATA_SECTION ".sdata" + +-#ifndef MULTILIB_ISA_DEFAULT +-#if LARCH_ISA_DEFAULT == 0 +-#define MULTILIB_ISA_DEFAULT "loongarch64" +-#endif +-#endif +- +-#ifndef LARCH_ABI_DEFAULT +-#define LARCH_ABI_DEFAULT ABILP32 +-#endif +- +-/* Use the most portable ABI flag for the ASM specs. */ +- +-#if LARCH_ABI_DEFAULT == ABILP32 +-#define MULTILIB_ABI_DEFAULT "mabi=lp32" +-#elif LARCH_ABI_DEFAULT == ABILP64 +-#define MULTILIB_ABI_DEFAULT "mabi=lp64" +-#endif +- +-#ifndef MULTILIB_DEFAULTS +-#define MULTILIB_DEFAULTS \ +- {MULTILIB_ISA_DEFAULT, MULTILIB_ABI_DEFAULT } +-#endif +- +-/* A spec condition that matches all -loongarch arguments. */ +- +-#define LARCH_ISA_LEVEL_OPTION_SPEC \ +- "loongarch" +- +-/* A spec condition that matches all architecture arguments. */ +- +-#define LARCH_ARCH_OPTION_SPEC \ +- LARCH_ISA_LEVEL_OPTION_SPEC "|march=*" +- +-/* A spec that infers a -loongarch argument from an -march argument. */ +- +-#define LARCH_ISA_LEVEL_SPEC \ +- "%{" LARCH_ISA_LEVEL_OPTION_SPEC ":;:}" +- +-/* A spec that injects the default multilib ISA if no architecture is +- specified. */ +- +-#define LARCH_DEFAULT_ISA_LEVEL_SPEC \ +- "%{" LARCH_ISA_LEVEL_OPTION_SPEC ":;: \ +- %{!march=*: -" MULTILIB_ISA_DEFAULT "}}" +- +-/* A spec that infers a -mhard-float or -msoft-float setting from an +- -march argument. Note that soft-float and hard-float code are not +- link-compatible. */ +- +-#define LARCH_ARCH_FLOAT_SPEC \ +- "%{mhard-float|msoft-float|mno-float|march=loongarch*:; \ +- march=vr41*|march=m4k|march=4k*|march=24kc|march=24kec \ +- |march=34kc|march=34kn|march=74kc|march=1004kc|march=5kc \ +- |march=m14k*|march=m5101|march=octeon|march=xlr: -msoft-float; \ +- march=*: -mhard-float}" +- +-/* A spec condition that matches 32-bit options. It only works if +- LARCH_ISA_LEVEL_SPEC has been applied. */ +- +-#define LARCH_32BIT_OPTION_SPEC \ +- "loongarch1|loongarch2|loongarch32*|mgp32" +- +-#if (LARCH_ABI_DEFAULT == ABILPX32 \ +- || LARCH_ABI_DEFAULT == ABILP64) +-#define OPT_ARCH64 "mabi=32|mgp32:;" +-#define OPT_ARCH32 "mabi=32|mgp32" +-#else +-#define OPT_ARCH64 "mabi=o64|mabi=n32|mabi=64|mgp64" +-#define OPT_ARCH32 "mabi=o64|mabi=n32|mabi=64|mgp64:;" +-#endif +- +-/* Support for a compile-time default CPU, et cetera. The rules are: +- --with-arch is ignored if -march is specified or a -loongarch is specified +- ; likewise --with-arch-32 and --with-arch-64. +- --with-tune is ignored if -mtune is specified; likewise +- --with-tune-32 and --with-tune-64. +- --with-abi is ignored if -mabi is specified. +- --with-float is ignored if -mhard-float or -msoft-float are +- specified. +- --with-fpu is ignored if -msoft-float, -msingle-float or -mdouble-float are +- specified. 
+- --with-fp-32 is ignored if -msoft-float, -msingle-float, -mlsx or -mfp are +- specified. +- --with-divide is ignored if -mdivide-traps or -mdivide-breaks are +- specified. */ +-#define OPTION_DEFAULT_SPECS \ +- {"arch", "%{" LARCH_ARCH_OPTION_SPEC ":;: -march=%(VALUE)}" }, \ +- {"arch_32", "%{" OPT_ARCH32 ":%{" LARCH_ARCH_OPTION_SPEC ":;: -march=%(VALUE)}}" }, \ +- {"arch_64", "%{" OPT_ARCH64 ":%{" LARCH_ARCH_OPTION_SPEC ":;: -march=%(VALUE)}}" }, \ +- {"tune", "%{!mtune=*:-mtune=%(VALUE)}" }, \ +- {"tune_32", "%{" OPT_ARCH32 ":%{!mtune=*:-mtune=%(VALUE)}}" }, \ +- {"tune_64", "%{" OPT_ARCH64 ":%{!mtune=*:-mtune=%(VALUE)}}" }, \ +- {"abi", "%{!mabi=*:-mabi=%(VALUE)}" }, \ +- {"float", "%{!msoft-float:%{!mhard-float:-m%(VALUE)-float}}" }, \ +- {"fpu", "%{!msoft-float:%{!msingle-float:%{!mdouble-float:-m%(VALUE)-float}}}" }, \ +- {"fp_32", "%{" OPT_ARCH32 \ +- ":%{!msoft-float:%{!msingle-float:%{!mfp*:%{!mlsx:%{!mloongson-asx:-mfp%(VALUE)}}}}}" }, \ +- {"divide", "%{!mdivide-traps:%{!mdivide-breaks:-mdivide-%(VALUE)}}" } +- +-/* A spec that infers the: +- -mlsx setting from a -march=la464 argument. +- -mlasx setting from a -march=la464 argument. */ +-#define BASE_DRIVER_SELF_SPECS \ +- LARCH_ASE_LSX_SPEC \ +- LARCH_ASE_LASX_SPEC +- +-#define LARCH_ASE_LSX_SPEC \ +- "%{!mno-lsx: \ +- %{march=la464: -mlsx}}" +- +-#define LARCH_ASE_LASX_SPEC \ +- "%{!mno-lasx: \ +- %{march=la464: -mlasx}}" +- +-#define DRIVER_SELF_SPECS \ +- BASE_DRIVER_SELF_SPECS +- +-/* from N_LARCH */ +-#define ABI_SPEC \ +- "%{mabi=lp32:32}" \ +- "%{mabi=lp64:64}" \ +- +-#define STARTFILE_PREFIX_SPEC \ +- "/lib" ABI_SPEC "/ " \ +- "/usr/lib" ABI_SPEC "/ " \ +- "/lib/ " \ +- "/usr/lib/ " ++/* Driver native functions for SPEC processing in the GCC driver. */ ++#include "loongarch-driver.h" + + /* This definition replaces the formerly used 'm' constraint with a + different constraint letter in order to avoid changing semantics of +@@ -341,71 +57,11 @@ struct loongarch_cpu_info { + must not be used in insn definitions or inline assemblies. */ + #define TARGET_MEM_CONSTRAINT 'w' + +-/* True if the file format uses 64-bit symbols. At present, this is +- only true for n64, which uses 64-bit ELF. */ +-#define FILE_HAS_64BIT_SYMBOLS (loongarch_abi == ABILP64) +- +-/* True if symbols are 64 bits wide. This is usually determined by +- the ABI's file format, but it can be overridden by -msym32. Note that +- overriding the size with -msym32 changes the ABI of relocatable objects, +- although it doesn't change the ABI of a fully-linked object. */ +-#define ABI_HAS_64BIT_SYMBOLS (FILE_HAS_64BIT_SYMBOLS \ +- && Pmode == DImode) +- +-/* ISA supports instructions DMUL, DMULU, DMUH, DMUHU. */ +-#define ISA_HAS_DMUL (TARGET_64BIT) +- +-/* ISA has floating-point RECIP.fmt and RSQRT.fmt instructions. The +- LARCH64 rev. 1 ISA says that RECIP.D and RSQRT.D are unpredictable when +- doubles are stored in pairs of FPRs, so for safety's sake, we apply +- this restriction to the LARCH IV ISA too. */ +-#define ISA_HAS_FP_RECIP_RSQRT(MODE) \ +- ((MODE) == SFmode \ +- || (TARGET_FLOAT64 \ +- && (MODE) == DFmode)) +- +-/* The LSX ASE is available. */ +-#define ISA_HAS_LSX (TARGET_LSX) +- +-/* The LASX ASE is available. */ +-#define ISA_HAS_LASX (TARGET_LASX) +- + /* Tell collect what flags to pass to nm. */ + #ifndef NM_FLAGS + #define NM_FLAGS "-Bn" + #endif + +- +-/* SUBTARGET_ASM_DEBUGGING_SPEC handles passing debugging options to +- the assembler. It may be overridden by subtargets. 
+- +- Beginning with gas 2.13, -mdebug must be passed to correctly handle +- COFF debugging info. */ +- +-#ifndef SUBTARGET_ASM_DEBUGGING_SPEC +-#define SUBTARGET_ASM_DEBUGGING_SPEC "\ +-%{g} %{g0} %{g1} %{g2} %{g3} \ +-%{ggdb:-g} %{ggdb0:-g0} %{ggdb1:-g1} %{ggdb2:-g2} %{ggdb3:-g3} \ +-%{gstabs:-g} %{gstabs0:-g0} %{gstabs1:-g1} %{gstabs2:-g2} %{gstabs3:-g3} \ +-%{gstabs+:-g} %{gstabs+0:-g0} %{gstabs+1:-g1} %{gstabs+2:-g2} %{gstabs+3:-g3}" +-#endif +- +-/* FP_ASM_SPEC represents the floating-point options that must be passed +- to the assembler when FPXX support exists. Prior to that point the +- assembler could accept the options but were not required for +- correctness. We only add the options when absolutely necessary +- because passing -msoft-float to the assembler will cause it to reject +- all hard-float instructions which may require some user code to be +- updated. */ +- +-#ifdef HAVE_AS_DOT_MODULE +-#define FP_ASM_SPEC "\ +-%{mhard-float} %{msoft-float} \ +-%{msingle-float} %{mdouble-float}" +-#else +-#define FP_ASM_SPEC +-#endif +- + /* SUBTARGET_ASM_SPEC is always passed to the assembler. It may be + overridden by subtargets. */ + +@@ -414,29 +70,21 @@ struct loongarch_cpu_info { + #endif + + #undef ASM_SPEC +-#define ASM_SPEC "\ +-%{mabi=*} %{!mabi=*: %(asm_abi_default_spec)} \ +-" ++#define ASM_SPEC "%{mabi=lp64d:-mabi=lp64} %{subtarget_asm_spec}" ++ + /* Extra switches sometimes passed to the linker. */ + + #ifndef LINK_SPEC + #define LINK_SPEC "" +-#endif /* LINK_SPEC defined */ +- ++#endif /* LINK_SPEC defined */ + +-/* Specs for the compiler proper */ +- +-/* SUBTARGET_CC1_SPEC is passed to the compiler proper. It may be +- overridden by subtargets. */ +-#ifndef SUBTARGET_CC1_SPEC +-#define SUBTARGET_CC1_SPEC "" +-#endif ++/* Specs for the compiler proper. */ + + /* CC1_SPEC is the set of arguments to pass to the compiler proper. */ + + #undef CC1_SPEC + #define CC1_SPEC "\ +-%{G*} %{EB:-meb} %{EL:-mel} %{EB:%{EL:%emay not use both -EB and -EL}} \ ++%{G*} \ + %(subtarget_cc1_spec)" + + /* Preprocessor specs. */ +@@ -459,63 +107,38 @@ struct loongarch_cpu_info { + + Do not define this macro if it does not need to do anything. */ + +-#define EXTRA_SPECS \ +- { "subtarget_cc1_spec", SUBTARGET_CC1_SPEC }, \ +- { "subtarget_cpp_spec", SUBTARGET_CPP_SPEC }, \ +- { "subtarget_asm_debugging_spec", SUBTARGET_ASM_DEBUGGING_SPEC }, \ +- { "subtarget_asm_spec", SUBTARGET_ASM_SPEC }, \ +- { "asm_abi_default_spec", "-" MULTILIB_ABI_DEFAULT }, \ +- SUBTARGET_EXTRA_SPECS +- +-#ifndef SUBTARGET_EXTRA_SPECS +-#define SUBTARGET_EXTRA_SPECS +-#endif +- +-#define DBX_DEBUGGING_INFO 1 /* generate stabs (OSF/rose) */ +-#define DWARF2_DEBUGGING_INFO 1 /* dwarf2 debugging info */ +- +-#ifndef PREFERRED_DEBUGGING_TYPE +-#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG +-#endif +- +-/* The size of DWARF addresses should be the same as the size of symbols +- in the target file format. They shouldn't depend on things like -msym32, +- because many DWARF consumers do not allow the mixture of address sizes +- that one would then get from linking -msym32 code with -msym64 code. +-*/ +-#define DWARF2_ADDR_SIZE (FILE_HAS_64BIT_SYMBOLS ? 8 : 4) +- +-/* By default, turn on GDB extensions. */ +-#define DEFAULT_GDB_EXTENSIONS 1 ++#define EXTRA_SPECS \ ++ {"subtarget_cc1_spec", SUBTARGET_CC1_SPEC}, \ ++ {"subtarget_cpp_spec", SUBTARGET_CPP_SPEC}, \ ++ {"subtarget_asm_spec", SUBTARGET_ASM_SPEC}, + + /* Registers may have a prefix which can be ignored when matching + user asm and register definitions. 
*/ + #ifndef REGISTER_PREFIX +-#define REGISTER_PREFIX "$" ++#define REGISTER_PREFIX "$" + #endif + + /* Local compiler-generated symbols must have a prefix that the assembler +- understands. By default, this is $, although some targets (e.g., +- NetBSD-ELF) need to override this. */ ++ understands. */ + +-#ifndef LOCAL_LABEL_PREFIX +-#define LOCAL_LABEL_PREFIX "$" +-#endif ++#define LOCAL_LABEL_PREFIX "." + + /* By default on the loongarch, external symbols do not have an underscore +- prepended, but some targets (e.g., NetBSD) require this. */ ++ prepended. */ + +-#ifndef USER_LABEL_PREFIX +-#define USER_LABEL_PREFIX "" ++#define USER_LABEL_PREFIX "" ++ ++#ifndef PREFERRED_DEBUGGING_TYPE ++#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG + #endif + +-/* On Sun 4, this limit is 2048. We use 1500 to be safe, +- since the length can run past this up to a continuation point. */ +-#undef DBX_CONTIN_LENGTH +-#define DBX_CONTIN_LENGTH 1500 ++/* The size of DWARF addresses should be the same as the size of symbols ++ in the target file format. */ ++#define DWARF2_ADDR_SIZE (TARGET_64BIT ? 8 : 4) + +-/* How to renumber registers for dbx and gdb. */ +-#define DBX_REGISTER_NUMBER(REGNO) loongarch_dbx_regno[REGNO] ++/* By default, produce dwarf version 2 format debugging output in response ++ to the ‘-g’ option. */ ++#define DWARF2_DEBUGGING_INFO 1 + + /* The mapping from gcc register number to DWARF 2 CFA column number. */ + #define DWARF_FRAME_REGNUM(REGNO) loongarch_dwarf_regno[REGNO] +@@ -530,7 +153,7 @@ struct loongarch_cpu_info { + #define EH_RETURN_DATA_REGNO(N) \ + ((N) < (4) ? (N) + GP_ARG_FIRST : INVALID_REGNUM) + +-#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, GP_ARG_FIRST + 4) ++#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, GP_ARG_FIRST + 4) + + #define EH_USES(N) loongarch_eh_uses (N) + +@@ -539,19 +162,7 @@ struct loongarch_cpu_info { + SFmode register saves. */ + #define DWARF_CIE_DATA_ALIGNMENT -4 + +-/* Correct the offset of automatic variables and arguments. Note that +- the LARCH debug format wants all automatic variables and arguments +- to be in terms of the virtual frame pointer (stack pointer before +- any adjustment in the function), while the LARCH 3.0 linker wants +- the frame pointer to be the stack pointer after the initial +- adjustment. */ +- +-#define DEBUGGER_AUTO_OFFSET(X) \ +- loongarch_debugger_offset (X, (HOST_WIDE_INT) 0) +-#define DEBUGGER_ARG_OFFSET(OFFSET, X) \ +- loongarch_debugger_offset (X, (HOST_WIDE_INT) OFFSET) +- +-/* Target machine storage layout */ ++/* Target machine storage layout. */ + + #define BITS_BIG_ENDIAN 0 + #define BYTES_BIG_ENDIAN 0 +@@ -576,27 +187,19 @@ struct loongarch_cpu_info { + #define BITS_PER_LASX_REG (UNITS_PER_LASX_REG * BITS_PER_UNIT) + + /* For LARCH, width of a floating point register. */ +-#define UNITS_PER_FPREG (TARGET_FLOAT64 ? 8 : 4) +- +-/* The number of consecutive floating-point registers needed to store the +- largest format supported by the FPU. */ +-#define MAX_FPRS_PER_FMT (TARGET_FLOAT64 || TARGET_SINGLE_FLOAT ? 1 : 2) +- +-/* The number of consecutive floating-point registers needed to store the +- smallest format supported by the FPU. */ +-#define MIN_FPRS_PER_FMT 1 ++#define UNITS_PER_FPREG (TARGET_DOUBLE_FLOAT ? 8 : 4) + + /* The largest size of value that can be held in floating-point + registers and moved with a single instruction. */ + #define UNITS_PER_HWFPVALUE \ +- (TARGET_SOFT_FLOAT_ABI ? 0 : MAX_FPRS_PER_FMT * UNITS_PER_FPREG) ++ (TARGET_SOFT_FLOAT ? 
0 : UNITS_PER_FPREG) + + /* The largest size of value that can be held in floating-point + registers. */ +-#define UNITS_PER_FPVALUE \ +- (TARGET_SOFT_FLOAT_ABI ? 0 \ +- : TARGET_SINGLE_FLOAT ? UNITS_PER_FPREG \ +- : LONG_DOUBLE_TYPE_SIZE / BITS_PER_UNIT) ++#define UNITS_PER_FPVALUE \ ++ (TARGET_SOFT_FLOAT ? 0 \ ++ : TARGET_SINGLE_FLOAT ? UNITS_PER_FPREG \ ++ : LONG_DOUBLE_TYPE_SIZE / BITS_PER_UNIT) + + /* The number of bytes in a double. */ + #define UNITS_PER_DOUBLE (TYPE_PRECISION (double_type_node) / BITS_PER_UNIT) +@@ -609,7 +212,7 @@ struct loongarch_cpu_info { + + #define FLOAT_TYPE_SIZE 32 + #define DOUBLE_TYPE_SIZE 64 +-#define LONG_DOUBLE_TYPE_SIZE (TARGET_NEWABI ? 128 : 64) ++#define LONG_DOUBLE_TYPE_SIZE (TARGET_64BIT ? 128 : 64) + + /* Define the sizes of fixed-point types. */ + #define SHORT_FRACT_TYPE_SIZE 8 +@@ -620,8 +223,6 @@ struct loongarch_cpu_info { + #define SHORT_ACCUM_TYPE_SIZE 16 + #define ACCUM_TYPE_SIZE 32 + #define LONG_ACCUM_TYPE_SIZE 64 +-/* FIXME. LONG_LONG_ACCUM_TYPE_SIZE should be 128 bits, but GCC +- doesn't support 128-bit integers for LARCH32 currently. */ + #define LONG_LONG_ACCUM_TYPE_SIZE (TARGET_64BIT ? 128 : 64) + + /* long double is not a fixed mode, but the idea is that, if we +@@ -630,7 +231,7 @@ struct loongarch_cpu_info { + + /* Width in bits of a pointer. */ + #ifndef POINTER_SIZE +-#define POINTER_SIZE ((TARGET_64BIT) ? 64 : 32) ++#define POINTER_SIZE (TARGET_64BIT ? 64 : 32) + #endif + + /* Allocation boundary (in *bits*) for storing arguments in argument list. */ +@@ -642,8 +243,8 @@ struct loongarch_cpu_info { + /* Alignment of field after `int : 0' in a structure. */ + #define EMPTY_FIELD_BOUNDARY 32 + +-/* Every structure's size must be a multiple of this. */ +-/* 8 is observed right on a DECstation and on riscos 4.02. */ ++/* Number of bits which any structure or union's size must be a multiple of. ++ Each structure or union's size is rounded up to a multiple of this. */ + #define STRUCTURE_SIZE_BOUNDARY 8 + + /* There is no point aligning anything to a rounder boundary than +@@ -655,6 +256,9 @@ struct loongarch_cpu_info { + /* All accesses must be aligned. */ + #define STRICT_ALIGNMENT (TARGET_STRICT_ALIGN) + ++/* Glibc align malloc to 128 from glibc/sysdeps/generic/malloc-alignment.h. */ ++#define MALLOC_ABI_ALIGNMENT 128 ++ + /* Define this if you wish to imitate the way many other C compilers + handle alignment of bitfields and the structures that contain + them. +@@ -699,22 +303,17 @@ struct loongarch_cpu_info { + /* We need this for the same reason as DATA_ALIGNMENT, namely to cause + character arrays to be word-aligned so that `strcpy' calls that copy + constants to character arrays can be done inline, and 'strcmp' can be +- optimised to use word loads. */ +-#define LOCAL_ALIGNMENT(TYPE, ALIGN) \ +- DATA_ALIGNMENT (TYPE, ALIGN) +- +-#define PAD_VARARGS_DOWN \ +- (targetm.calls.function_arg_padding (TYPE_MODE (type), type) == PAD_DOWNWARD) ++ optimised to use word loads. */ ++#define LOCAL_ALIGNMENT(TYPE, ALIGN) DATA_ALIGNMENT (TYPE, ALIGN) + + /* Define if operations between registers always perform the operation + on the full register even if a narrower mode is specified. */ + #define WORD_REGISTER_OPERATIONS 1 + +-/* When in 64-bit mode, move insns will sign extend SImode and CCmode ++/* When in 64-bit mode, move insns will sign extend SImode and FCCmode + moves. All other references are zero extended. */ + #define LOAD_EXTEND_OP(MODE) \ +- (TARGET_64BIT && ((MODE) == SImode || (MODE) == CCmode) \ +- ? 
SIGN_EXTEND : ZERO_EXTEND) ++ ((TARGET_64BIT && (MODE) == SImode) ? SIGN_EXTEND : UNKNOWN) + + /* Define this macro if it is advisable to hold scalars in registers + in a wider mode than that declared by the program. In such cases, +@@ -722,13 +321,13 @@ struct loongarch_cpu_info { + type, but kept valid in the wider mode. The signedness of the + extension may differ from that of the type. */ + +-#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \ +- if (GET_MODE_CLASS (MODE) == MODE_INT \ ++#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \ ++ if (GET_MODE_CLASS (MODE) == MODE_INT \ + && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \ +- { \ +- if ((MODE) == SImode) \ +- (UNSIGNEDP) = 0; \ +- (MODE) = Pmode; \ ++ { \ ++ if ((MODE) == SImode) \ ++ (UNSIGNEDP) = 0; \ ++ (MODE) = Pmode; \ + } + + /* Pmode is always the same as ptr_mode, but not always the same as word_mode. +@@ -738,11 +337,11 @@ struct loongarch_cpu_info { + /* Define if loading short immediate values into registers sign extends. */ + #define SHORT_IMMEDIATES_SIGN_EXTEND 1 + +-/* The [d]clz instructions have the natural values at 0. */ ++/* The clz.{w/d} instructions have the natural values at 0. */ + + #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ + ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2) +- ++ + /* Standard register usage. */ + + /* Number of hardware registers. We have: +@@ -757,57 +356,39 @@ struct loongarch_cpu_info { + + #define FIRST_PSEUDO_REGISTER 74 + +-/* By default, fix the kernel registers ($26 and $27), the global +- pointer ($28) and the stack pointer ($29). This can change +- depending on the command-line options. +- +- Regarding coprocessor registers: without evidence to the contrary, +- it's best to assume that each coprocessor register has a unique +- use. This can be overridden, in, e.g., loongarch_option_override or +- TARGET_CONDITIONAL_REGISTER_USAGE should the assumption be +- inappropriate for a particular target. */ +- ++/* zero, tp, sp and x are fixed. */ + #define FIXED_REGISTERS \ +-{ \ ++{ /* General-purpose registers. */ \ + 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ ++ /* Floating-point registers. */ \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ ++ /* Others. */ \ + 0, 0, 0, 0, 0, 0, 0, 1, 1, 1} + +- +-/* Set up this array for o32 by default. +- +- Note that we don't mark $31 as a call-clobbered register. The idea is +- that it's really the call instructions themselves which clobber $31. +- We don't care what the called function does with it afterwards. +- +- This approach makes it easier to implement sibcalls. Unlike normal +- calls, sibcalls don't clobber $31, so the register reaches the +- called function in tact. EPILOGUE_USES says that $31 is useful +- to the called function. */ +- ++/* The call RTLs themselves clobber ra. */ + #define CALL_USED_REGISTERS \ +-{ \ ++{ /* General registers. */ \ + 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ ++ /* Floating-point registers. */ \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, \ ++ /* Others. */ \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1} + + /* Internal macros to classify a register number as to whether it's a +- general purpose register, a floating point register, a +- multiply/divide register, or a status register. */ ++ general purpose register, a floating point register, or a status ++ register. 
*/ + + #define GP_REG_FIRST 0 +-#define GP_REG_LAST 31 +-#define GP_REG_NUM (GP_REG_LAST - GP_REG_FIRST + 1) +-#define GP_DBX_FIRST 0 ++#define GP_REG_LAST 31 ++#define GP_REG_NUM (GP_REG_LAST - GP_REG_FIRST + 1) + + #define FP_REG_FIRST 32 +-#define FP_REG_LAST 63 +-#define FP_REG_NUM (FP_REG_LAST - FP_REG_FIRST + 1) +-#define FP_DBX_FIRST ((write_symbols == DBX_DEBUG) ? 38 : 32) ++#define FP_REG_LAST 63 ++#define FP_REG_NUM (FP_REG_LAST - FP_REG_FIRST + 1) + + #define LSX_REG_FIRST FP_REG_FIRST + #define LSX_REG_LAST FP_REG_LAST +@@ -823,20 +404,16 @@ struct loongarch_cpu_info { + would need to be handled by the DWARF unwinder. */ + #define DWARF_ALT_FRAME_RETURN_COLUMN 72 + +-#define ST_REG_FIRST 64 +-#define ST_REG_LAST 71 +-#define ST_REG_NUM (ST_REG_LAST - ST_REG_FIRST + 1) ++#define FCC_REG_FIRST 64 ++#define FCC_REG_LAST 71 ++#define FCC_REG_NUM (FCC_REG_LAST - FCC_REG_FIRST + 1) + +-#define GP_REG_P(REGNO) \ ++#define GP_REG_P(REGNO) \ + ((unsigned int) ((int) (REGNO) - GP_REG_FIRST) < GP_REG_NUM) +-#define M16_REG_P(REGNO) \ +- (((REGNO) >= 2 && (REGNO) <= 7) || (REGNO) == 16 || (REGNO) == 17) +-#define M16STORE_REG_P(REGNO) \ +- (((REGNO) >= 2 && (REGNO) <= 7) || (REGNO) == 0 || (REGNO) == 17) +-#define FP_REG_P(REGNO) \ ++#define FP_REG_P(REGNO) \ + ((unsigned int) ((int) (REGNO) - FP_REG_FIRST) < FP_REG_NUM) +-#define ST_REG_P(REGNO) \ +- ((unsigned int) ((int) (REGNO) - ST_REG_FIRST) < ST_REG_NUM) ++#define FCC_REG_P(REGNO) \ ++ ((unsigned int) ((int) (REGNO) - FCC_REG_FIRST) < FCC_REG_NUM) + #define LSX_REG_P(REGNO) \ + ((unsigned int) ((int) (REGNO) - LSX_REG_FIRST) < LSX_REG_NUM) + #define LASX_REG_P(REGNO) \ +@@ -846,10 +423,6 @@ struct loongarch_cpu_info { + #define LSX_REG_RTX_P(X) (REG_P (X) && LSX_REG_P (REGNO (X))) + #define LASX_REG_RTX_P(X) (REG_P (X) && LASX_REG_P (REGNO (X))) + +- +-#define HARD_REGNO_RENAME_OK(OLD_REG, NEW_REG) \ +- loongarch_hard_regno_rename_ok (OLD_REG, NEW_REG) +- + /* Select a register mode required for caller save of hard regno REGNO. */ + #define HARD_REGNO_CALLER_SAVE_MODE(REGNO, NREGS, MODE) \ + loongarch_hard_regno_caller_save_mode (REGNO, NREGS, MODE) +@@ -862,35 +435,34 @@ struct loongarch_cpu_info { + #define ARG_POINTER_REGNUM 72 + #define FRAME_POINTER_REGNUM 73 + +-#define HARD_FRAME_POINTER_REGNUM \ +- (GP_REG_FIRST + 22) +- +-/* FIXME: */ +-/* #define HARD_FRAME_POINTER_IS_FRAME_POINTER (HARD_FRAME_POINTER_REGNUM == FRAME_POINTER_REGNUM) */ +-/* #define HARD_FRAME_POINTER_IS_ARG_POINTER (HARD_FRAME_POINTER_REGNUM == ARG_POINTER_REGNUM) */ ++#define HARD_FRAME_POINTER_REGNUM (GP_REG_FIRST + 22) + + #define HARD_FRAME_POINTER_IS_FRAME_POINTER 0 + #define HARD_FRAME_POINTER_IS_ARG_POINTER 0 + +-/* FIXME: */ + /* Register in which static-chain is passed to a function. */ +-#define STATIC_CHAIN_REGNUM (GP_REG_FIRST + 20) /* $t8 */ +- +-#define LARCH_PROLOGUE_TEMP_REGNUM \ +- (GP_REG_FIRST + 13) +-#define LARCH_PROLOGUE_TEMP2_REGNUM \ +- (GP_REG_FIRST + 12) +-#define LARCH_PROLOGUE_TEMP3_REGNUM \ +- (GP_REG_FIRST + 14) +-#define LARCH_EPILOGUE_TEMP_REGNUM \ +- (GP_REG_FIRST + (12)) +- +-#define LARCH_PROLOGUE_TEMP(MODE) gen_rtx_REG (MODE, LARCH_PROLOGUE_TEMP_REGNUM) ++#define STATIC_CHAIN_REGNUM (GP_REG_FIRST + 20) /* $t8 */ ++ ++/* DRAP register if static-chain register is unavailable. 
*/ ++#define DRAP_REGNUM (GP_REG_FIRST + 15) /* $t3 */ ++ ++#define GP_TEMP_FIRST (GP_REG_FIRST + 12) ++#define LARCH_PROLOGUE_TEMP_REGNUM (GP_TEMP_FIRST + 1) ++#define LARCH_PROLOGUE_TEMP2_REGNUM (GP_TEMP_FIRST) ++#define LARCH_PROLOGUE_TEMP3_REGNUM (GP_TEMP_FIRST + 2) ++#define LARCH_EPILOGUE_TEMP_REGNUM (GP_TEMP_FIRST) ++ ++#define CALLEE_SAVED_REG_NUMBER(REGNO) \ ++ ((REGNO) >= 22 && (REGNO) <= 31 ? (REGNO) - 22 : -1) ++ ++#define LARCH_PROLOGUE_TEMP(MODE) \ ++ gen_rtx_REG (MODE, LARCH_PROLOGUE_TEMP_REGNUM) + #define LARCH_PROLOGUE_TEMP2(MODE) \ + gen_rtx_REG (MODE, LARCH_PROLOGUE_TEMP2_REGNUM) + #define LARCH_PROLOGUE_TEMP3(MODE) \ + gen_rtx_REG (MODE, LARCH_PROLOGUE_TEMP3_REGNUM) +-#define LARCH_EPILOGUE_TEMP(MODE) gen_rtx_REG (MODE, LARCH_EPILOGUE_TEMP_REGNUM) ++#define LARCH_EPILOGUE_TEMP(MODE) \ ++ gen_rtx_REG (MODE, LARCH_EPILOGUE_TEMP_REGNUM) + + /* Define this macro if it is as good or better to call a constant + function address than to call an address kept in a register. */ +@@ -898,7 +470,6 @@ struct loongarch_cpu_info { + + #define THREAD_POINTER_REGNUM (GP_REG_FIRST + 2) + +- + /* Define the classes of registers for register constraints in the + machine description. Also define ranges of constants. + +@@ -908,7 +479,7 @@ struct loongarch_cpu_info { + + The name GENERAL_REGS must be the name of a class (or an alias for + another name such as ALL_REGS). This is the class of registers +- that is allowed by "g" or "r" in a register constraint. ++ that is allowed by "r" in a register constraint. + Also, registers outside this class are allocated only when + instructions express preferences for them. + +@@ -921,16 +492,16 @@ struct loongarch_cpu_info { + + enum reg_class + { +- NO_REGS, /* no registers in set */ +- SIBCALL_REGS, /* SIBCALL_REGS */ +- JALR_REGS, /* JALR_REGS */ +- GR_REGS, /* integer registers */ +- CSR_REGS, /* integer registers except for $r0 and $r1 for csr. */ +- FP_REGS, /* floating point registers */ +- ST_REGS, /* status registers (fp status) */ +- FRAME_REGS, /* arg pointer and frame pointer */ +- ALL_REGS, /* all registers */ +- LIM_REG_CLASSES /* max value + 1 */ ++ NO_REGS, /* no registers in set */ ++ SIBCALL_REGS, /* registers used by indirect sibcalls */ ++ JIRL_REGS, /* registers used by indirect calls */ ++ CSR_REGS, /* integer registers except for $r0 and $r1 for lcsr. 
*/ ++ GR_REGS, /* integer registers */ ++ FP_REGS, /* floating point registers */ ++ FCC_REGS, /* status registers (fp status) */ ++ FRAME_REGS, /* arg pointer and frame pointer */ ++ ALL_REGS, /* all registers */ ++ LIM_REG_CLASSES /* max value + 1 */ + }; + + #define N_REG_CLASSES (int) LIM_REG_CLASSES +@@ -945,11 +516,11 @@ enum reg_class + { \ + "NO_REGS", \ + "SIBCALL_REGS", \ +- "JALR_REGS", \ +- "GR_REGS", \ ++ "JIRL_REGS", \ + "CSR_REGS", \ ++ "GR_REGS", \ + "FP_REGS", \ +- "ST_REGS", \ ++ "FCC_REGS", \ + "FRAME_REGS", \ + "ALL_REGS" \ + } +@@ -968,29 +539,28 @@ enum reg_class + #define REG_CLASS_CONTENTS \ + { \ + { 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \ +- { 0x001ff000, 0x00000000, 0x00000000 }, /* SIBCALL_REGS */ \ +- { 0xff9ffff0, 0x00000000, 0x00000000 }, /* JALR_REGS */ \ +- { 0xffffffff, 0x00000000, 0x00000000 }, /* GR_REGS */ \ ++ { 0x001fd000, 0x00000000, 0x00000000 }, /* SIBCALL_REGS */ \ ++ { 0xff9ffff0, 0x00000000, 0x00000000 }, /* JIRL_REGS */ \ + { 0xfffffffc, 0x00000000, 0x00000000 }, /* CSR_REGS */ \ ++ { 0xffffffff, 0x00000000, 0x00000000 }, /* GR_REGS */ \ + { 0x00000000, 0xffffffff, 0x00000000 }, /* FP_REGS */ \ +- { 0x00000000, 0x00000000, 0x000000ff }, /* ST_REGS */ \ ++ { 0x00000000, 0x00000000, 0x000000ff }, /* FCC_REGS */ \ + { 0x00000000, 0x00000000, 0x00000300 }, /* FRAME_REGS */ \ + { 0xffffffff, 0xffffffff, 0x000003ff } /* ALL_REGS */ \ + } + +- + /* A C expression whose value is a register class containing hard + register REGNO. In general there is more that one such class; + choose a class which is "minimal", meaning that no smaller class + also contains the register. */ + +-#define REGNO_REG_CLASS(REGNO) loongarch_regno_to_class[ (REGNO) ] ++#define REGNO_REG_CLASS(REGNO) loongarch_regno_to_class[(REGNO)] + + /* A macro whose definition is the name of the class to which a + valid base register must belong. A base register is one used in + an address which is the register value plus a displacement. */ + +-#define BASE_REG_CLASS (GR_REGS) ++#define BASE_REG_CLASS (GR_REGS) + + /* A macro whose definition is the name of the class to which a + valid index register must belong. An index register is one used +@@ -998,7 +568,7 @@ enum reg_class + factor or added to another register (as well as added to a + displacement). */ + +-#define INDEX_REG_CLASS NO_REGS ++#define INDEX_REG_CLASS GR_REGS + + /* We generally want to put call-clobbered registers ahead of + call-saved ones. (IRA expects this.) */ +@@ -1006,10 +576,6 @@ enum reg_class + #define REG_ALLOC_ORDER \ + { /* Call-clobbered GPRs. */ \ + 12, 13, 14, 15, 16, 17, 18, 19, 20, 4, 5, 6, 7, 8, 9, 10, 11, 1, \ +- /* The global pointer. This is call-clobbered for o32 and o64 \ +- abicalls, call-saved for n32 and n64 abicalls, and a program \ +- invariant otherwise. Putting it between the call-clobbered \ +- and call-saved registers should cope with all eventualities. */ \ + /* Call-saved GPRs. */ \ + 23, 24, 25, 26, 27, 28, 29, 30, 31, \ + /* GPRs that can never be exposed to the register allocator. */ \ +@@ -1017,31 +583,27 @@ enum reg_class + /* Call-clobbered FPRs. */ \ + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, \ + 48, 49, 50, 51,52, 53, 54, 55, \ +- /* FPRs that are usually call-saved. The odd ones are actually \ +- call-clobbered for n32, but listing them ahead of the even \ +- registers might encourage the register allocator to fragment \ +- the available FPR pairs. 
We need paired FPRs to store long \ +- doubles, so it isn't clear that using a different order \ +- for n32 would be a win. */ \ + 56, 57, 58, 59, 60, 61, 62, 63, \ + /* None of the remaining classes have defined call-saved \ + registers. */ \ + 64, 65, 66, 67, 68, 69, 70, 71, 72, 73} + ++#define IMM_BITS 12 ++#define IMM_REACH (HOST_WIDE_INT_1 << IMM_BITS) ++#define HWIT_1U HOST_WIDE_INT_1U ++ + /* True if VALUE is an unsigned 6-bit number. */ + +-#define UIMM6_OPERAND(VALUE) \ +- (((VALUE) & ~(unsigned HOST_WIDE_INT) 0x3f) == 0) ++#define UIMM6_OPERAND(VALUE) (((VALUE) & ~(unsigned HOST_WIDE_INT) 0x3f) == 0) + + /* True if VALUE is a signed 10-bit number. */ + +-#define IMM10_OPERAND(VALUE) \ +- ((unsigned HOST_WIDE_INT) (VALUE) + 0x200 < 0x400) ++#define IMM10_OPERAND(VALUE) ((unsigned HOST_WIDE_INT) (VALUE) + 0x200 < 0x400) + + /* True if VALUE is a signed 12-bit number. */ + + #define IMM12_OPERAND(VALUE) \ +- ((unsigned HOST_WIDE_INT) (VALUE) + 0x800 < 0x1000) ++ ((unsigned HOST_WIDE_INT) (VALUE) + IMM_REACH / 2 < IMM_REACH) + + /* True if VALUE is a signed 13-bit number. */ + +@@ -1053,67 +615,51 @@ enum reg_class + #define IMM16_OPERAND(VALUE) \ + ((unsigned HOST_WIDE_INT) (VALUE) + 0x8000 < 0x10000) + +- +-/* True if VALUE is a signed 12-bit number. */ +- +-#define SMALL_OPERAND(VALUE) \ +- ((unsigned HOST_WIDE_INT) (VALUE) + 0x800 < 0x1000) +- + /* True if VALUE is an unsigned 12-bit number. */ + +-#define SMALL_OPERAND_UNSIGNED(VALUE) \ +- (((VALUE) & ~(unsigned HOST_WIDE_INT) 0xfff) == 0) ++#define IMM12_OPERAND_UNSIGNED(VALUE) \ ++ (((VALUE) & ~(unsigned HOST_WIDE_INT) (IMM_REACH - 1)) == 0) + +-/* True if VALUE can be loaded into a register using LUI. */ ++/* True if VALUE can be loaded into a register using LU12I. */ + +-#define LUI_OPERAND(VALUE) \ +- (((VALUE) | 0x7ffff000) == 0x7ffff000 \ +- || ((VALUE) | 0x7ffff000) + 0x1000 == 0) ++#define LU12I_OPERAND(VALUE) \ ++ (((VALUE) | ((HWIT_1U << 31) - IMM_REACH)) == ((HWIT_1U << 31) - IMM_REACH) \ ++ || ((VALUE) | ((HWIT_1U << 31) - IMM_REACH)) + IMM_REACH == 0) + +-/* True if VALUE can be loaded into a register using LUI. */ ++/* True if VALUE can be loaded into a register using LU32I. */ + +-#define LU32I_OPERAND(VALUE) \ +- ((((VALUE) | 0x7ffff00000000) == 0x7ffff00000000) \ +- || ((VALUE) | 0x7ffff00000000) + 0x100000000 == 0) ++#define LU32I_OPERAND(VALUE) \ ++ (((VALUE) | (((HWIT_1U << 19) - 1) << 32)) == (((HWIT_1U << 19) - 1) << 32) \ ++ || ((VALUE) | (((HWIT_1U << 19) - 1) << 32)) + (HWIT_1U << 32) == 0) + +-/* True if VALUE can be loaded into a register using LUI. */ ++/* True if VALUE can be loaded into a register using LU52I. */ + +-#define LU52I_OPERAND(VALUE) \ +- ((((VALUE) | 0xfff0000000000000) == 0xfff0000000000000)) ++#define HWIT_UC_0xFFF HOST_WIDE_INT_UC(0xfff) ++#define LU52I_OPERAND(VALUE) \ ++ (((VALUE) | (HWIT_UC_0xFFF << 52)) == (HWIT_UC_0xFFF << 52)) + + /* Return a value X with the low 12 bits clear, and such that + VALUE - X is a signed 12-bit value. 
*/ + +-#define CONST_HIGH_PART(VALUE) \ +- (((VALUE) + 0x800) & ~(unsigned HOST_WIDE_INT) 0xfff) ++#define CONST_HIGH_PART(VALUE) (((VALUE) + (IMM_REACH / 2)) & ~(IMM_REACH - 1)) + +-#define CONST_LOW_PART(VALUE) \ +- ((VALUE) - CONST_HIGH_PART (VALUE)) ++#define CONST_LOW_PART(VALUE) ((VALUE) - CONST_HIGH_PART (VALUE)) + +-#define SMALL_INT(X) SMALL_OPERAND (INTVAL (X)) +-#define SMALL_INT_UNSIGNED(X) SMALL_OPERAND_UNSIGNED (INTVAL (X)) +-#define LUI_INT(X) LUI_OPERAND (INTVAL (X)) ++#define IMM12_INT(X) IMM12_OPERAND (INTVAL (X)) ++#define IMM12_INT_UNSIGNED(X) IMM12_OPERAND_UNSIGNED (INTVAL (X)) ++#define LU12I_INT(X) LU12I_OPERAND (INTVAL (X)) + #define LU32I_INT(X) LU32I_OPERAND (INTVAL (X)) + #define LU52I_INT(X) LU52I_OPERAND (INTVAL (X)) +-#define ULARCH_12BIT_OFFSET_P(OFFSET) (IN_RANGE (OFFSET, -2048, 2047)) ++#define LARCH_U12BIT_OFFSET_P(OFFSET) (IN_RANGE (OFFSET, -2048, 2047)) + #define LARCH_9BIT_OFFSET_P(OFFSET) (IN_RANGE (OFFSET, -256, 255)) +-#define LISA_16BIT_OFFSET_P(OFFSET) (IN_RANGE (OFFSET, -32768, 32767)) +-#define LISA_SHIFT_2_OFFSET_P(OFFSET) (((OFFSET) & 0x3) == 0) +- +-/* The HI and LO registers can only be reloaded via the general +- registers. Condition code registers can only be loaded to the +- general registers, and from the floating point registers. */ +- +-#define SECONDARY_INPUT_RELOAD_CLASS(CLASS, MODE, X) \ +- loongarch_secondary_reload_class (CLASS, MODE, X, true) +-#define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, X) \ +- loongarch_secondary_reload_class (CLASS, MODE, X, false) ++#define LARCH_16BIT_OFFSET_P(OFFSET) (IN_RANGE (OFFSET, -32768, 32767)) ++#define LARCH_SHIFT_2_OFFSET_P(OFFSET) (((OFFSET) & 0x3) == 0) + + /* Return the maximum number of consecutive registers + needed to represent mode MODE in a register of class CLASS. */ + + #define CLASS_MAX_NREGS(CLASS, MODE) loongarch_class_max_nregs (CLASS, MODE) +- ++ + /* Stack layout; function entry, exit and calling. */ + + #define STACK_GROWS_DOWNWARD 1 +@@ -1127,11 +673,13 @@ enum reg_class + + #define TARGET_PTRMEMFUNC_VBIT_LOCATION ptrmemfunc_vbit_in_delta + +-#define ELIMINABLE_REGS \ +-{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ +- { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \ +- { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ +- { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM},} ++#define ELIMINABLE_REGS \ ++ { \ ++ {ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ ++ {ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \ ++ {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ ++ {FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \ ++ } + + #define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + (OFFSET) = loongarch_initial_elimination_offset ((FROM), (TO)) +@@ -1142,11 +690,7 @@ enum reg_class + /* The argument pointer always points to the first argument. */ + #define FIRST_PARM_OFFSET(FNDECL) 0 + +-/* o32 and o64 reserve stack space for all argument registers. */ +-#define REG_PARM_STACK_SPACE(FNDECL) \ +- (TARGET_OLDABI \ +- ? (MAX_ARGS_IN_REGISTERS * UNITS_PER_WORD) \ +- : 0) ++#define REG_PARM_STACK_SPACE(FNDECL) 0 + + /* Define this if it is the responsibility of the caller to + allocate the area reserved for arguments passed in registers. +@@ -1155,22 +699,25 @@ enum reg_class + `crtl->outgoing_args_size'. */ + #define OUTGOING_REG_PARM_STACK_SPACE(FNTYPE) 1 + +-#define STACK_BOUNDARY (TARGET_NEWABI ? 128 : 64) +- ++#define STACK_BOUNDARY (TARGET_ABI_LP64 ? 128 : 64) ++ ++/* Maximum stack alignment. */ ++#define MAX_STACK_ALIGNMENT (loongarch_stack_realign ? 
MAX_OFILE_ALIGNMENT : STACK_BOUNDARY) ++ + /* Symbolic macros for the registers used to return integer and floating + point values. */ + + #define GP_RETURN (GP_REG_FIRST + 4) + #define FP_RETURN ((TARGET_SOFT_FLOAT) ? GP_RETURN : (FP_REG_FIRST + 0)) + +-#define MAX_ARGS_IN_REGISTERS (TARGET_OLDABI ? 4 : 8) ++#define MAX_ARGS_IN_REGISTERS 8 + + /* Symbolic macros for the first/last argument registers. */ + + #define GP_ARG_FIRST (GP_REG_FIRST + 4) +-#define GP_ARG_LAST (GP_ARG_FIRST + MAX_ARGS_IN_REGISTERS - 1) ++#define GP_ARG_LAST (GP_ARG_FIRST + MAX_ARGS_IN_REGISTERS - 1) + #define FP_ARG_FIRST (FP_REG_FIRST + 0) +-#define FP_ARG_LAST (FP_ARG_FIRST + MAX_ARGS_IN_REGISTERS - 1) ++#define FP_ARG_LAST (FP_ARG_FIRST + MAX_ARGS_IN_REGISTERS - 1) + + /* True if MODE is vector and supported in a LSX vector register. */ + #define LSX_SUPPORTED_MODE_P(MODE) \ +@@ -1188,60 +735,39 @@ enum reg_class + && (GET_MODE_CLASS (MODE) == MODE_VECTOR_INT \ + || GET_MODE_CLASS (MODE) == MODE_VECTOR_FLOAT)) + ++#define RECIP_MASK_NONE 0x00 ++#define RECIP_MASK_DIV 0x01 ++#define RECIP_MASK_SQRT 0x02 ++#define RECIP_MASK_RSQRT 0x04 ++#define RECIP_MASK_VEC_DIV 0x08 ++#define RECIP_MASK_VEC_SQRT 0x10 ++#define RECIP_MASK_VEC_RSQRT 0x20 ++#define RECIP_MASK_ALL (RECIP_MASK_DIV | RECIP_MASK_SQRT \ ++ | RECIP_MASK_RSQRT | RECIP_MASK_VEC_SQRT \ ++ | RECIP_MASK_VEC_DIV | RECIP_MASK_VEC_RSQRT) ++ ++#define TARGET_RECIP_DIV ((recip_mask & RECIP_MASK_DIV) != 0 || TARGET_uARCH_LA664) ++#define TARGET_RECIP_SQRT ((recip_mask & RECIP_MASK_SQRT) != 0 || TARGET_uARCH_LA664) ++#define TARGET_RECIP_RSQRT ((recip_mask & RECIP_MASK_RSQRT) != 0 || TARGET_uARCH_LA664) ++#define TARGET_RECIP_VEC_DIV ((recip_mask & RECIP_MASK_VEC_DIV) != 0 || TARGET_uARCH_LA664) ++#define TARGET_RECIP_VEC_SQRT ((recip_mask & RECIP_MASK_VEC_SQRT) != 0 || TARGET_uARCH_LA664) ++#define TARGET_RECIP_VEC_RSQRT ((recip_mask & RECIP_MASK_VEC_RSQRT) != 0 || TARGET_uARCH_LA664) ++ + /* 1 if N is a possible register number for function argument passing. + We have no FP argument registers when soft-float. */ + + /* Accept arguments in a0-a7, and in fa0-fa7 if permitted by the ABI. */ +-#define FUNCTION_ARG_REGNO_P(N) \ +- (IN_RANGE ((N), GP_ARG_FIRST, GP_ARG_LAST) \ ++#define FUNCTION_ARG_REGNO_P(N) \ ++ (IN_RANGE ((N), GP_ARG_FIRST, GP_ARG_LAST) \ + || (UNITS_PER_FP_ARG && IN_RANGE ((N), FP_ARG_FIRST, FP_ARG_LAST))) + +- +-/* This structure has to cope with two different argument allocation +- schemes. Most LARCH ABIs view the arguments as a structure, of which +- the first N words go in registers and the rest go on the stack. If I +- < N, the Ith word might go in Ith integer argument register or in a +- floating-point register. For these ABIs, we only need to remember +- the offset of the current argument into the structure. +- +- So for the standard ABIs, the first N words are allocated to integer +- registers, and loongarch_function_arg decides on an argument-by-argument +- basis whether that argument should really go in an integer register, +- or in a floating-point one. */ +- +-typedef struct loongarch_args { +- /* Always true for varargs functions. Otherwise true if at least +- one argument has been passed in an integer register. */ +- int gp_reg_found; +- +- /* The number of arguments seen so far. */ +- unsigned int arg_number; +- +- /* The number of integer registers used so far. This is the number +- of words that have been added to the argument structure, limited +- to MAX_ARGS_IN_REGISTERS. 
*/ ++typedef struct { ++ /* Number of integer registers used so far, up to MAX_ARGS_IN_REGISTERS. */ + unsigned int num_gprs; + ++ /* Number of floating-point registers used so far, likewise. */ + unsigned int num_fprs; + +- /* The number of words passed on the stack. */ +- unsigned int stack_words; +- +- /* On the loongarch16, we need to keep track of which floating point +- arguments were passed in general registers, but would have been +- passed in the FP regs if this were a 32-bit function, so that we +- can move them to the FP regs if we wind up calling a 32-bit +- function. We record this information in fp_code, encoded in base +- four. A zero digit means no floating point argument, a one digit +- means an SFmode argument, and a two digit means a DFmode argument, +- and a three digit is not used. The low order digit is the first +- argument. Thus 6 == 1 * 4 + 2 means a DFmode argument followed by +- an SFmode argument. ??? A more sophisticated approach will be +- needed if LARCH_ABI != ABILP32. */ +- int fp_code; +- +- /* True if the function has a prototype. */ +- int prototype; + } CUMULATIVE_ARGS; + + /* Initialize a variable CUM of type CUMULATIVE_ARGS +@@ -1251,48 +777,37 @@ typedef struct loongarch_args { + #define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \ + memset (&(CUM), 0, sizeof (CUM)) + +- +-#define EPILOGUE_USES(REGNO) loongarch_epilogue_uses (REGNO) ++#define EPILOGUE_USES(REGNO) loongarch_epilogue_uses (REGNO) + ++#define STACK_ALIGN_SIZE_INTERNAL \ ++ (crtl->stack_realign_needed) \ ++? (crtl->stack_alignment_needed / BITS_PER_UNIT) \ ++: (TARGET_ABI_LP64 ? 16 : 8) + /* Treat LOC as a byte offset from the stack pointer and round it up + to the next fully-aligned offset. */ + #define LARCH_STACK_ALIGN(LOC) \ +- (TARGET_NEWABI ? ROUND_UP ((LOC), 16) : ROUND_UP ((LOC), 8)) ++ ROUND_UP ((LOC), TARGET_ABI_LP64 ? 16 : 8) + +- +-/* Output assembler code to FILE to increment profiler label # LABELNO +- for profiling a function entry. */ ++#define LARCH_STACK_ALIGN2(LOC) \ ++ ROUND_UP ((LOC), STACK_ALIGN_SIZE_INTERNAL) + + #define MCOUNT_NAME "_mcount" + + /* Emit rtl for profiling. Output assembler code to FILE + to call "_mcount" for profiling a function entry. */ +-#define PROFILE_HOOK(LABEL) \ +- { \ +- rtx fun, ra; \ +- ra = get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNUM); \ +- fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME); \ +- emit_library_call (fun, LCT_NORMAL, VOIDmode, ra, Pmode); \ ++#define PROFILE_HOOK(LABEL) \ ++ { \ ++ rtx fun, ra; \ ++ ra = get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNUM); \ ++ fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME); \ ++ emit_library_call (fun, LCT_NORMAL, VOIDmode, ra, Pmode); \ + } + + /* All the work done in PROFILE_HOOK, but still required. */ + #define FUNCTION_PROFILER(STREAM, LABELNO) do { } while (0) + +- +-/* The profiler preserves all interesting registers, including $31. */ +-#define LARCH_SAVE_REG_FOR_PROFILING_P(REGNO) false +- +-/* No loongarch port has ever used the profiler counter word, so don't emit it +- or the label for it. */ +- + #define NO_PROFILE_COUNTERS 1 + +-/* Define this macro if the code for function profiling should come +- before the function prologue. Normally, the profiling code comes +- after. */ +- +-/* #define PROFILE_BEFORE_PROLOGUE */ +- + /* EXIT_IGNORE_STACK should be nonzero if, when returning from a function, + the stack pointer does not matter. The value is tested only in + functions that have frame pointers. 
+@@ -1300,16 +815,13 @@ typedef struct loongarch_args { + + #define EXIT_IGNORE_STACK 1 + +- + /* Trampolines are a block of code followed by two pointers. */ + ++#define TRAMPOLINE_CODE_SIZE 16 + #define TRAMPOLINE_SIZE \ +- (loongarch_trampoline_code_size () + GET_MODE_SIZE (ptr_mode) * 2) +- +-/* Forcing a 64-bit alignment for 32-bit targets allows us to load two +- pointers from a single LUI base. */ +- +-#define TRAMPOLINE_ALIGNMENT 64 ++ ((Pmode == SImode) ? TRAMPOLINE_CODE_SIZE \ ++ : (TRAMPOLINE_CODE_SIZE + POINTER_SIZE * 2)) ++#define TRAMPOLINE_ALIGNMENT POINTER_SIZE + + /* loongarch_trampoline_init calls this library function to flush + program and data caches. */ +@@ -1318,96 +830,64 @@ typedef struct loongarch_args { + #define CACHE_FLUSH_FUNC "_flush_cache" + #endif + +-#define LARCH_ICACHE_SYNC(ADDR, SIZE) \ +- /* Flush both caches. We need to flush the data cache in case \ +- the system has a write-back cache. */ \ +- emit_library_call (gen_rtx_SYMBOL_REF (Pmode, loongarch_cache_flush_func), \ +- LCT_NORMAL, VOIDmode, ADDR, Pmode, SIZE, Pmode, \ +- GEN_INT (3), TYPE_MODE (integer_type_node)) +- +- + /* Addressing modes, and classification of registers for them. */ + +-#define REGNO_OK_FOR_INDEX_P(REGNO) 0 ++#define REGNO_OK_FOR_INDEX_P(REGNO) \ ++ loongarch_regno_mode_ok_for_base_p (REGNO, VOIDmode, 1) ++ + #define REGNO_MODE_OK_FOR_BASE_P(REGNO, MODE) \ + loongarch_regno_mode_ok_for_base_p (REGNO, MODE, 1) +- ++ + /* Maximum number of registers that can appear in a valid memory address. */ + +-#define MAX_REGS_PER_ADDRESS 1 ++#define MAX_REGS_PER_ADDRESS 2 + + /* Check for constness inline but use loongarch_legitimate_address_p + to check whether a constant really is an address. */ + +-#define CONSTANT_ADDRESS_P(X) \ +- (CONSTANT_P (X) && memory_address_p (SImode, X)) ++#define CONSTANT_ADDRESS_P(X) (CONSTANT_P (X) && memory_address_p (SImode, X)) + + /* This handles the magic '..CURRENT_FUNCTION' symbol, which means + 'the start of the function that this code is output in'. */ + +-#define ASM_OUTPUT_LABELREF(FILE,NAME) \ +- do { \ +- if (strcmp (NAME, "..CURRENT_FUNCTION") == 0) \ +- asm_fprintf ((FILE), "%U%s", \ +- XSTR (XEXP (DECL_RTL (current_function_decl), \ +- 0), 0)); \ +- else \ +- asm_fprintf ((FILE), "%U%s", (NAME)); \ +- } while (0) +- +-/* Flag to mark a function decl symbol that requires a long call. */ +-#define SYMBOL_FLAG_LONG_CALL (SYMBOL_FLAG_MACH_DEP << 0) +-#define SYMBOL_REF_LONG_CALL_P(X) \ +- ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_LONG_CALL) != 0) +- +-/* This flag marks functions that cannot be lazily bound. */ +-#define SYMBOL_FLAG_BIND_NOW (SYMBOL_FLAG_MACH_DEP << 1) +-#define SYMBOL_REF_BIND_NOW_P(RTX) \ +- ((SYMBOL_REF_FLAGS (RTX) & SYMBOL_FLAG_BIND_NOW) != 0) +- +-/* True if we're generating a form of LARCH16 code in which jump tables +- are stored in the text section and encoded as 16-bit PC-relative +- offsets. This is only possible when general text loads are allowed, +- since the table access itself will be an "lh" instruction. If the +- PC-relative offsets grow too large, 32-bit offsets are used instead. */ +- +- +-#define CASE_VECTOR_MODE (ptr_mode) ++#define ASM_OUTPUT_LABELREF(FILE, NAME) \ ++ do \ ++ { \ ++ if (strcmp (NAME, "..CURRENT_FUNCTION") == 0) \ ++ asm_fprintf ((FILE), "%U%s", \ ++ XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0)); \ ++ else \ ++ asm_fprintf ((FILE), "%U%s", (NAME)); \ ++ } \ ++ while (0) + +-/* Only use short offsets if their range will not overflow. 
*/ +-#define CASE_VECTOR_SHORTEN_MODE(MIN, MAX, BODY) \ +- (ptr_mode ? HImode : SImode) ++#define CASE_VECTOR_MODE Pmode + ++#define CASE_VECTOR_SHORTEN_MODE(MIN, MAX, BODY) Pmode + + /* Define this as 1 if `char' should by default be signed; else as 0. */ + #ifndef DEFAULT_SIGNED_CHAR + #define DEFAULT_SIGNED_CHAR 1 + #endif + +-/* Although LDC1 and SDC1 provide 64-bit moves on 32-bit targets, +- we generally don't want to use them for copying arbitrary data. +- A single N-word move is usually the same cost as N single-word moves. */ ++/* The SPARC port says: ++ The maximum number of bytes that a single instruction ++ can move quickly between memory and registers or between ++ two memory locations. */ + #define MOVE_MAX UNITS_PER_WORD + /* We don't modify it for LSX as it is only used by the classic reload. */ + #define MAX_MOVE_MAX 8 + +-/* Define this macro as a C expression which is nonzero if +- accessing less than a word of memory (i.e. a `char' or a +- `short') is no faster than accessing a word of memory, i.e., if +- such access require more than one instruction or if there is no +- difference in cost between byte and (aligned) word loads. +- +- On RISC machines, it tends to generate better code to define +- this as 1, since it avoids making a QI or HI mode register. +- +-*/ +-#define SLOW_BYTE_ACCESS (1) +- +-/* Standard LARCH integer shifts truncate the shift amount to the +- width of the shifted operand. However, Loongson MMI shifts +- do not truncate the shift amount at all. */ +-#define SHIFT_COUNT_TRUNCATED (1) ++/* The SPARC port says: ++ Nonzero if access to memory by bytes is slow and undesirable. ++ For RISC chips, it means that access to memory by bytes is no ++ better than access by words when possible, so grab a whole word ++ and maybe make use of that. */ ++#define SLOW_BYTE_ACCESS 1 + ++/* Standard LoongArch integer shifts truncate the shift amount to the ++ width of the shifted operand. */ ++#define SHIFT_COUNT_TRUNCATED 1 + + /* Specify the machine mode that pointers have. + After generation of rtl, the compiler makes no further distinction +@@ -1422,7 +902,6 @@ typedef struct loongarch_args { + + #define FUNCTION_MODE SImode + +- + /* We allocate $fcc registers by hand and can't cope with moves of + CCmode registers to and from pseudos (or memory). */ + #define AVOID_CCMODE_COPIES +@@ -1433,14 +912,6 @@ typedef struct loongarch_args { + #define BRANCH_COST(speed_p, predictable_p) loongarch_branch_cost + #define LOGICAL_OP_NON_SHORT_CIRCUIT 0 + +-/* The LARCH port has several functions that return an instruction count. +- Multiplying the count by this value gives the number of bytes that +- the instructions occupy. */ +-#define BASE_INSN_LENGTH (4) +- +-/* The length of a NOP in bytes. */ +-#define NOP_INSN_LENGTH (4) +- + /* If defined, modifies the length assigned to instruction INSN as a + function of the context in which it is used. LENGTH is an lvalue + that contains the initially computed length of the insn and should +@@ -1451,17 +922,8 @@ typedef struct loongarch_args { + /* Return the asm template for a conditional branch instruction. + OPCODE is the opcode's mnemonic and OPERANDS is the asm template for + its operands. */ +-#define LARCH_BRANCH(OPCODE, OPERANDS) \ +- OPCODE "\t" OPERANDS ++#define LARCH_BRANCH(OPCODE, OPERANDS) OPCODE "\t" OPERANDS + +-#define LARCH_BRANCH_C(OPCODE, OPERANDS) \ +- OPCODE "%:\t" OPERANDS +- +-/* Return an asm string that forces INSN to be treated as an absolute +- J or JAL instruction instead of an assembler macro. 
*/ +-#define LARCH_ABSOLUTE_JUMP(INSN) INSN +- +- + /* Control the assembler format that we output. */ + + /* Output to assembler file text saying following lines +@@ -1478,20 +940,19 @@ typedef struct loongarch_args { + #define ASM_APP_OFF " #NO_APP\n" + #endif + +-#define REGISTER_NAMES \ +-{ "$r0", "$r1", "$r2", "$r3", "$r4", "$r5", "$r6", "$r7", \ +- "$r8", "$r9", "$r10", "$r11", "$r12", "$r13", "$r14", "$r15", \ +- "$r16", "$r17", "$r18", "$r19", "$r20", "$r21", "$r22", "$r23", \ +- "$r24", "$r25", "$r26", "$r27", "$r28", "$r29", "$r30", "$r31", \ +- "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", \ +- "$f8", "$f9", "$f10", "$f11", "$f12", "$f13", "$f14", "$f15", \ +- "$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22", "$f23", \ +- "$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30", "$f31", \ +- "$fcc0","$fcc1","$fcc2","$fcc3","$fcc4","$fcc5","$fcc6","$fcc7", \ ++#define REGISTER_NAMES \ ++{ "$r0", "$r1", "$r2", "$r3", "$r4", "$r5", "$r6", "$r7", \ ++ "$r8", "$r9", "$r10", "$r11", "$r12", "$r13", "$r14", "$r15", \ ++ "$r16", "$r17", "$r18", "$r19", "$r20", "$r21", "$r22", "$r23", \ ++ "$r24", "$r25", "$r26", "$r27", "$r28", "$r29", "$r30", "$r31", \ ++ "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", \ ++ "$f8", "$f9", "$f10", "$f11", "$f12", "$f13", "$f14", "$f15", \ ++ "$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22", "$f23", \ ++ "$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30", "$f31", \ ++ "$fcc0","$fcc1","$fcc2","$fcc3","$fcc4","$fcc5","$fcc6","$fcc7", \ + "$arg", "$frame"} + +-/* List the "software" names for each register. Also list the numerical +- names for $fp and $sp. */ ++/* This macro defines additional names for hard registers. */ + + #define ADDITIONAL_REGISTER_NAMES \ + { \ +@@ -1595,61 +1056,17 @@ typedef struct loongarch_args { + { "xr31", 31 + FP_REG_FIRST } \ + } + +-#define DBR_OUTPUT_SEQEND(STREAM) \ +-do \ +- { \ +- /* Emit a blank line after the delay slot for emphasis. */ \ +- fputs ("\n", STREAM); \ +- } \ +-while (0) +- +-/* The LARCH implementation uses some labels for its own purpose. The +- following lists what labels are created, and are all formed by the +- pattern $L[a-z].*. The machine independent portion of GCC creates +- labels matching: $L[A-Z][0-9]+ and $L[0-9]+. +- +- LM[0-9]+ Silicon Graphics/ECOFF stabs label before each stmt. +- $Lb[0-9]+ Begin blocks for LARCH debug support +- $Lc[0-9]+ Label for use in s operation. +- $Le[0-9]+ End blocks for LARCH debug support */ +- +-#undef ASM_DECLARE_OBJECT_NAME +-#define ASM_DECLARE_OBJECT_NAME(STREAM, NAME, DECL) \ +- loongarch_declare_object (STREAM, NAME, "", ":\n") +- + /* Globalizing directive for a label. */ + #define GLOBAL_ASM_OP "\t.globl\t" + +-/* This says how to define a global common symbol. */ +- +-#define ASM_OUTPUT_ALIGNED_DECL_COMMON loongarch_output_aligned_decl_common +- +-/* This says how to define a local common symbol (i.e., not visible to +- linker). */ +- +-#ifndef ASM_OUTPUT_ALIGNED_LOCAL +-#define ASM_OUTPUT_ALIGNED_LOCAL(STREAM, NAME, SIZE, ALIGN) \ +- loongarch_declare_common_object (STREAM, NAME, "\n\t.lcomm\t", SIZE, ALIGN, false) +-#endif +- + /* This says how to output an external. It would be possible not to +- output anything and let undefined symbol become external. However ++ output anything and let undefined symbol become external. However + the assembler uses length information on externals to allocate in + data/sdata bss/sbss, thereby saving exec time. 
*/ + + #undef ASM_OUTPUT_EXTERNAL +-#define ASM_OUTPUT_EXTERNAL(STREAM,DECL,NAME) \ +- loongarch_output_external(STREAM,DECL,NAME) +- +-/* This is how to declare a function name. The actual work of +- emitting the label is moved to function_prologue, so that we can +- get the line number correctly emitted before the .ent directive, +- and after any .file directives. Define as empty so that the function +- is not declared before the .ent directive elsewhere. */ +- +-#undef ASM_DECLARE_FUNCTION_NAME +-#define ASM_DECLARE_FUNCTION_NAME(STREAM,NAME,DECL) \ +- loongarch_declare_function_name(STREAM,NAME,DECL) ++#define ASM_OUTPUT_EXTERNAL(STREAM, DECL, NAME) \ ++ loongarch_output_external (STREAM, DECL, NAME) + + /* This is how to store into the string LABEL + the symbol_ref name of an internal numbered label where +@@ -1657,8 +1074,8 @@ while (0) + This is suitable for output with `assemble_name'. */ + + #undef ASM_GENERATE_INTERNAL_LABEL +-#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \ +- sprintf ((LABEL), "*%s%s%ld", (LOCAL_LABEL_PREFIX), (PREFIX), (long)(NUM)) ++#define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM) \ ++ sprintf ((LABEL), "*%s%s%ld", (LOCAL_LABEL_PREFIX), (PREFIX), (long) (NUM)) + + /* Print debug labels as "foo = ." rather than "foo:" because they should + represent a byte pointer rather than an ISA-encoded address. This is +@@ -1677,159 +1094,108 @@ while (0) + At the time of writing, this hook is not used for the function end + label: + +- $LFExxx: ++ $LFExxx: + .end foo + + */ + +-#define ASM_OUTPUT_DEBUG_LABEL(FILE, PREFIX, NUM) \ ++#define ASM_OUTPUT_DEBUG_LABEL(FILE, PREFIX, NUM) \ + fprintf (FILE, "%s%s%d = .\n", LOCAL_LABEL_PREFIX, PREFIX, NUM) + + /* This is how to output an element of a case-vector that is absolute. */ + +-#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \ +- fprintf (STREAM, "\t%s\t%sL%d\n", \ +- ptr_mode == DImode ? ".dword" : ".word", \ +- LOCAL_LABEL_PREFIX, \ +- VALUE) +- +-/* This is how to output an element of a case-vector. We can make the +- entries GP-relative when .gp(d)word is supported. */ +- +-#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \ +-do { \ +- if (TARGET_RTP_PIC) \ +- { \ +- /* Make the entry relative to the start of the function. */ \ +- rtx fnsym = XEXP (DECL_RTL (current_function_decl), 0); \ +- fprintf (STREAM, "\t%s\t%sL%d-", \ +- Pmode == DImode ? ".dword" : ".word", \ +- LOCAL_LABEL_PREFIX, VALUE); \ +- assemble_name (STREAM, XSTR (fnsym, 0)); \ +- fprintf (STREAM, "\n"); \ +- } \ +- else \ +- fprintf (STREAM, "\t%s\t%sL%d-%sL%d\n", \ +- ptr_mode == DImode ? ".dword" : ".word", \ +- LOCAL_LABEL_PREFIX, VALUE, \ +- LOCAL_LABEL_PREFIX, REL); \ +-} while (0) +- +-/* Mark inline jump tables as data for the purpose of disassembly. For +- simplicity embed the jump table's label number in the local symbol +- produced so that multiple jump tables within a single function end +- up marked with unique symbols. Retain the alignment setting from +- `elfos.h' as we are replacing the definition from there. */ +- +-#undef ASM_OUTPUT_BEFORE_CASE_LABEL +-#define ASM_OUTPUT_BEFORE_CASE_LABEL(STREAM, PREFIX, NUM, TABLE) \ +- do \ +- { \ +- ASM_OUTPUT_ALIGN ((STREAM), 2); \ +- if (JUMP_TABLES_IN_TEXT_SECTION) \ +- loongarch_set_text_contents_type (STREAM, "__jump_", NUM, FALSE); \ +- } \ +- while (0) ++#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \ ++ fprintf (STREAM, "\t%s\t%sL%d\n", ptr_mode == DImode ? 
".dword" : ".word", \ ++ LOCAL_LABEL_PREFIX, VALUE) + +-/* Reset text marking to code after an inline jump table. Like with +- the beginning of a jump table use the label number to keep symbols +- unique. */ ++/* This is how to output an element of a case-vector. */ + +-#define ASM_OUTPUT_CASE_END(STREAM, NUM, TABLE) \ +- do \ +- if (JUMP_TABLES_IN_TEXT_SECTION) \ +- loongarch_set_text_contents_type (STREAM, "__jend_", NUM, TRUE); \ ++#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \ ++ do \ ++ { \ ++ fprintf (STREAM, "\t%s\t%sL%d-%sL%d\n", \ ++ ptr_mode == DImode ? ".dword" : ".word", LOCAL_LABEL_PREFIX, \ ++ VALUE, LOCAL_LABEL_PREFIX, REL); \ ++ } \ + while (0) + ++#define JUMP_TABLES_IN_TEXT_SECTION 0 ++ + /* This is how to output an assembler line + that says to advance the location counter + to a multiple of 2**LOG bytes. */ + +-#define ASM_OUTPUT_ALIGN(STREAM,LOG) \ +- fprintf (STREAM, "\t.align\t%d\n", (LOG)) ++#define ASM_OUTPUT_ALIGN(STREAM, LOG) fprintf (STREAM, "\t.align\t%d\n", (LOG)) + +-#define ASM_OUTPUT_ALIGN_WITH_NOP(STREAM,LOG) \ ++/* "nop" instruction 54525952 (andi $r0,$r0,0) is ++ used for padding. */ ++#define ASM_OUTPUT_ALIGN_WITH_NOP(STREAM, LOG) \ + fprintf (STREAM, "\t.align\t%d,54525952,4\n", (LOG)) + +- + /* This is how to output an assembler line to advance the location + counter by SIZE bytes. */ + + #undef ASM_OUTPUT_SKIP +-#define ASM_OUTPUT_SKIP(STREAM,SIZE) \ +- fprintf (STREAM, "\t.space\t" HOST_WIDE_INT_PRINT_UNSIGNED"\n", (SIZE)) ++#define ASM_OUTPUT_SKIP(STREAM, SIZE) \ ++ fprintf (STREAM, "\t.space\t" HOST_WIDE_INT_PRINT_UNSIGNED "\n", (SIZE)) + + /* This is how to output a string. */ + #undef ASM_OUTPUT_ASCII + #define ASM_OUTPUT_ASCII loongarch_output_ascii + +- +-/* Default to -G 8 */ +-#ifndef LARCH_DEFAULT_GVALUE +-#define LARCH_DEFAULT_GVALUE 8 +-#endif +- + /* Define the strings to put out for each section in the object file. */ +-#define TEXT_SECTION_ASM_OP "\t.text" /* instructions */ +-#define DATA_SECTION_ASM_OP "\t.data" /* large data */ ++#define TEXT_SECTION_ASM_OP "\t.text" /* instructions */ ++#define DATA_SECTION_ASM_OP "\t.data" /* large data */ + + #undef READONLY_DATA_SECTION_ASM_OP +-#define READONLY_DATA_SECTION_ASM_OP "\t.section\t.rodata" /* read-only data */ +- +-#define ASM_OUTPUT_REG_PUSH(STREAM,REGNO) \ +-do \ +- { \ +- fprintf (STREAM, "\t%s\t%s,%s,-8\n\t%s\t%s,0(%s)\n", \ +- TARGET_64BIT ? "daddiu" : "addiu", \ +- reg_names[STACK_POINTER_REGNUM], \ +- reg_names[STACK_POINTER_REGNUM], \ +- TARGET_64BIT ? "sd" : "sw", \ +- reg_names[REGNO], \ +- reg_names[STACK_POINTER_REGNUM]); \ +- } \ +-while (0) +- +-#define ASM_OUTPUT_REG_POP(STREAM,REGNO) \ +-do \ +- { \ +- loongarch_push_asm_switch (&loongarch_noreorder); \ +- fprintf (STREAM, "\t%s\t%s,0(%s)\n\t%s\t%s,%s,8\n", \ +- TARGET_64BIT ? "ld" : "lw", \ +- reg_names[REGNO], \ +- reg_names[STACK_POINTER_REGNUM], \ +- TARGET_64BIT ? "daddu" : "addu", \ +- reg_names[STACK_POINTER_REGNUM], \ +- reg_names[STACK_POINTER_REGNUM]); \ +- loongarch_pop_asm_switch (&loongarch_noreorder); \ +- } \ +-while (0) ++#define READONLY_DATA_SECTION_ASM_OP "\t.section\t.rodata" /* read-only data */ ++ ++#define ASM_OUTPUT_REG_PUSH(STREAM, REGNO) \ ++ do \ ++ { \ ++ fprintf (STREAM, "\t%s\t%s,%s,-8\n\t%s\t%s,%s,0\n", \ ++ TARGET_64BIT ? "addi.d" : "addi.w", \ ++ reg_names[STACK_POINTER_REGNUM], \ ++ reg_names[STACK_POINTER_REGNUM], \ ++ TARGET_64BIT ? 
"st.d" : "st.w", reg_names[REGNO], \ ++ reg_names[STACK_POINTER_REGNUM]); \ ++ } \ ++ while (0) ++ ++#define ASM_OUTPUT_REG_POP(STREAM, REGNO) \ ++ do \ ++ { \ ++ fprintf (STREAM, "\t%s\t%s,%s,0\n\t%s\t%s,%s,8\n", \ ++ TARGET_64BIT ? "ld.d" : "ld.w", reg_names[REGNO], \ ++ reg_names[STACK_POINTER_REGNUM], \ ++ TARGET_64BIT ? "addi.d" : "addi.w", \ ++ reg_names[STACK_POINTER_REGNUM], \ ++ reg_names[STACK_POINTER_REGNUM]); \ ++ } \ ++ while (0) + + /* How to start an assembler comment. +- The leading space is important (the loongarch native assembler requires it). */ ++ The leading space is important (the loongarch native assembler requires it). ++ */ + #ifndef ASM_COMMENT_START + #define ASM_COMMENT_START " #" + #endif +- ++ + #undef SIZE_TYPE + #define SIZE_TYPE (POINTER_SIZE == 64 ? "long unsigned int" : "unsigned int") + + #undef PTRDIFF_TYPE + #define PTRDIFF_TYPE (POINTER_SIZE == 64 ? "long int" : "int") + +-/* The minimum alignment of any expanded block move. */ +-#define LARCH_MIN_MOVE_MEM_ALIGN 16 +- + /* The maximum number of bytes that can be copied by one iteration of + a movmemsi loop; see loongarch_block_move_loop. */ +-#define LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER \ +- (UNITS_PER_WORD * 4) ++#define LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER (UNITS_PER_WORD * 4) + + /* The maximum number of bytes that can be copied by a straight-line + implementation of movmemsi; see loongarch_block_move_straight. We want + to make sure that any loop-based implementation will iterate at + least twice. */ +-#define LARCH_MAX_MOVE_BYTES_STRAIGHT \ +- (LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER * 2) ++#define LARCH_MAX_MOVE_BYTES_STRAIGHT (LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER * 2) + + /* The base cost of a memcpy call, for MOVE_RATIO and friends. These + values were determined experimentally by benchmarking with CSiBE. +@@ -1847,73 +1213,29 @@ while (0) + we'll have to generate a load/store pair for each, halve the + value of LARCH_CALL_RATIO to take that into account. */ + +-#define MOVE_RATIO(speed) \ +- (HAVE_movmemsi \ ++#define MOVE_RATIO(speed) \ ++ (HAVE_movmemsi \ + ? LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER / UNITS_PER_WORD \ + : CLEAR_RATIO (speed) / 2) + + /* For CLEAR_RATIO, when optimizing for size, give a better estimate + of the length of a memset call, but use the default otherwise. */ + +-#define CLEAR_RATIO(speed)\ +- ((speed) ? 15 : LARCH_CALL_RATIO) ++#define CLEAR_RATIO(speed) ((speed) ? 15 : LARCH_CALL_RATIO) + + /* This is similar to CLEAR_RATIO, but for a non-zero constant, so when + optimizing for size adjust the ratio to account for the overhead of + loading the constant and replicating it across the word. */ + +-#define SET_RATIO(speed) \ +- ((speed) ? 15 : LARCH_CALL_RATIO - 2) +- +-/* Since the bits of the _init and _fini function is spread across +- many object files, each potentially with its own GP, we must assume +- we need to load our GP. We don't preserve $gp or $ra, since each +- init/fini chunk is supposed to initialize $gp, and crti/crtn +- already take care of preserving $ra and, when appropriate, $gp. */ +-#if (defined _ABI64 && _LARCH_SIM == _ABI64) +-#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ +- asm (SECTION_OP "\n\ +- .set push\n\ +- la $r20, " USER_LABEL_PREFIX #FUNC "\n\ +- jirl $r1, $r20, 0\n\ +- .set pop\n\ +- " TEXT_SECTION_ASM_OP); +-#endif +-#ifndef HAVE_AS_TLS +-#define HAVE_AS_TLS 0 +-#endif +- +-#ifndef HAVE_AS_NAN +-#define HAVE_AS_NAN 0 +-#endif ++#define SET_RATIO(speed) ((speed) ? 
15 : LARCH_CALL_RATIO - 2) + + #ifndef USED_FOR_TARGET +-/* Information about ".set noFOO; ...; .set FOO" blocks. */ +-struct loongarch_asm_switch { +- /* The FOO in the description above. */ +- const char *name; +- +- /* The current block nesting level, or 0 if we aren't in a block. */ +- int nesting_level; +-}; +- + extern const enum reg_class loongarch_regno_to_class[]; +-extern const char *current_function_file; /* filename current function is in */ +-extern int num_source_filenames; /* current .file # */ +-extern int loongarch_dbx_regno[]; + extern int loongarch_dwarf_regno[]; +-extern bool loongarch_split_p[]; +-extern bool loongarch_use_pcrel_pool_p[]; +-extern enum processor loongarch_arch; /* which cpu to codegen for */ +-extern enum processor loongarch_tune; /* which cpu to schedule for */ +-extern int loongarch_isa; /* architectural level */ +-extern int loongarch_isa_rev; +-extern const struct loongarch_cpu_info *loongarch_arch_info; +-extern const struct loongarch_cpu_info *loongarch_tune_info; +-extern unsigned int loongarch_base_compression_flags; + + /* Information about a function's frame layout. */ +-struct GTY(()) loongarch_frame_info { ++struct GTY (()) loongarch_frame_info ++{ + /* The size of the frame in bytes. */ + HOST_WIDE_INT total_size; + +@@ -1930,216 +1252,67 @@ struct GTY(()) loongarch_frame_info { + /* Bit X is set if the function saves or restores GPR X. */ + unsigned int mask; + ++ unsigned int gpr_saved_num; ++ + /* Likewise FPR X. */ + unsigned int fmask; + +- /* Likewise doubleword accumulator X ($acX). */ +- unsigned int acc_mask; +- +- /* The number of GPRs, FPRs, doubleword accumulators and COP0 +- registers saved. */ +- unsigned int num_gp; +- unsigned int num_fp; +- unsigned int num_acc; +- unsigned int num_cop0_regs; +- +- /* The offset of the topmost GPR, FPR, accumulator and COP0-register +- save slots from the top of the frame, or zero if no such slots are +- needed. */ +- HOST_WIDE_INT gp_save_offset; +- HOST_WIDE_INT fp_save_offset; +- HOST_WIDE_INT acc_save_offset; +- HOST_WIDE_INT cop0_save_offset; +- +- /* Likewise, but giving offsets from the bottom of the frame. */ ++ /* How much the GPR save/restore routines adjust sp (or 0 if unused). */ ++ unsigned save_libcall_adjustment; ++ ++ /* Offsets of fixed-point and floating-point save areas from frame ++ bottom. */ + HOST_WIDE_INT gp_sp_offset; + HOST_WIDE_INT fp_sp_offset; +- HOST_WIDE_INT acc_sp_offset; +- HOST_WIDE_INT cop0_sp_offset; + +- /* Similar, but the value passed to _mcount. */ +- HOST_WIDE_INT ra_fp_offset; +- +- /* The offset of arg_pointer_rtx from the bottom of the frame. */ +- HOST_WIDE_INT arg_pointer_offset; ++ /* Offset of virtual frame pointer from stack pointer/frame bottom. */ ++ HOST_WIDE_INT frame_pointer_offset; + +- /* The offset of hard_frame_pointer_rtx from the bottom of the frame. */ ++ /* Offset of hard frame pointer from stack pointer/frame bottom. */ + HOST_WIDE_INT hard_frame_pointer_offset; + +- /* How much the GPR save/restore routines adjust sp (or 0 if unused). */ +- unsigned save_libcall_adjustment; +- +- /* Offset of virtual frame pointer from stack pointer/frame bottom */ +- HOST_WIDE_INT frame_pointer_offset; +-}; +- +-/* Enumeration for masked vectored (VI) and non-masked (EIC) interrupts. 
*/ +-enum loongarch_int_mask +-{ +- INT_MASK_EIC = -1, +- INT_MASK_SW0 = 0, +- INT_MASK_SW1 = 1, +- INT_MASK_HW0 = 2, +- INT_MASK_HW1 = 3, +- INT_MASK_HW2 = 4, +- INT_MASK_HW3 = 5, +- INT_MASK_HW4 = 6, +- INT_MASK_HW5 = 7 ++ /* The offset of arg_pointer_rtx from the bottom of the frame. */ ++ HOST_WIDE_INT arg_pointer_offset; + }; + +-/* Enumeration to mark the existence of the shadow register set. +- SHADOW_SET_INTSTACK indicates a shadow register set with a valid stack +- pointer. */ +-enum loongarch_shadow_set ++struct GTY (()) machine_function + { +- SHADOW_SET_NO, +- SHADOW_SET_YES, +- SHADOW_SET_INTSTACK +-}; +- +-struct GTY(()) machine_function { + /* The next floating-point condition-code register to allocate +- for 8CC targets, relative to ST_REG_FIRST. */ ++ for 8CC targets, relative to FCC_REG_FIRST. */ + unsigned int next_fcc; + + /* The number of extra stack bytes taken up by register varargs. + This area is allocated by the callee at the very top of the frame. */ + int varargs_size; + +- /* The current frame information, calculated by loongarch_compute_frame_info. */ ++ /* The current frame information, calculated by loongarch_compute_frame_info. ++ */ + struct loongarch_frame_info frame; +- +- /* How many instructions it takes to load a label into $AT, or 0 if +- this property hasn't yet been calculated. */ +- unsigned int load_label_num_insns; +- +- /* True if loongarch_adjust_insn_length should ignore an instruction's +- hazard attribute. */ +- bool ignore_hazard_length_p; +- +- /* True if the whole function is suitable for .set noreorder and +- .set nomacro. */ +- bool all_noreorder_p; +- +- /* True if the function has "inflexible" and "flexible" references +- to the global pointer. See loongarch_cfun_has_inflexible_gp_ref_p +- and loongarch_cfun_has_flexible_gp_ref_p for details. */ +- bool has_inflexible_gp_insn_p; +- bool has_flexible_gp_insn_p; +- +- /* True if the function's prologue must load the global pointer +- value into pic_offset_table_rtx and store the same value in +- the function's cprestore slot (if any). Even if this value +- is currently false, we may decide to set it to true later; +- see loongarch_must_initialize_gp_p () for details. */ +- bool must_initialize_gp_p; +- +- /* True if the current function must restore $gp after any potential +- clobber. This value is only meaningful during the first post-epilogue +- split_insns pass; see loongarch_must_initialize_gp_p () for details. */ +- bool must_restore_gp_when_clobbered_p; +- +- /* True if this is an interrupt handler. */ +- bool interrupt_handler_p; +- +- /* Records the way in which interrupts should be masked. Only used if +- interrupts are not kept masked. */ +- enum loongarch_int_mask int_mask; +- +- /* Records if this is an interrupt handler that uses shadow registers. */ +- enum loongarch_shadow_set use_shadow_register_set; +- +- /* True if this is an interrupt handler that should keep interrupts +- masked. */ +- bool keep_interrupts_masked_p; +- +- /* True if this is an interrupt handler that should use DERET +- instead of ERET. */ +- bool use_debug_exception_return_p; +- +- /* True if at least one of the formal parameters to a function must be +- written to the frame header (probably so its address can be taken). */ +- bool does_not_use_frame_header; +- +- /* True if none of the functions that are called by this function need +- stack space allocated for their arguments. 
*/ +- bool optimize_call_stack; +- +- /* True if one of the functions calling this function may not allocate +- a frame header. */ +- bool callers_may_not_allocate_frame; +- +- /* True if GCC stored callee saved registers in the frame header. */ +- bool use_frame_header_for_callee_saved_regs; + }; + #endif + +-/* Enable querying of DFA units. */ +-#define CPU_UNITS_QUERY 0 +- +-/* As on most targets, we want the .eh_frame section to be read-only where +- possible. And as on most targets, this means two things: +- +- (a) Non-locally-binding pointers must have an indirect encoding, +- so that the addresses in the .eh_frame section itself become +- locally-binding. +- +- (b) A shared library's .eh_frame section must encode locally-binding +- pointers in a relative (relocation-free) form. +- +- However, LARCH has traditionally not allowed directives like: +- +- .long x-. +- +- in cases where "x" is in a different section, or is not defined in the +- same assembly file. We are therefore unable to emit the PC-relative +- form required by (b) at assembly time. +- +- Fortunately, the linker is able to convert absolute addresses into +- PC-relative addresses on our behalf. Unfortunately, only certain +- versions of the linker know how to do this for indirect pointers, +- and for personality data. We must fall back on using writable +- .eh_frame sections for shared libraries if the linker does not +- support this feature. */ +-#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \ ++#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \ + (((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_absptr) + +-#define SWITCHABLE_TARGET 1 +- +-/* Several named LARCH patterns depend on Pmode. These patterns have the +- form _si for Pmode == SImode and _di for Pmode == DImode. ++/* Several named LoongArch patterns depend on Pmode. These patterns have the ++ form si for Pmode == SImode and di for Pmode == DImode. + Add the appropriate suffix to generator function NAME and invoke it + with arguments ARGS. */ + #define PMODE_INSN(NAME, ARGS) \ +- (Pmode == SImode ? NAME ## _si ARGS : NAME ## _di ARGS) ++ (Pmode == SImode ? NAME##si ARGS : NAME##di ARGS) ++ ++/* Do emit .note.GNU-stack by default. */ ++#ifndef NEED_INDICATE_EXEC_STACK ++#define NEED_INDICATE_EXEC_STACK 1 ++#endif + +-/***********************/ +-/* N_LARCH-PORT */ +-/***********************/ + /* The `Q' extension is not yet supported. */ +-/* TODO: according to march */ ++/* TODO: according to march. */ + #define UNITS_PER_FP_REG (TARGET_DOUBLE_FLOAT ? 8 : 4) + + /* The largest type that can be passed in floating-point registers. */ +-/* TODO: according to mabi */ +-#define UNITS_PER_FP_ARG (TARGET_HARD_FLOAT ? (TARGET_64BIT ? 8 : 4) : 0) +- +-/* Internal macros to classify an ISA register's type. */ +- +-#define GP_TEMP_FIRST (GP_REG_FIRST + 12) +- +-#define CALLEE_SAVED_REG_NUMBER(REGNO) \ +- ((REGNO) >= 22 && (REGNO) <= 31 ? (REGNO) - 22 : -1) +- +-#define N_LARCH_PROLOGUE_TEMP_REGNUM (GP_TEMP_FIRST + 1) +-#define N_LARCH_PROLOGUE_TEMP(MODE) gen_rtx_REG (MODE, N_LARCH_PROLOGUE_TEMP_REGNUM) +- +-#define LIBCALL_VALUE(MODE) \ +- loongarch_function_value (NULL_TREE, NULL_TREE, MODE) +- +-#define FUNCTION_VALUE(VALTYPE, FUNC) \ +- loongarch_function_value (VALTYPE, FUNC, VOIDmode) +- +-#define FRAME_GROWS_DOWNWARD 1 ++/* TODO: according to mabi. */ ++#define UNITS_PER_FP_ARG \ ++ (TARGET_HARD_FLOAT ? (TARGET_DOUBLE_FLOAT ? 
8 : 4) : 0) + + #define FUNCTION_VALUE_REGNO_P(N) ((N) == GP_RETURN || (N) == FP_RETURN) +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index be950c9e4..097c9f4db 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1,7 +1,7 @@ +-;; Loongarch.md Machine Description for LARCH based processors +-;; Copyright (C) 1989-2018 Free Software Foundation, Inc. +-;; Contributed by A. Lichnewsky, lich@inria.inria.fr +-;; Changes by Michael Meissner, meissner@osf.org ++;; Machine Description for LoongArch for GNU compiler. ++;; Copyright (C) 2020-2022 Free Software Foundation, Inc. ++;; Contributed by Loongson Ltd. ++;; Based on MIPS target for GNU compiler. + + ;; This file is part of GCC. + +@@ -19,118 +19,96 @@ + ;; along with GCC; see the file COPYING3. If not see + ;; . + +-(define_enum "processor" [ +- loongarch +- loongarch64 +- la464 +-]) +- + (define_c_enum "unspec" [ + ;; Integer operations that are too cumbersome to describe directly. +- UNSPEC_WSBH +- UNSPEC_DSBH +- UNSPEC_DSHD ++ UNSPEC_REVB_2H ++ UNSPEC_REVB_4H ++ UNSPEC_REVH_D + + ;; Floating-point moves. + UNSPEC_LOAD_LOW + UNSPEC_LOAD_HIGH + UNSPEC_STORE_WORD + UNSPEC_MOVGR2FRH ++ UNSPEC_MOVGR2FR + UNSPEC_MOVFRH2GR ++ UNSPEC_MOVFR2GR ++ UNSPEC_MOVFCC2GR ++ UNSPEC_MOVGR2FCC ++ UNSPEC_MOVFR2FCC + +- ;; Floating-point environment. +- UNSPEC_MOVFCSR2GR +- UNSPEC_MOVGR2FCSR ++ ;; Floating point unspecs. ++ UNSPEC_FRINT ++ UNSPEC_FCLASS ++ UNSPEC_FCOPYSIGN + +- ;; GP manipulation. ++ ;; Override return address for exception handling. + UNSPEC_EH_RETURN + +- ;; +- UNSPEC_FRINT +- UNSPEC_FCLASS ++ ;; Bit operation + UNSPEC_BYTEPICK_W + UNSPEC_BYTEPICK_D + UNSPEC_BITREV_4B + UNSPEC_BITREV_8B + +- ;; Symbolic accesses. +- UNSPEC_LOAD_CALL +- +- ;; Blockage and synchronisation. +- UNSPEC_BLOCKAGE +- UNSPEC_DBAR +- UNSPEC_IBAR +- +- ;; CPUCFG +- UNSPEC_CPUCFG +- UNSPEC_ASRTLE_D +- UNSPEC_ASRTGT_D +- +- UNSPEC_CSRRD +- UNSPEC_CSRWR +- UNSPEC_CSRXCHG +- UNSPEC_IOCSRRD +- UNSPEC_IOCSRWR +- +- ;; cacop +- UNSPEC_CACOP +- +- ;; pte +- UNSPEC_LDDIR +- UNSPEC_LDPTE +- +- ;; Cache manipulation. +- UNSPEC_LARCH_CACHE +- +- ;; Interrupt handling. +- UNSPEC_ERTN +- UNSPEC_DI +- UNSPEC_EHB +- UNSPEC_RDPGPR +- +- ;; Used in a call expression in place of args_size. It's present for PIC +- ;; indirect calls where it contains args_size and the function symbol. +- UNSPEC_CALL_ATTR +- +- +- ;; Stack checking. +- UNSPEC_PROBE_STACK_RANGE +- +- ;; The `.insn' pseudo-op. +- UNSPEC_INSN_PSEUDO +- + ;; TLS + UNSPEC_TLS_GD + UNSPEC_TLS_LD + UNSPEC_TLS_LE + UNSPEC_TLS_IE + +- UNSPEC_LU52I_D +- ++ ;; Stack tie + UNSPEC_TIE + + ;; CRC + UNSPEC_CRC + UNSPEC_CRCC +- UNSPEC_ADDRESS_FIRST +-]) + +-(define_c_enum "unspecv" [ +- ;; Register save and restore. +- UNSPECV_GPR_SAVE +- UNSPECV_GPR_RESTORE ++ ;; RSQRT ++ UNSPEC_RSQRT ++ UNSPEC_RSQRTE + +- UNSPECV_MOVE_EXTREME ++ ;; RECIP ++ UNSPEC_RECIPE + ]) + ++(define_c_enum "unspecv" [ ++ ;; Blockage and synchronisation. 
++ UNSPECV_BLOCKAGE ++ UNSPECV_DBAR ++ UNSPECV_IBAR ++ ++ ;; Privileged instructions ++ UNSPECV_CSRRD ++ UNSPECV_CSRWR ++ UNSPECV_CSRXCHG ++ UNSPECV_IOCSRRD ++ UNSPECV_IOCSRWR ++ UNSPECV_CACOP ++ UNSPECV_LDDIR ++ UNSPECV_LDPTE ++ UNSPECV_ERTN ++ ++ ;; Stack checking ++ UNSPECV_PROBE_STACK_RANGE ++ ++ ;; Floating-point environment ++ UNSPECV_MOVFCSR2GR ++ UNSPECV_MOVGR2FCSR ++ ++ ;; Others ++ UNSPECV_CPUCFG ++ UNSPECV_ASRTLE_D ++ UNSPECV_ASRTGT_D ++ UNSPECV_SYSCALL ++ UNSPECV_BREAK ++]) + + (define_constants + [(RETURN_ADDR_REGNUM 1) + (T0_REGNUM 12) + (T1_REGNUM 13) + (S0_REGNUM 23) +- (S1_REGNUM 24) +- (S2_REGNUM 25) + + ;; PIC long branch sequences are never longer than 100 bytes. + (MAX_PIC_BRANCH_LENGTH 100) +@@ -148,9 +126,9 @@ + (define_attr "got" "unset,load" + (const_string "unset")) + +-;; For jal instructions, this attribute is DIRECT when the target address ++;; For jirl instructions, this attribute is DIRECT when the target address + ;; is symbolic and INDIRECT when it is a register. +-(define_attr "jal" "unset,direct,indirect" ++(define_attr "jirl" "unset,direct,indirect" + (const_string "unset")) + + +@@ -158,7 +136,7 @@ + ;; are as for "type" (see below) but there are also the following + ;; move-specific values: + ;; +-;; sll0 "sll DEST,SRC,0", which on 64-bit targets is guaranteed ++;; sll0 "slli.w DEST,SRC,0", which on 64-bit targets is guaranteed + ;; to produce a sign-extended DEST, even if SRC is not + ;; properly sign-extended + ;; pick_ins BSTRPICK.W, BSTRPICK.D, BSTRINS.W or BSTRINS.D instruction +@@ -207,59 +185,6 @@ + (const_string "yes")] + (const_string "no"))) + +-;; Attributes describing a sync loop. These loops have the form: +-;; +-;; if (RELEASE_BARRIER == YES) sync +-;; 1: OLDVAL = *MEM +-;; if ((OLDVAL & INCLUSIVE_MASK) != REQUIRED_OLDVAL) goto 2 +-;; CMP = 0 [delay slot] +-;; $TMP1 = OLDVAL & EXCLUSIVE_MASK +-;; $TMP2 = INSN1 (OLDVAL, INSN1_OP2) +-;; $TMP3 = INSN2 ($TMP2, INCLUSIVE_MASK) +-;; $AT |= $TMP1 | $TMP3 +-;; if (!commit (*MEM = $AT)) goto 1. +-;; if (INSN1 != MOVE && INSN1 != LI) NEWVAL = $TMP3 [delay slot] +-;; CMP = 1 +-;; if (ACQUIRE_BARRIER == YES) sync +-;; 2: +-;; +-;; where "$" values are temporaries and where the other values are +-;; specified by the attributes below. Values are specified as operand +-;; numbers and insns are specified as enums. If no operand number is +-;; specified, the following values are used instead: +-;; +-;; - OLDVAL: $AT +-;; - CMP: NONE +-;; - NEWVAL: $AT +-;; - INCLUSIVE_MASK: -1 +-;; - REQUIRED_OLDVAL: OLDVAL & INCLUSIVE_MASK +-;; - EXCLUSIVE_MASK: 0 +-;; +-;; MEM and INSN1_OP2 are required. +-;; +-;; Ideally, the operand attributes would be integers, with -1 meaning "none", +-;; but the gen* programs don't yet support that. +-(define_attr "sync_mem" "none,0,1,2,3,4,5" (const_string "none")) +-(define_attr "sync_oldval" "none,0,1,2,3,4,5" (const_string "none")) +-(define_attr "sync_cmp" "none,0,1,2,3,4,5" (const_string "none")) +-(define_attr "sync_newval" "none,0,1,2,3,4,5" (const_string "none")) +-(define_attr "sync_inclusive_mask" "none,0,1,2,3,4,5" (const_string "none")) +-(define_attr "sync_exclusive_mask" "none,0,1,2,3,4,5" (const_string "none")) +-(define_attr "sync_required_oldval" "none,0,1,2,3,4,5" (const_string "none")) +-(define_attr "sync_insn1_op2" "none,0,1,2,3,4,5" (const_string "none")) +-(define_attr "sync_insn1" "move,li,addu,addiu,subu,and,andi,or,ori,xor,xori" +- (const_string "move")) +-(define_attr "sync_insn2" "nop,and,xor,not" +- (const_string "nop")) +-;; Memory model specifier. 
+-;; "0"-"9" values specify the operand that stores the memory model value. +-;; "10" specifies MEMMODEL_ACQ_REL, +-;; "11" specifies MEMMODEL_ACQUIRE. +-(define_attr "sync_memmodel" "" (const_int 10)) +- +-;; Accumulator operand for madd patterns. +-(define_attr "accum_in" "none,0,1,2,3,4,5" (const_string "none")) +- + ;; Classification of each insn. + ;; branch conditional branch + ;; jump unconditional jump +@@ -273,8 +198,8 @@ + ;; prefetch memory prefetch (register + offset) + ;; prefetchx memory indexed prefetch (register + register) + ;; condmove conditional moves +-;; mgtf move generate register to float register +-;; mftg move float register to generate register ++;; mgtf move general-purpose register to floating point register ++;; mftg move floating point register to general-purpose register + ;; const load constant + ;; arith integer arithmetic instructions + ;; logical integer logical instructions +@@ -283,10 +208,9 @@ + ;; signext sign extend instructions + ;; clz the clz and clo instructions + ;; trap trap if instructions +-;; imul integer multiply 2 operands +-;; imul3 integer multiply 3 operands +-;; idiv3 integer divide 3 operands +-;; move integer register move ({,D}ADD{,U} with rt = 0) ++;; imul integer multiply ++;; idiv integer divide ++;; move integer move + ;; fmove floating point register move + ;; fadd floating point add/subtract + ;; fmul floating point multiply +@@ -296,9 +220,11 @@ + ;; fabs floating point absolute value + ;; fneg floating point negation + ;; fcmp floating point compare ++;; fcopysign floating point copysign + ;; fcvt floating point convert + ;; fsqrt floating point square root + ;; frsqrt floating point reciprocal square root ++;; frsqrte float point reciprocal square root approximate + ;; multi multiword sequence (or user asm statements) + ;; atomic atomic memory update instruction + ;; syncloop memory atomic operation implemented as a sync loop +@@ -307,16 +233,15 @@ + (define_attr "type" + "unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxstore, + prefetch,prefetchx,condmove,mgtf,mftg,const,arith,logical, +- shift,slt,signext,clz,trap,imul,imul3,idiv3,move, +- fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcvt,fsqrt, +- frsqrt,dspmac,dspmacsat,accext,accmod,dspalu,dspalusat, +- multi,atomic,syncloop,nop,ghost, ++ shift,slt,signext,clz,trap,imul,idiv,move, ++ fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcopysign,fcvt,fsqrt, ++ frsqrt,frsqrte,accext,accmod,multi,atomic,syncloop,nop,ghost, + simd_div,simd_fclass,simd_flog2,simd_fadd,simd_fcvt,simd_fmul,simd_fmadd, + simd_fdiv,simd_bitins,simd_bitmov,simd_insert,simd_sld,simd_mul,simd_fcmp, + simd_fexp2,simd_int_arith,simd_bit,simd_shift,simd_splat,simd_fill, + simd_permute,simd_shf,simd_sat,simd_pcnt,simd_copy,simd_branch,simd_clsx, + simd_fminmax,simd_logic,simd_move,simd_load,simd_store" +- (cond [(eq_attr "jal" "!unset") (const_string "call") ++ (cond [(eq_attr "jirl" "!unset") (const_string "call") + (eq_attr "got" "load") (const_string "load") + + (eq_attr "alu_type" "add,sub") (const_string "arith") +@@ -362,35 +287,22 @@ + (eq_attr "dword_mode" "yes")) + (const_string "multi") + (eq_attr "move_type" "move") (const_string "move") +- (eq_attr "move_type" "const") (const_string "const") +- (eq_attr "sync_mem" "!none") (const_string "syncloop")] ++ (eq_attr "move_type" "const") (const_string "const")] + (const_string "unknown"))) + +-(define_attr "compact_form" "always,maybe,never" +- (cond [(eq_attr "jal" "direct") +- (const_string "always") +- (eq_attr "jal" 
"indirect") +- (const_string "maybe") +- (eq_attr "type" "jump") +- (const_string "maybe")] +- (const_string "never"))) +- + ;; Mode for conversion types (fcvt) +-;; I2S integer to float single (SI/DI to SF) +-;; I2D integer to float double (SI/DI to DF) +-;; S2I float to integer (SF to SI/DI) +-;; D2I float to integer (DF to SI/DI) +-;; D2S double to float single +-;; S2D float single to double +- +-(define_attr "cnv_mode" "unknown,I2S,I2D,S2I,D2I,D2S,S2D" ++;; I2S integer to float single (SI/DI to SF) ++;; I2D integer to float double (SI/DI to DF) ++;; S2I float to integer (SF to SI/DI) ++;; D2I float to integer (DF to SI/DI) ++;; D2S double to float single ++;; S2D float single to double ++;; C2D fcc to DI ++ ++(define_attr "cnv_mode" "unknown,I2S,I2D,S2I,D2I,D2S,S2D" + (const_string "unknown")) + +-(define_attr "compression" "none,all" +- (const_string "none")) +- +-;; The number of individual instructions that a non-branch pattern generates, +-;; using units of BASE_INSN_LENGTH. ++;; The number of individual instructions that a non-branch pattern generates + (define_attr "insn_count" "" + (cond [;; "Ghost" instructions occupy no space. + (eq_attr "type" "ghost") +@@ -425,84 +337,30 @@ + (eq_attr "move_type" "store,fpstore") + (symbol_ref "loongarch_load_store_insns (operands[0], insn)") + +- (eq_attr "type" "idiv3") ++ (eq_attr "type" "idiv") + (symbol_ref "loongarch_idiv_insns (GET_MODE (PATTERN (insn)))")] + (const_int 1))) + +-;; Length of instruction in bytes. The default is derived from "insn_count", +-;; but there are special cases for branches (which must be handled here) +-;; and for compressed single instructions. +- +- +- ++;; Length of instruction in bytes. + (define_attr "length" "" + (cond [ +- ;; Branch instructions have a range of [-0x20000,0x1fffc]. +- ;; If a branch is outside this range, we have a choice of two +- ;; sequences. +- ;; +- ;; For PIC, an out-of-range branch like: +- ;; +- ;; bne r1,r2,target +- ;; +- ;; becomes the equivalent of: +- ;; +- ;; beq r1,r2,1f +- ;; la rd,target +- ;; jr rd +- ;; 1: +- ;; +- ;; The non-PIC case is similar except that we use a direct +- ;; jump instead of an la/jr pair. Since the target of this +- ;; jump is an absolute 28-bit bit address (the other bits +- ;; coming from the address of the delay slot) this form cannot +- ;; cross a 256MB boundary. We could provide the option of +- ;; using la/jr in this case too, but we do not do so at +- ;; present. +- ;; +- ;; from the shorten_branches reference address. +- (eq_attr "type" "branch") +- (cond [;; Any variant can handle the 17-bit range. +- (and (le (minus (match_dup 0) (pc)) (const_int 65532)) +- (le (minus (pc) (match_dup 0)) (const_int 65534))) +- (const_int 4) +- +- ;; The non-PIC case: branch, and J. +- (match_test "TARGET_ABSOLUTE_JUMPS") +- (const_int 8)] +- +- ;; Use MAX_PIC_BRANCH_LENGTH as a (gross) overestimate. +- ;; loongarch_adjust_insn_length substitutes the correct length. +- ;; +- ;; Note that we can't simply use (symbol_ref ...) here +- ;; because genattrtab needs to know the maximum length +- ;; of an insn. +- (const_int MAX_PIC_BRANCH_LENGTH)) +- ] +- (symbol_ref "get_attr_insn_count (insn) * BASE_INSN_LENGTH"))) +- +-;; Attribute describing the processor. +-(define_enum_attr "cpu" "processor" +- (const (symbol_ref "loongarch_tune"))) ++ ;; Branching further than +/- 128 KiB requires two instructions. 
++ (eq_attr "type" "branch") ++ (if_then_else (and (le (minus (match_dup 0) (pc)) (const_int 131064)) ++ (le (minus (pc) (match_dup 0)) (const_int 131068))) ++ (const_int 4) ++ (const_int 8))] ++ (symbol_ref "get_attr_insn_count (insn) * 4"))) + + ;; The type of hardware hazard associated with this instruction. + ;; DELAY means that the next instruction cannot read the result + ;; of this one. +-(define_attr "hazard" "none,delay,forbidden_slot" ++(define_attr "hazard" "none,forbidden_slot" + (const_string "none")) + +-;; Can the instruction be put into a delay slot? +-(define_attr "can_delay" "no,yes" +- (if_then_else (and (eq_attr "type" "!branch,call,jump") +- (eq_attr "hazard" "none") +- (match_test "get_attr_insn_count (insn) == 1")) +- (const_string "yes") +- (const_string "no"))) +- + ;; Describe a user's asm statement. + (define_asm_attributes +- [(set_attr "type" "multi") +- (set_attr "can_delay" "no")]) ++ [(set_attr "type" "multi")]) + + ;; This mode iterator allows 32-bit and 64-bit GPR patterns to be generated + ;; from the same template. +@@ -512,141 +370,99 @@ + ;; modes. + (define_mode_iterator GPR2 [SI (DI "TARGET_64BIT")]) + +-;; Likewise, but for XLEN-sized quantities. +-(define_mode_iterator X [(SI "!TARGET_64BIT") (DI "TARGET_64BIT")]) +- +-(define_mode_iterator MOVEP1 [SI SF]) +-(define_mode_iterator MOVEP2 [SI SF]) ++;; This mode iterator allows 16-bit and 32-bit GPR patterns and 32-bit 64-bit ++;; FPR patterns to be generated from the same template. + (define_mode_iterator JOIN_MODE [HI + SI + (SF "TARGET_HARD_FLOAT") +- (DF "TARGET_HARD_FLOAT +- && TARGET_DOUBLE_FLOAT")]) ++ (DF "TARGET_DOUBLE_FLOAT")]) + + ;; This mode iterator allows :P to be used for patterns that operate on + ;; pointer-sized quantities. Exactly one of the two alternatives will match. + (define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")]) + +-;; 32-bit integer moves for which we provide move patterns. +-(define_mode_iterator IMOVE32 +- [SI]) ++;; Likewise, but for XLEN-sized quantities. ++(define_mode_iterator X [(SI "!TARGET_64BIT") (DI "TARGET_64BIT")]) + + ;; 64-bit modes for which we provide move patterns. +-(define_mode_iterator MOVE64 +- [DI DF]) ++(define_mode_iterator MOVE64 [DI DF]) + + ;; 128-bit modes for which we provide move patterns on 64-bit targets. + (define_mode_iterator MOVE128 [TI TF]) + +-;; This mode iterator allows the QI and HI extension patterns to be +-;; defined from the same template. ++;; Iterator for sub-32-bit integer modes. + (define_mode_iterator SHORT [QI HI]) + + ;; Likewise the 64-bit truncate-and-shift patterns. + (define_mode_iterator SUBDI [QI HI SI]) + +-;; This mode iterator allows the QI HI SI and DI extension patterns to be ++;; Iterator for scalar fixed-point modes. + (define_mode_iterator QHWD [QI HI SI (DI "TARGET_64BIT")]) + +- +-;; This mode iterator allows :ANYF to be used wherever a scalar or vector +-;; floating-point mode is allowed. ++;; Iterator for hardware-supported floating-point modes. + (define_mode_iterator ANYF [(SF "TARGET_HARD_FLOAT") +- (DF "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT")]) +- +-;; Like ANYF, but only applies to scalar modes. +-(define_mode_iterator SCALARF [(SF "TARGET_HARD_FLOAT") +- (DF "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT")]) ++ (DF "TARGET_DOUBLE_FLOAT")]) + + ;; A floating-point mode for which moves involving FPRs may need to be split. 
+ (define_mode_iterator SPLITF + [(DF "!TARGET_64BIT && TARGET_DOUBLE_FLOAT") + (DI "!TARGET_64BIT && TARGET_DOUBLE_FLOAT") +- (TF "TARGET_64BIT && TARGET_FLOAT64")]) ++ (TF "TARGET_64BIT && TARGET_DOUBLE_FLOAT")]) + +-;; In GPR templates, a string like "mul." will expand to "mul" in the +-;; 32-bit "mul.w" and "mul.d" in the 64-bit version. ++;; In GPR templates, a string like "mul." will expand to "mul.w" in the ++;; 32-bit version and "mul.d" in the 64-bit version. + (define_mode_attr d [(SI "w") (DI "d")]) + +-;; Same as d but upper-case. +-(define_mode_attr D [(SI "") (DI "D")]) +- + ;; This attribute gives the length suffix for a load or store instruction. + ;; The same suffixes work for zero and sign extensions. + (define_mode_attr size [(QI "b") (HI "h") (SI "w") (DI "d")]) + (define_mode_attr SIZE [(QI "B") (HI "H") (SI "W") (DI "D")]) + +-;; This attributes gives the mode mask of a SHORT. ++;; This attribute gives the mode mask of a SHORT. + (define_mode_attr mask [(QI "0x00ff") (HI "0xffff")]) + +-;; This attributes gives the size (bits) of a SHORT. +-(define_mode_attr qi_hi [(QI "7") (HI "15")]) +- +-;; Mode attributes for GPR loads. +-(define_mode_attr load [(SI "lw") (DI "ld")]) ++;; This attribute gives the size (bits) of a SHORT. ++(define_mode_attr 7_or_15 [(QI "7") (HI "15")]) + +-(define_mode_attr load_l [(SI "ld.w") (DI "ld.d")]) + ;; Instruction names for stores. + (define_mode_attr store [(QI "sb") (HI "sh") (SI "sw") (DI "sd")]) + +-;; Similarly for LARCH IV indexed FPR loads and stores. +-(define_mode_attr floadx [(SF "fldx.s") (DF "fldx.d") (V2SF "fldx.d")]) +-(define_mode_attr fstorex [(SF "fstx.s") (DF "fstx.d") (V2SF "fstx.d")]) +- +-;; Similarly for LOONGSON indexed GPR loads and stores. ++;; Similarly for LoongArch indexed GPR loads and stores. + (define_mode_attr loadx [(QI "ldx.b") +- (HI "ldx.h") +- (SI "ldx.w") +- (DI "ldx.d")]) ++ (HI "ldx.h") ++ (SI "ldx.w") ++ (DI "ldx.d")]) + (define_mode_attr storex [(QI "stx.b") +- (HI "stx.h") +- (SI "stx.w") +- (DI "stx.d")]) +- +-;; This attribute gives the best constraint to use for registers of +-;; a given mode. +-(define_mode_attr reg [(SI "d") (DI "d") (FCC "z")]) ++ (HI "stx.h") ++ (SI "stx.w") ++ (DI "stx.d")]) + + ;; This attribute gives the format suffix for floating-point operations. + (define_mode_attr fmt [(SF "s") (DF "d") (V2SF "ps")]) ++(define_mode_attr ifmt [(SI "w") (DI "l")]) + + ;; This attribute gives the upper-case mode name for one unit of a + ;; floating-point mode or vector mode. + (define_mode_attr UNITMODE [(SF "SF") (DF "DF") (V2SF "SF") (V4SF "SF") +- (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI") +- (V2DF "DF")(V8SF "SF")(V32QI "QI")(V16HI "HI")(V8SI "SI")(V4DI "DI")(V4DF "DF")]) ++ (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI") ++ (V2DF "DF")(V8SF "SF")(V32QI "QI")(V16HI "HI")(V8SI "SI")(V4DI "DI")(V4DF "DF")]) + + ;; As above, but in lower case. + (define_mode_attr unitmode [(SF "sf") (DF "df") (V2SF "sf") (V4SF "sf") +- (V16QI "qi") (V8QI "qi") (V8HI "hi") (V4HI "hi") +- (V4SI "si") (V2SI "si") (V2DI "di") (V2DF "df") +- (V8SI "si") (V4DI "di") (V32QI "qi") (V16HI "hi") ++ (V16QI "qi") (V8QI "qi") (V8HI "hi") (V4HI "hi") ++ (V4SI "si") (V2SI "si") (V2DI "di") (V2DF "df") ++ (V8SI "si") (V4DI "di") (V32QI "qi") (V16HI "hi") + (V8SF "sf") (V4DF "df")]) + + ;; This attribute gives the integer mode that has half the size of + ;; the controlling mode. 
+ (define_mode_attr HALFMODE [(DF "SI") (DI "SI") (V2SF "SI") +- (V2SI "SI") (V4HI "SI") (V8QI "SI") +- (TF "DI")]) ++ (V2SI "SI") (V4HI "SI") (V8QI "SI") ++ (TF "DI")]) + ++;; This attribute gives the integer prefix for some instructions templates. + (define_mode_attr p [(SI "") (DI "d")]) + +-;; This attribute works around the early SB-1 rev2 core "F2" erratum: +-;; +-;; In certain cases, div.s and div.ps may have a rounding error +-;; and/or wrong inexact flag. +-;; +-;; Therefore, we only allow div.s if not working around SB-1 rev2 +-;; errata or if a slight loss of precision is OK. +-(define_mode_attr divide_condition +- [DF (SF "flag_unsafe_math_optimizations") +- (V2SF "TARGET_SB1 && (flag_unsafe_math_optimizations)")]) +- +-;; This attribute gives the conditions under which SQRT.fmt instructions +-;; can be used. +-(define_mode_attr sqrt_condition +- [SF DF (V2SF "TARGET_SB1")]) +- + ;; This code iterator allows signed and unsigned widening multiplications + ;; to use the same template. + (define_code_iterator any_extend [sign_extend zero_extend]) +@@ -659,13 +475,10 @@ + ;; from the same template. + (define_code_iterator any_shift [ashift ashiftrt lshiftrt]) + +-;; This code iterator allows unsigned and signed division to be generated +-;; from the same template. +-(define_code_iterator any_div [div udiv]) +- +-;; This code iterator allows unsigned and signed modulus to be generated ++;; This code iterator allows the three bitwise instructions to be generated + ;; from the same template. +-(define_code_iterator any_mod [mod umod]) ++(define_code_iterator any_bitwise [and ior xor]) ++(define_code_iterator neg_bitwise [and ior]) + + ;; This code iterator allows addition and subtraction to be generated + ;; from the same template. +@@ -679,13 +492,14 @@ + ;; from the same template + (define_code_iterator addsubmul [plus minus mult]) + ++;; This code iterator allows unsigned and signed division to be generated ++;; from the same template. ++(define_code_iterator any_div [div udiv mod umod]) ++ + ;; This code iterator allows all native floating-point comparisons to be + ;; generated from the same template. +-(define_code_iterator fcond [unordered uneq unlt unle eq lt le ordered ltgt ne]) +- +-;; This code iterator is used for comparisons that can be implemented +-;; by swapping the operands. +-(define_code_iterator swapped_fcond [ge gt unge ungt]) ++(define_code_iterator fcond [unordered uneq unlt unle eq lt le ++ ordered ltgt ne ge gt unge ungt]) + + ;; Equality operators. + (define_code_iterator equality_op [eq ne]) +@@ -725,6 +539,10 @@ + (plus "add") + (minus "sub") + (mult "mul") ++ (div "div") ++ (udiv "udiv") ++ (mod "mod") ++ (umod "umod") + (return "return") + (simple_return "simple_return")]) + +@@ -736,15 +554,13 @@ + (xor "xor") + (and "and") + (plus "addu") +- (minus "subu")]) +- +-;; expands to the name of the insn that implements +-;; a particular code to operate on immediate values. +-(define_code_attr immediate_insn [(ior "ori") +- (xor "xori") +- (and "andi")]) ++ (minus "subu") ++ (div "div") ++ (udiv "div") ++ (mod "mod") ++ (umod "mod")]) + +-;; is the c.cond.fmt condition associated with a particular code. ++;; is the fcmp.cond.fmt condition associated with a particular code. + (define_code_attr fcond [(unordered "cun") + (uneq "cueq") + (unlt "cult") +@@ -754,48 +570,17 @@ + (le "sle") + (ordered "cor") + (ltgt "sne") +- (ne "cune")]) +- +-;; Similar, but for swapped conditions. 
+-(define_code_attr swapped_fcond [(ge "sle") +- (gt "slt") +- (unge "cule") +- (ungt "cult")]) +- +-;; The value of the bit when the branch is taken for branch_bit patterns. +-;; Comparison is always against zero so this depends on the operator. +-(define_code_attr bbv [(eq "0") (ne "1")]) +- +-;; This is the inverse value of bbv. +-(define_code_attr bbinv [(eq "1") (ne "0")]) ++ (ne "cune") ++ (ge "sge") ++ (gt "sgt") ++ (unge "cuge") ++ (ungt "cugt")]) + + ;; The sel mnemonic to use depending on the condition test. + (define_code_attr sel [(eq "masknez") (ne "maskeqz")]) ++(define_code_attr fsel_invert [(eq "%2,%3") (ne "%3,%2")]) + (define_code_attr selinv [(eq "maskeqz") (ne "masknez")]) +- +-;; Pipeline descriptions. +-;; +-;; generic.md provides a fallback for processors without a specific +-;; pipeline description. It is derived from the old define_function_unit +-;; version and uses the "alu" and "imuldiv" units declared below. +-;; +-;; Some of the processor-specific files are also derived from old +-;; define_function_unit descriptions and simply override the parts of +-;; generic.md that don't apply. The other processor-specific files +-;; are self-contained. +-(define_automaton "alu,imuldiv") + +-(define_cpu_unit "alu" "alu") +-(define_cpu_unit "imuldiv" "imuldiv") +- +-;; Ghost instructions produce no real code and introduce no hazards. +-;; They exist purely to express an effect on dataflow. +-(define_insn_reservation "ghost" 0 +- (eq_attr "type" "ghost") +- "nothing") +- +-(include "generic.md") +- + ;; + ;; .................... + ;; +@@ -831,37 +616,22 @@ + [(set_attr "type" "fadd") + (set_attr "mode" "")]) + +-(define_expand "add3" +- [(set (match_operand:GPR 0 "register_operand") +- (plus:GPR (match_operand:GPR 1 "register_operand") +- (match_operand:GPR 2 "arith_operand")))] +- "") +- +-(define_insn "*add3" ++(define_insn "add3" + [(set (match_operand:GPR 0 "register_operand" "=r,r") + (plus:GPR (match_operand:GPR 1 "register_operand" "r,r") +- (match_operand:GPR 2 "arith_operand" "r,Q")))] ++ (match_operand:GPR 2 "arith_operand" "r,I")))] + "" +-{ +- if (which_alternative == 0) +- return "add.\t%0,%1,%2"; +- else +- return "addi.\t%0,%1,%2"; +-} ++ "add%i2.\t%0,%1,%2"; + [(set_attr "alu_type" "add") +- (set_attr "compression" "*,*") + (set_attr "mode" "")]) + +- + (define_insn "*addsi3_extended" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (sign_extend:DI + (plus:SI (match_operand:SI 1 "register_operand" "r,r") +- (match_operand:SI 2 "arith_operand" "r,Q"))))] ++ (match_operand:SI 2 "arith_operand" "r,I"))))] + "TARGET_64BIT" +- "@ +- add.w\t%0,%1,%2 +- addi.w\t%0,%1,%2" ++ "add%i2.w\t%0,%1,%2" + [(set_attr "alu_type" "add") + (set_attr "mode" "SI")]) + +@@ -885,23 +655,23 @@ + + (define_insn "sub3" + [(set (match_operand:GPR 0 "register_operand" "=r") +- (minus:GPR (match_operand:GPR 1 "register_operand" "r") ++ (minus:GPR (match_operand:GPR 1 "register_operand" "rJ") + (match_operand:GPR 2 "register_operand" "r")))] + "" +- "sub.\t%0,%1,%2" ++ "sub.\t%0,%z1,%2" + [(set_attr "alu_type" "sub") +- (set_attr "compression" "*") + (set_attr "mode" "")]) + ++ + (define_insn "*subsi3_extended" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI +- (minus:SI (match_operand:SI 1 "register_operand" "r") +- (match_operand:SI 2 "register_operand" "r"))))] ++ (minus:SI (match_operand:SI 1 "reg_or_0_operand" "rJ") ++ (match_operand:SI 2 "register_operand" "r"))))] + "TARGET_64BIT" +- "sub.w\t%0,%1,%2" +- [(set_attr "alu_type" "sub") +- (set_attr 
"mode" "DI")]) ++ "sub.w\t%0,%z1,%2" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI")]) + + ;; + ;; .................... +@@ -911,17 +681,10 @@ + ;; .................... + ;; + +-(define_expand "mul3" +- [(set (match_operand:SCALARF 0 "register_operand") +- (mult:SCALARF (match_operand:SCALARF 1 "register_operand") +- (match_operand:SCALARF 2 "register_operand")))] +- "" +- "") +- +-(define_insn "*mul3" +- [(set (match_operand:SCALARF 0 "register_operand" "=f") +- (mult:SCALARF (match_operand:SCALARF 1 "register_operand" "f") +- (match_operand:SCALARF 2 "register_operand" "f")))] ++(define_insn "mul3" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (mult:ANYF (match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f")))] + "" + "fmul.\t%0,%1,%2" + [(set_attr "type" "fmul") +@@ -933,20 +696,27 @@ + (match_operand:GPR 2 "register_operand" "r")))] + "" + "mul.\t%0,%1,%2" +- [(set_attr "type" "imul3") ++ [(set_attr "type" "imul") + (set_attr "mode" "")]) + +- +- + (define_insn "mulsidi3_64bit" + [(set (match_operand:DI 0 "register_operand" "=r") + (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r")) + (sign_extend:DI (match_operand:SI 2 "register_operand" "r"))))] +- "" ++ "TARGET_64BIT" + "mul.d\t%0,%1,%2" +- [(set_attr "type" "imul3") ++ [(set_attr "type" "imul") + (set_attr "mode" "DI")]) + ++(define_insn "*mulsi3_extended" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (sign_extend:DI ++ (mult:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r"))))] ++ "TARGET_64BIT" ++ "mul.w\t%0,%1,%2" ++ [(set_attr "type" "imul") ++ (set_attr "mode" "SI")]) + + ;; + ;; ........................ +@@ -956,9 +726,8 @@ + ;; ........................ + ;; + +- + (define_expand "mulditi3" +- [(set (match_operand:TI 0 "register_operand") ++ [(set (match_operand:TI 0 "register_operand") + (mult:TI (any_extend:TI (match_operand:DI 1 "register_operand")) + (any_extend:TI (match_operand:DI 2 "register_operand"))))] + "TARGET_64BIT" +@@ -975,7 +744,7 @@ + }) + + (define_insn "muldi3_highpart" +- [(set (match_operand:DI 0 "register_operand" "=r") ++ [(set (match_operand:DI 0 "register_operand" "=r") + (truncate:DI + (lshiftrt:TI + (mult:TI (any_extend:TI +@@ -989,7 +758,7 @@ + (set_attr "mode" "DI")]) + + (define_expand "mulsidi3" +- [(set (match_operand:DI 0 "register_operand" "=r") ++ [(set (match_operand:DI 0 "register_operand" "=r") + (mult:DI (any_extend:DI + (match_operand:SI 1 "register_operand" " r")) + (any_extend:DI +@@ -1005,7 +774,7 @@ + }) + + (define_insn "mulsi3_highpart" +- [(set (match_operand:SI 0 "register_operand" "=r") ++ [(set (match_operand:SI 0 "register_operand" "=r") + (truncate:SI + (lshiftrt:DI + (mult:DI (any_extend:DI +@@ -1018,97 +787,35 @@ + [(set_attr "type" "imul") + (set_attr "mode" "SI")]) + +-;; Floating point multiply accumulate instructions. + +-(define_expand "fma4" +- [(set (match_operand:ANYF 0 "register_operand") +- (fma:ANYF (match_operand:ANYF 1 "register_operand") +- (match_operand:ANYF 2 "register_operand") +- (match_operand:ANYF 3 "register_operand")))] +- "TARGET_HARD_FLOAT") ++;; .................... ++;; ++;; FLOATING POINT COPYSIGN ++;; ++;; .................... ++ ++;; FLOATING POINT COPYSIGN ++;; ++;; .................... 
+ +-(define_insn "*fma4_madd4" ++(define_insn "copysign3" + [(set (match_operand:ANYF 0 "register_operand" "=f") +- (fma:ANYF (match_operand:ANYF 1 "register_operand" "f") +- (match_operand:ANYF 2 "register_operand" "f") +- (match_operand:ANYF 3 "register_operand" "f")))] ++ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f")] ++ UNSPEC_FCOPYSIGN))] + "TARGET_HARD_FLOAT" +- "fmadd.\t%0,%1,%2,%3" +- [(set_attr "type" "fmadd") ++ "fcopysign.\t%0,%1,%2" ++ [(set_attr "type" "fcopysign") + (set_attr "mode" "")]) + +-;; The fms, fnma, and fnms instructions can be used even when HONOR_NANS +-;; is true because while IEEE 754-2008 requires the negate operation to +-;; negate the sign of a NAN and the LARCH neg instruction does not do this, +-;; the fma part of the instruction has no requirement on how the sign of +-;; a NAN is handled and so the final sign bit of the entire operation is +-;; undefined. +- +-(define_expand "fms4" +- [(set (match_operand:ANYF 0 "register_operand") +- (fma:ANYF (match_operand:ANYF 1 "register_operand") +- (match_operand:ANYF 2 "register_operand") +- (neg:ANYF (match_operand:ANYF 3 "register_operand"))))] +- "TARGET_HARD_FLOAT") +- + +-(define_insn "*fms4_msub4" +- [(set (match_operand:ANYF 0 "register_operand" "=f") +- (fma:ANYF (match_operand:ANYF 1 "register_operand" "f") +- (match_operand:ANYF 2 "register_operand" "f") +- (neg:ANYF (match_operand:ANYF 3 "register_operand" "f"))))] +- "TARGET_HARD_FLOAT" +- "fmsub.\t%0,%1,%2,%3" +- [(set_attr "type" "fmadd") +- (set_attr "mode" "")]) ++;; + +-;; fnma is defined in GCC as (fma (neg op1) op2 op3) +-;; (-op1 * op2) + op3 ==> -(op1 * op2) + op3 ==> -((op1 * op2) - op3) +-;; The loongarch nmsub instructions implement -((op1 * op2) - op3) +-;; This transformation means we may return the wrong signed zero +-;; so we check HONOR_SIGNED_ZEROS. + +-(define_expand "fnma4" +- [(set (match_operand:ANYF 0 "register_operand") +- (fma:ANYF (neg:ANYF (match_operand:ANYF 1 "register_operand")) +- (match_operand:ANYF 2 "register_operand") +- (match_operand:ANYF 3 "register_operand")))] +- "TARGET_HARD_FLOAT && !HONOR_SIGNED_ZEROS (mode)") + +-(define_insn "*fnma4_nmsub4" +- [(set (match_operand:ANYF 0 "register_operand" "=f") +- (fma:ANYF (neg:ANYF (match_operand:ANYF 1 "register_operand" "f")) +- (match_operand:ANYF 2 "register_operand" "f") +- (match_operand:ANYF 3 "register_operand" "f")))] +- "TARGET_HARD_FLOAT && !HONOR_SIGNED_ZEROS (mode)" +- "fnmsub.\t%0,%1,%2,%3" +- [(set_attr "type" "fmadd") +- (set_attr "mode" "")]) + +-;; fnms is defined as: (fma (neg op1) op2 (neg op3)) +-;; ((-op1) * op2) - op3 ==> -(op1 * op2) - op3 ==> -((op1 * op2) + op3) +-;; The loongarch nmadd instructions implement -((op1 * op2) + op3) +-;; This transformation means we may return the wrong signed zero +-;; so we check HONOR_SIGNED_ZEROS. 
+ +-(define_expand "fnms4" +- [(set (match_operand:ANYF 0 "register_operand") +- (fma:ANYF +- (neg:ANYF (match_operand:ANYF 1 "register_operand")) +- (match_operand:ANYF 2 "register_operand") +- (neg:ANYF (match_operand:ANYF 3 "register_operand"))))] +- "TARGET_HARD_FLOAT && !HONOR_SIGNED_ZEROS (mode)") + +-(define_insn "*fnms4_nmadd4" +- [(set (match_operand:ANYF 0 "register_operand" "=f") +- (fma:ANYF +- (neg:ANYF (match_operand:ANYF 1 "register_operand" "f")) +- (match_operand:ANYF 2 "register_operand" "f") +- (neg:ANYF (match_operand:ANYF 3 "register_operand" "f"))))] +- "TARGET_HARD_FLOAT && !HONOR_SIGNED_ZEROS (mode)" +- "fnmadd.\t%0,%1,%2,%3" +- [(set_attr "type" "fmadd") +- (set_attr "mode" "")]) + + ;; + ;; .................... +@@ -1118,144 +825,315 @@ + ;; .................... + ;; + ++;; Float division and modulus. + (define_expand "div3" + [(set (match_operand:ANYF 0 "register_operand") +- (div:ANYF (match_operand:ANYF 1 "reg_or_1_operand") ++ (div:ANYF (match_operand:ANYF 1 "register_operand") + (match_operand:ANYF 2 "register_operand")))] +- "" ++ "" + { +- if (const_1_operand (operands[1], mode)) +- if (!(ISA_HAS_FP_RECIP_RSQRT (mode) +- && flag_unsafe_math_optimizations)) +- operands[1] = force_reg (mode, operands[1]); ++ if (mode == SFmode ++ && TARGET_RECIP_DIV ++ && optimize_insn_for_speed_p () ++ && flag_finite_math_only && !flag_trapping_math ++ && flag_unsafe_math_optimizations) ++ { ++ loongarch_emit_swdivsf (operands[0], operands[1], ++ operands[2], SFmode); ++ DONE; ++ } + }) + +-;; These patterns work around the early SB-1 rev2 core "F1" erratum: +-;; +-;; If an mftg1 or dmftg1 happens to access the floating point register +-;; file at the same time a long latency operation (div, sqrt, recip, +-;; sqrt) iterates an intermediate result back through the floating +-;; point register file bypass, then instead returning the correct +-;; register value the mftg1 or dmftg1 operation returns the intermediate +-;; result of the long latency operation. +-;; +-;; The workaround is to insert an unconditional 'mov' from/to the +-;; long latency op destination register. +- + (define_insn "*div3" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (div:ANYF (match_operand:ANYF 1 "register_operand" "f") + (match_operand:ANYF 2 "register_operand" "f")))] +- "" +-{ +- return "fdiv.\t%0,%1,%2"; +-} ++ "" ++ "fdiv.\t%0,%1,%2" + [(set_attr "type" "fdiv") + (set_attr "mode" "") + (set_attr "insn_count" "1")]) + ++;; In 3A5000, the reciprocal operation is the same as the division operation. ++ + (define_insn "*recip3" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (div:ANYF (match_operand:ANYF 1 "const_1_operand" "") + (match_operand:ANYF 2 "register_operand" "f")))] +- "ISA_HAS_FP_RECIP_RSQRT (mode) && flag_unsafe_math_optimizations" +-{ +- return "frecip.\t%0,%2"; +-} ++ "" ++ "frecip.\t%0,%2" + [(set_attr "type" "frdiv") + (set_attr "mode" "") + (set_attr "insn_count" "1")]) + ++;; In 3A6000, frecipe calculates the approximate value of the reciprocal operation ++ ++(define_insn "recipe2" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")] ++ UNSPEC_RECIPE))] ++ "TARGET_HARD_FLOAT && flag_unsafe_math_optimizations && TARGET_RECIP_DIV" ++ "frecipe.\t%0,%1" ++ [(set_attr "type" "frsqrte") ++ (set_attr "mode" "") ++ (set_attr "insn_count" "1")]) ++ + ;; Integer division and modulus. 
++(define_expand "3" ++ [(set (match_operand:GPR 0 "register_operand") ++ (any_div:GPR (match_operand:GPR 1 "register_operand") ++ (match_operand:GPR 2 "register_operand")))] ++ "" ++{ ++ if (GET_MODE (operands[0]) == SImode) ++ { ++ rtx reg1 = gen_reg_rtx (DImode); ++ rtx reg2 = gen_reg_rtx (DImode); ++ ++ operands[1] = gen_rtx_SIGN_EXTEND (word_mode, operands[1]); ++ operands[2] = gen_rtx_SIGN_EXTEND (word_mode, operands[2]); ++ ++ emit_insn (gen_rtx_SET (reg1, operands[1])); ++ emit_insn (gen_rtx_SET (reg2, operands[2])); + +-(define_insn "div3" ++ emit_insn (gen_di3_fake (operands[0], reg1, reg2)); ++ DONE; ++ } ++}) ++ ++(define_insn "*3" + [(set (match_operand:GPR 0 "register_operand" "=&r") + (any_div:GPR (match_operand:GPR 1 "register_operand" "r") + (match_operand:GPR 2 "register_operand" "r")))] + "" +- { +- return loongarch_output_division ("div.\t%0,%1,%2", operands); +- } +- [(set_attr "type" "idiv3") ++{ ++ return loongarch_output_division (".\t%0,%1,%2", operands); ++} ++ [(set_attr "type" "idiv") + (set_attr "mode" "")]) + +-(define_insn "mod3" +- [(set (match_operand:GPR 0 "register_operand" "=&r") +- (any_mod:GPR (match_operand:GPR 1 "register_operand" "r") +- (match_operand:GPR 2 "register_operand" "r")))] ++(define_insn "di3_fake" ++ [(set (match_operand:SI 0 "register_operand" "=&r") ++ (any_div:SI (match_operand:DI 1 "register_operand" "r") ++ (match_operand:DI 2 "register_operand" "r")))] + "" +- { +- return loongarch_output_division ("mod.\t%0,%1,%2", operands); +- } +- [(set_attr "type" "idiv3") +- (set_attr "mode" "")]) +- +-;; +-;; .................... +-;; +-;; SQUARE ROOT +-;; +-;; .................... +- +-;; These patterns work around the early SB-1 rev2 core "F1" erratum (see +-;; "*div[sd]f3" comment for details). +- +-(define_insn "sqrt2" +- [(set (match_operand:ANYF 0 "register_operand" "=f") +- (sqrt:ANYF (match_operand:ANYF 1 "register_operand" "f")))] +- "" + { +- return "fsqrt.\t%0,%1"; ++ return loongarch_output_division (".w\t%0,%1,%2", operands); + } +- [(set_attr "type" "fsqrt") +- (set_attr "mode" "") +- (set_attr "insn_count" "1")]) ++ [(set_attr "type" "idiv") ++ (set_attr "mode" "SI")]) + +-(define_insn "*rsqrta" +- [(set (match_operand:ANYF 0 "register_operand" "=f") +- (div:ANYF (match_operand:ANYF 1 "const_1_operand" "") +- (sqrt:ANYF (match_operand:ANYF 2 "register_operand" "f"))))] +- "ISA_HAS_FP_RECIP_RSQRT (mode) && flag_unsafe_math_optimizations" +-{ +- return "frsqrt.\t%0,%2"; +-} +- [(set_attr "type" "frsqrt") +- (set_attr "mode" "") +- (set_attr "insn_count" "1")]) ++;; Floating point multiply accumulate instructions. + +-(define_insn "*rsqrtb" ++;; a * b + c ++(define_insn "fma4" + [(set (match_operand:ANYF 0 "register_operand" "=f") +- (sqrt:ANYF (div:ANYF (match_operand:ANYF 1 "const_1_operand" "") +- (match_operand:ANYF 2 "register_operand" "f"))))] +- "ISA_HAS_FP_RECIP_RSQRT (mode) && flag_unsafe_math_optimizations" +-{ +- return "frsqrt.\t%0,%2"; +-} +- [(set_attr "type" "frsqrt") +- (set_attr "mode" "") +- (set_attr "insn_count" "1")]) +- +-;; +-;; .................... +-;; +-;; ABSOLUTE VALUE +-;; +-;; .................... +- +-;; Do not use the integer abs macro instruction, since that signals an +-;; exception on -2147483648 (sigh). +- +-;; The "legacy" (as opposed to "2008") form of ABS.fmt is an arithmetic +-;; instruction that treats all NaN inputs as invalid; it does not clear +-;; their sign bit. We therefore can't use that form if the signs of +-;; NaNs matter. 
++ (fma:ANYF (match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f") ++ (match_operand:ANYF 3 "register_operand" "f")))] ++ "" ++ "fmadd.\t%0,%1,%2,%3" ++ [(set_attr "type" "fmadd") ++ (set_attr "mode" "")]) + +-(define_insn "abs2" ++;; a * b - c ++(define_insn "fms4" + [(set (match_operand:ANYF 0 "register_operand" "=f") +- (abs:ANYF (match_operand:ANYF 1 "register_operand" "f")))] ++ (fma:ANYF (match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f") ++ (neg:ANYF (match_operand:ANYF 3 "register_operand" "f"))))] + "" +- "fabs.\t%0,%1" +- [(set_attr "type" "fabs") ++ "fmsub.\t%0,%1,%2,%3" ++ [(set_attr "type" "fmadd") ++ (set_attr "mode" "")]) ++ ++;; fnma is defined in GCC as (fma (neg op1) op2 op3) ++;; (-op1 * op2) + op3 ==> -(op1 * op2) + op3 ==> -((op1 * op2) - op3) ++;; The loongarch nmsub instructions implement -((op1 * op2) - op3) ++;; This transformation means we may return the wrong signed zero ++;; so we check HONOR_SIGNED_ZEROS. ++ ++;; -a * b + c ++(define_insn "fnma4" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (fma:ANYF (neg:ANYF (match_operand:ANYF 1 "register_operand" "f")) ++ (match_operand:ANYF 2 "register_operand" "f") ++ (match_operand:ANYF 3 "register_operand" "f")))] ++ "!HONOR_SIGNED_ZEROS (mode)" ++ "fnmsub.\t%0,%1,%2,%3" ++ [(set_attr "type" "fmadd") ++ (set_attr "mode" "")]) ++ ++;; fnms is defined as: (fma (neg op1) op2 (neg op3)) ++;; ((-op1) * op2) - op3 ==> -(op1 * op2) - op3 ==> -((op1 * op2) + op3) ++;; The loongarch nmadd instructions implement -((op1 * op2) + op3) ++;; This transformation means we may return the wrong signed zero ++;; so we check HONOR_SIGNED_ZEROS. ++ ++;; -a * b - c ++(define_insn "fnms4" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (fma:ANYF ++ (neg:ANYF (match_operand:ANYF 1 "register_operand" "f")) ++ (match_operand:ANYF 2 "register_operand" "f") ++ (neg:ANYF (match_operand:ANYF 3 "register_operand" "f"))))] ++ "!HONOR_SIGNED_ZEROS (mode)" ++ "fnmadd.\t%0,%1,%2,%3" ++ [(set_attr "type" "fmadd") ++ (set_attr "mode" "")]) ++ ++;; -(-a * b - c), modulo signed zeros ++(define_insn "*fma4" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (neg:ANYF ++ (fma:ANYF ++ (neg:ANYF (match_operand:ANYF 1 "register_operand" " f")) ++ (match_operand:ANYF 2 "register_operand" " f") ++ (neg:ANYF (match_operand:ANYF 3 "register_operand" " f")))))] ++ "!HONOR_SIGNED_ZEROS (mode)" ++ "fmadd.\t%0,%1,%2,%3" ++ [(set_attr "type" "fmadd") ++ (set_attr "mode" "")]) ++ ++;; -(-a * b + c), modulo signed zeros ++(define_insn "*fms4" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (neg:ANYF ++ (fma:ANYF ++ (neg:ANYF (match_operand:ANYF 1 "register_operand" " f")) ++ (match_operand:ANYF 2 "register_operand" " f") ++ (match_operand:ANYF 3 "register_operand" " f"))))] ++ "!HONOR_SIGNED_ZEROS (mode)" ++ "fmsub.\t%0,%1,%2,%3" ++ [(set_attr "type" "fmadd") ++ (set_attr "mode" "")]) ++ ++;; -(a * b + c) ++(define_insn "*fnms4" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (neg:ANYF ++ (fma:ANYF ++ (match_operand:ANYF 1 "register_operand" " f") ++ (match_operand:ANYF 2 "register_operand" " f") ++ (match_operand:ANYF 3 "register_operand" " f"))))] ++ "" ++ "fnmadd.\t%0,%1,%2,%3" ++ [(set_attr "type" "fmadd") ++ (set_attr "mode" "")]) ++ ++;; -(a * b - c) ++(define_insn "*fnma4" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (neg:ANYF ++ (fma:ANYF ++ (match_operand:ANYF 1 "register_operand" " f") ++ (match_operand:ANYF 2 
"register_operand" " f") ++ (neg:ANYF (match_operand:ANYF 3 "register_operand" " f")))))] ++ "" ++ "fnmsub.\t%0,%1,%2,%3" ++ [(set_attr "type" "fmadd") ++ (set_attr "mode" "")]) ++ ++;; ++;; .................... ++;; ++;; SQUARE ROOT ++;; ++;; .................... ++ ++(define_insn "*sqrt2" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (sqrt:ANYF (match_operand:ANYF 1 "register_operand" "f")))] ++ "" ++ "fsqrt.\t%0,%1" ++ [(set_attr "type" "fsqrt") ++ (set_attr "mode" "") ++ (set_attr "insn_count" "1")]) ++ ++(define_expand "sqrt2" ++ [(set (match_operand:ANYF 0 "register_operand") ++ (sqrt:ANYF (match_operand:ANYF 1 "register_operand")))] ++ "" ++{ ++ if (mode == SFmode ++ && TARGET_RECIP_SQRT ++ && flag_unsafe_math_optimizations ++ && !optimize_insn_for_size_p () ++ && flag_finite_math_only && !flag_trapping_math) ++ { ++ loongarch_emit_swrsqrtsf (operands[0], operands[1], SFmode, 0); ++ DONE; ++ } ++}) ++ ++(define_expand "rsqrt2" ++ [(set (match_operand:ANYF 0 "register_operand") ++ (unspec:ANYF [(match_operand:ANYF 1 "register_operand")] ++ UNSPEC_RSQRT))] ++ "TARGET_HARD_FLOAT && flag_unsafe_math_optimizations" ++{ ++ if (mode == SFmode ++ && TARGET_RECIP_RSQRT ++ && flag_unsafe_math_optimizations ++ && !optimize_insn_for_size_p () ++ && flag_finite_math_only && !flag_trapping_math) ++ { ++ loongarch_emit_swrsqrtsf (operands[0], operands[1], SFmode, 1); ++ DONE; ++ } ++}) ++ ++(define_insn "*rsqrt2" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")] ++ UNSPEC_RSQRT))] ++ "TARGET_HARD_FLOAT && flag_unsafe_math_optimizations" ++ "frsqrt.\t%0,%1" ++ [(set_attr "type" "frsqrt") ++ (set_attr "mode" "")]) ++ ++(define_insn "rsqrte" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")] ++ UNSPEC_RSQRTE))] ++ "TARGET_HARD_FLOAT && flag_unsafe_math_optimizations && TARGET_RECIP_SQRT" ++ "frsqrte.\t%0,%1" ++ [(set_attr "type" "frsqrte") ++ (set_attr "mode" "")]) ++ ++(define_insn "*rsqrta" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (div:ANYF (match_operand:ANYF 1 "const_1_operand" "") ++ (sqrt:ANYF (match_operand:ANYF 2 "register_operand" "f"))))] ++ "TARGET_HARD_FLOAT && flag_unsafe_math_optimizations" ++ "frsqrt.\t%0,%2" ++ [(set_attr "type" "frsqrt") ++ (set_attr "mode" "") ++ (set_attr "insn_count" "1")]) ++ ++(define_insn "*rsqrtb" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (sqrt:ANYF (div:ANYF (match_operand:ANYF 1 "const_1_operand" "") ++ (match_operand:ANYF 2 "register_operand" "f"))))] ++ "flag_unsafe_math_optimizations" ++ "frsqrt.\t%0,%2" ++ [(set_attr "type" "frsqrt") ++ (set_attr "mode" "") ++ (set_attr "insn_count" "1")]) ++ ++;; ++;; .................... ++;; ++;; ABSOLUTE VALUE ++;; ++;; .................... ++ ++(define_insn "abs2" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (abs:ANYF (match_operand:ANYF 1 "register_operand" "f")))] ++ "" ++ "fabs.\t%0,%1" ++ [(set_attr "type" "fabs") + (set_attr "mode" "")]) + + ;; +@@ -1290,7 +1168,54 @@ + [(set_attr "type" "clz") + (set_attr "mode" "")]) + ++;; ++;; .................... ++;; ++;; MIN/MAX ++;; ++;; .................... 
++ ++(define_insn "smax3" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (smax:ANYF (match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f")))] ++ "" ++ "fmax.\t%0,%1,%2" ++ [(set_attr "type" "fmove") ++ (set_attr "mode" "")]) ++ ++(define_insn "smin3" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (smin:ANYF (match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f")))] ++ "" ++ "fmin.\t%0,%1,%2" ++ [(set_attr "type" "fmove") ++ (set_attr "mode" "")]) + ++(define_insn "smaxa3" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (if_then_else:ANYF ++ (gt (abs:ANYF (match_operand:ANYF 1 "register_operand" "f")) ++ (abs:ANYF (match_operand:ANYF 2 "register_operand" "f"))) ++ (match_dup 1) ++ (match_dup 2)))] ++ "" ++ "fmaxa.\t%0,%1,%2" ++ [(set_attr "type" "fmove") ++ (set_attr "mode" "")]) ++ ++(define_insn "smina3" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (if_then_else:ANYF ++ (lt (abs:ANYF (match_operand:ANYF 1 "register_operand" "f")) ++ (abs:ANYF (match_operand:ANYF 2 "register_operand" "f"))) ++ (match_dup 1) ++ (match_dup 2)))] ++ "" ++ "fmina.\t%0,%1,%2" ++ [(set_attr "type" "fmove") ++ (set_attr "mode" "")]) + + ;; + ;; .................... +@@ -1299,28 +1224,21 @@ + ;; + ;; .................... + +-(define_insn "negsi2" +- [(set (match_operand:SI 0 "register_operand" "=r") +- (neg:SI (match_operand:SI 1 "register_operand" "r")))] ++(define_insn "neg2" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (neg:GPR (match_operand:GPR 1 "register_operand" "r")))] + "" +-{ +- return "sub.w\t%0,%.,%1"; +-} ++ "sub.\t%0,%.,%1" + [(set_attr "alu_type" "sub") +- (set_attr "mode" "SI")]) +- +-(define_insn "negdi2" +- [(set (match_operand:DI 0 "register_operand" "=r") +- (neg:DI (match_operand:DI 1 "register_operand" "r")))] +- "TARGET_64BIT" +- "sub.d\t%0,%.,%1" +- [(set_attr "alu_type" "sub") +- (set_attr "mode" "DI")]) ++ (set_attr "mode" "")]) + +-;; The "legacy" (as opposed to "2008") form of NEG.fmt is an arithmetic +-;; instruction that treats all NaN inputs as invalid; it does not flip +-;; their sign bit. We therefore can't use that form if the signs of +-;; NaNs matter. ++(define_insn "one_cmpl2" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (not:GPR (match_operand:GPR 1 "register_operand" "r")))] ++ "" ++ "nor\t%0,%.,%1" ++ [(set_attr "alu_type" "not") ++ (set_attr "mode" "")]) + + (define_insn "neg2" + [(set (match_operand:ANYF 0 "register_operand" "=f") +@@ -1329,17 +1247,6 @@ + "fneg.\t%0,%1" + [(set_attr "type" "fneg") + (set_attr "mode" "")]) +- +-(define_insn "one_cmpl2" +- [(set (match_operand:GPR 0 "register_operand" "=r") +- (not:GPR (match_operand:GPR 1 "register_operand" "r")))] +- "" +-{ +- return "nor\t%0,%.,%1"; +-} +- [(set_attr "alu_type" "not") +- (set_attr "compression" "*") +- (set_attr "mode" "")]) + + + ;; +@@ -1350,133 +1257,58 @@ + ;; .................... + ;; + +- +-(define_expand "and3" +- [(set (match_operand:GPR 0 "register_operand") +- (and:GPR (match_operand:GPR 1 "register_operand") +- (match_operand:GPR 2 "and_reg_operand")))]) +- +-;; The middle-end is not allowed to convert ANDing with 0xffff_ffff into a +-;; zero_extendsidi2 because of TARGET_TRULY_NOOP_TRUNCATION, so handle these +-;; here. Note that this variant does not trigger for SI mode because we +-;; require a 64-bit HOST_WIDE_INT and 0xffff_ffff wouldn't be a canonical +-;; sign-extended SImode value. 
+-;; +-;; These are possible combinations for operand 1 and 2. +-;; (r=register, mem=memory, x=match, S=split): +-;; +-;; \ op1 r/EXT r/!EXT mem +-;; op2 +-;; +-;; andi x x +-;; 0xff x x x +-;; 0xffff x x x +-;; 0xffff_ffff x S x +-;; low-bitmask x +-;; register x x +-;; register =op1 +- +-(define_insn "*and3" +- [(set (match_operand:GPR 0 "register_operand" "=r,r,r,r,r,r,r") +- (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "o,o,W,r,r,r,r") +- (match_operand:GPR 2 "and_operand" "Yb,Yh,Yw,K,Yx,Yw,r")))] +- " and_operands_ok (mode, operands[1], operands[2])" +-{ +- int len; +- +- switch (which_alternative) +- { +- case 0: +- operands[1] = gen_lowpart (QImode, operands[1]); +- return "ld.bu\t%0,%1"; +- case 1: +- operands[1] = gen_lowpart (HImode, operands[1]); +- return "ld.hu\t%0,%1"; +- case 2: +- operands[1] = gen_lowpart (SImode, operands[1]); +- if (loongarch_14bit_shifted_offset_address_p (XEXP (operands[1], 0), SImode)) +- return "ldptr.w\t%0,%1\n\tbstrins.d\t%0,$r0,63,32"; +- else if (loongarch_12bit_offset_address_p (XEXP (operands[1], 0), SImode)) +- return "ld.wu\t%0,%1"; +- else +- gcc_unreachable (); +- case 3: +- return "andi\t%0,%1,%x2"; +- case 4: +- len = low_bitmask_len (mode, INTVAL (operands[2])); +- operands[2] = GEN_INT (len-1); +- return "bstrpick.\t%0,%1,%2,0"; +- case 5: +- return "#"; +- case 6: +- return "and\t%0,%1,%2"; +- default: +- gcc_unreachable (); +- } +-} +- [(set_attr "move_type" "load,load,load,andi,pick_ins,shift_shift,logical") +- (set_attr "compression" "*,*,*,*,*,*,*") ++(define_insn "3" ++ [(set (match_operand:GPR 0 "register_operand" "=r,r") ++ (any_bitwise:GPR (match_operand:GPR 1 "register_operand" "r,r") ++ (match_operand:GPR 2 "uns_arith_operand" "r,K")))] ++ "" ++ "%i2\t%0,%1,%2" ++ [(set_attr "type" "logical") + (set_attr "mode" "")]) + +-(define_expand "ior3" +- [(set (match_operand:GPR 0 "register_operand") +- (ior:GPR (match_operand:GPR 1 "register_operand") +- (match_operand:GPR 2 "uns_arith_operand")))] ++(define_insn "and3_extended" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "r") ++ (match_operand:GPR 2 "low_bitmask_operand" "Yx")))] + "" + { +-}) ++ int len; + +-(define_insn "*ior3" +- [(set (match_operand:GPR 0 "register_operand" "=r,r") +- (ior:GPR (match_operand:GPR 1 "register_operand" "r,r") +- (match_operand:GPR 2 "uns_arith_operand" "r,K")))] +- "" +- "@ +- or\t%0,%1,%2 +- ori\t%0,%1,%x2" +- [(set_attr "alu_type" "or") +- (set_attr "compression" "*,*") ++ len = low_bitmask_len (mode, INTVAL (operands[2])); ++ operands[2] = GEN_INT (len-1); ++ return "bstrpick.\t%0,%1,%2,0"; ++} ++ [(set_attr "move_type" "pick_ins") + (set_attr "mode" "")]) + + (define_insn "*iorhi3" + [(set (match_operand:HI 0 "register_operand" "=r,r") +- (ior:HI (match_operand:HI 1 "register_operand" "r,r") +- (match_operand:HI 2 "uns_arith_operand" "K,r")))] ++ (ior:HI (match_operand:HI 1 "register_operand" "%r,r") ++ (match_operand:HI 2 "uns_arith_operand" "r,K")))] + "" +- "@ +- ori\t%0,%1,%x2 +- or\t%0,%1,%2" +- [(set_attr "alu_type" "or") ++ "or%i2\t%0,%1,%2" ++ [(set_attr "type" "logical") + (set_attr "mode" "HI")]) + +-(define_expand "xor3" +- [(set (match_operand:GPR 0 "register_operand") +- (xor:GPR (match_operand:GPR 1 "register_operand") +- (match_operand:GPR 2 "uns_arith_operand")))] +- "" +- "") +- +-(define_insn "*xor3" +- [(set (match_operand:GPR 0 "register_operand" "=r,r") +- (xor:GPR (match_operand:GPR 1 "register_operand" "r,r") +- (match_operand:GPR 2 
"uns_arith_operand" "r,K")))] +- "" +- "@ +- xor\t%0,%1,%2 +- xori\t%0,%1,%x2" +- [(set_attr "alu_type" "xor") +- (set_attr "compression" "*,*") +- (set_attr "mode" "")]) +- +- + (define_insn "*nor3" + [(set (match_operand:GPR 0 "register_operand" "=r") +- (and:GPR (not:GPR (match_operand:GPR 1 "register_operand" "r")) ++ (and:GPR (not:GPR (match_operand:GPR 1 "register_operand" "%r")) + (not:GPR (match_operand:GPR 2 "register_operand" "r"))))] + "" + "nor\t%0,%1,%2" +- [(set_attr "alu_type" "nor") ++ [(set_attr "type" "logical") + (set_attr "mode" "")]) ++ ++(define_insn "n" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (neg_bitwise:GPR ++ (not:GPR (match_operand:GPR 1 "register_operand" "r")) ++ (match_operand:GPR 2 "register_operand" "r")))] ++ "" ++ "n\t%0,%2,%1" ++ [(set_attr "type" "logical") ++ (set_attr "mode" "")]) ++ + + ;; + ;; .................... +@@ -1485,163 +1317,109 @@ + ;; + ;; .................... + +- +- +-(define_insn "truncdfsf2" +- [(set (match_operand:SF 0 "register_operand" "=f") +- (float_truncate:SF (match_operand:DF 1 "register_operand" "f")))] +- "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT" +- "fcvt.s.d\t%0,%1" +- [(set_attr "type" "fcvt") +- (set_attr "cnv_mode" "D2S") +- (set_attr "mode" "SF")]) +- +-;; Integer truncation patterns. Truncating SImode values to smaller +-;; modes is a no-op, as it is for most other GCC ports. Truncating +-;; DImode values to SImode is not a no-op for TARGET_64BIT since we +-;; need to make sure that the lower 32 bits are properly sign-extended +-;; (see TARGET_TRULY_NOOP_TRUNCATION). Truncating DImode values into modes +-;; smaller than SImode is equivalent to two separate truncations: +-;; +-;; A B +-;; DI ---> HI == DI ---> SI ---> HI +-;; DI ---> QI == DI ---> SI ---> QI +-;; +-;; Step A needs a real instruction but step B does not. +- +-(define_insn "truncdisi2" +- [(set (match_operand:SI 0 "nonimmediate_operand" "=r,ZC,m") +- (truncate:SI (match_operand:DI 1 "register_operand" "r,r,r")))] +- "TARGET_64BIT" +- "@ +- slli.w\t%0,%1,0 +- stptr.w\t%1,%0 +- st.w\t%1,%0" +- [(set_attr "move_type" "sll0,store,store") +- (set_attr "mode" "SI")]) +- + (define_insn "truncdi2" +- [(set (match_operand:SHORT 0 "nonimmediate_operand" "=r,m") +- (truncate:SHORT (match_operand:DI 1 "register_operand" "r,r")))] ++ [(set (match_operand:SUBDI 0 "nonimmediate_operand" "=r,m,k") ++ (truncate:SUBDI (match_operand:DI 1 "register_operand" "r,r,r")))] + "TARGET_64BIT" + "@ + slli.w\t%0,%1,0 +- st.\t%1,%0" +- [(set_attr "move_type" "sll0,store") ++ st.\t%1,%0 ++ stx.\t%1,%0" ++ [(set_attr "move_type" "sll0,store,store") + (set_attr "mode" "SI")]) + +-;; Combiner patterns to optimize shift/truncate combinations. 
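A hedged sketch of what the new truncdi2 pattern above handles (illustrative C; the function name is invented): DImode-to-SImode truncation is not a no-op on this port because the low 32 bits must stay sign-extended, so the register alternative is expected to come out as a single slli.w, while memory destinations take the st.w/stx.w alternatives.

  /* Illustrative only: a plausible result is slli.w $r4,$r4,0.  */
  int narrow (long x) { return (int) x; }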
+- +-(define_insn "*ashr_trunc" +- [(set (match_operand:SUBDI 0 "register_operand" "=r") +- (truncate:SUBDI +- (ashiftrt:DI (match_operand:DI 1 "register_operand" "r") +- (match_operand:DI 2 "const_arith_operand" ""))))] +- "TARGET_64BIT && IN_RANGE (INTVAL (operands[2]), 32, 63)" +- "srai.d\t%0,%1,%2" +- [(set_attr "type" "shift") +- (set_attr "mode" "")]) ++(define_insn "truncdfsf2" ++ [(set (match_operand:SF 0 "register_operand" "=f") ++ (float_truncate:SF (match_operand:DF 1 "register_operand" "f")))] ++ "TARGET_DOUBLE_FLOAT" ++ "fcvt.s.d\t%0,%1" ++ [(set_attr "type" "fcvt") ++ (set_attr "cnv_mode" "D2S") ++ (set_attr "mode" "SF")]) + +-(define_insn "*lshr32_trunc" +- [(set (match_operand:SUBDI 0 "register_operand" "=r") +- (truncate:SUBDI +- (lshiftrt:DI (match_operand:DI 1 "register_operand" "r") +- (const_int 32))))] +- "TARGET_64BIT" +- "srai.d\t%0,%1,32" +- [(set_attr "type" "shift") +- (set_attr "mode" "")]) ++;;(define_insn "truncdisi2_extended" ++;; [(set (match_operand:SI 0 "nonimmediate_operand" "=ZC") ++;; (truncate:SI (match_operand:DI 1 "register_operand" "r")))] ++;; "TARGET_64BIT" ++;; "stptr.w\t%1,%0" ++;; [(set_attr "move_type" "store") ++;; (set_attr "mode" "SI")]) + + +- + ;; + ;; .................... + ;; + ;; ZERO EXTENSION + ;; + ;; .................... +- +-;; Extension insns. +- + (define_expand "zero_extendsidi2" + [(set (match_operand:DI 0 "register_operand") +- (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))] ++ (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))] + "TARGET_64BIT") + +-(define_insn "*zero_extendsidi2_dext" +- [(set (match_operand:DI 0 "register_operand" "=r,r,r") +- (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,ZC,W")))] ++(define_insn_and_split "*zero_extendsidi2_internal" ++ [(set (match_operand:DI 0 "register_operand" "=r,r,r,r") ++ (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m,ZC,k")))] + "TARGET_64BIT" + "@ + bstrpick.d\t%0,%1,31,0 +- ldptr.w\t%0,%1\n\tlu32i.d\t%0,0 +- ld.wu\t%0,%1" +- [(set_attr "move_type" "arith,load,load") +- (set_attr "mode" "DI") +- (set_attr "insn_count" "1,2,1")]) +- +-;; See the comment before the *and3 pattern why this is generated by +-;; combine. 
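As a reference point for the *zero_extendsidi2_internal pattern above, a minimal C sketch (not part of the patch; name invented): the register alternative has no cheap sign-preserving move to rely on, so bstrpick.d with bits 31..0 is the expected output, while memory sources go through ld.wu/ldx.wu or the ldptr.w split.

  /* Illustrative: a plausible result is bstrpick.d $r4,$r4,31,0.  */
  unsigned long zext32 (unsigned int x) { return x; }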
+- +-(define_expand "zero_extend2" +- [(set (match_operand:GPR 0 "register_operand") +- (zero_extend:GPR (match_operand:SHORT 1 "nonimmediate_operand")))] +- "" +-{ +-}) +- +-(define_insn "*zero_extend2" +- [(set (match_operand:GPR 0 "register_operand" "=r,r") +- (zero_extend:GPR +- (match_operand:SHORT 1 "nonimmediate_operand" "r,m")))] +- "" +-{ +- switch (which_alternative) +- { +- case 0: +- return "bstrpick.\t%0,%1,,0"; +- case 1: +- return "ld.u\t%0,%1"; +- default: +- gcc_unreachable (); ++ ld.wu\t%0,%1 ++ # ++ ldx.wu\t%0,%1" ++ "&& reload_completed ++ && MEM_P (operands[1]) ++ && (loongarch_14bit_shifted_offset_address_p (XEXP (operands[1], 0), SImode) ++ && !loongarch_12bit_offset_address_p (XEXP (operands[1], 0), SImode)) ++ && !paradoxical_subreg_p (operands[0])" ++ [(set (match_dup 3) (match_dup 1)) ++ (set (match_dup 0) ++ (ior:DI (zero_extend:DI (subreg:SI (match_dup 0) 0)) ++ (match_dup 2)))] ++ { ++ operands[1] = gen_lowpart (SImode, operands[1]); ++ operands[3] = gen_lowpart (SImode, operands[0]); ++ operands[2] = const0_rtx; + } +-} +- [(set_attr "move_type" "pick_ins,load") +- (set_attr "compression" "*,*") +- (set_attr "mode" "")]) +- ++ [(set_attr "move_type" "arith,load,load,load") ++ (set_attr "mode" "DI")]) + +-(define_expand "zero_extendqihi2" +- [(set (match_operand:HI 0 "register_operand") +- (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand")))] ++(define_insn "zero_extend2" ++ [(set (match_operand:GPR 0 "register_operand" "=r,r,r") ++ (zero_extend:GPR ++ (match_operand:SHORT 1 "nonimmediate_operand" "r,m,k")))] + "" +-{ +-}) ++ "@ ++ bstrpick.w\t%0,%1,,0 ++ ld.u\t%0,%1 ++ ldx.u\t%0,%1" ++ [(set_attr "move_type" "pick_ins,load,load") ++ (set_attr "mode" "")]) + +-(define_insn "*zero_extendqihi2" +- [(set (match_operand:HI 0 "register_operand" "=r,r") +- (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "r,m")))] ++(define_insn "zero_extendqihi2" ++ [(set (match_operand:HI 0 "register_operand" "=r,r,r") ++ (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "r,k,m")))] + "" + "@ +- andi\t%0,%1,0x00ff ++ andi\t%0,%1,0xff ++ ldx.bu\t%0,%1 + ld.bu\t%0,%1" +- [(set_attr "move_type" "andi,load") ++ [(set_attr "move_type" "andi,load,load") + (set_attr "mode" "HI")]) + + ;; Combiner patterns to optimize truncate/zero_extend combinations. + + (define_insn "*zero_extend_trunc" + [(set (match_operand:GPR 0 "register_operand" "=r") +- (zero_extend:GPR ++ (zero_extend:GPR + (truncate:SHORT (match_operand:DI 1 "register_operand" "r"))))] + "TARGET_64BIT" +- "bstrpick.\t%0,%1,,0" ++ "bstrpick.w\t%0,%1,,0" + [(set_attr "move_type" "pick_ins") + (set_attr "mode" "")]) + + (define_insn "*zero_extendhi_truncqi" + [(set (match_operand:HI 0 "register_operand" "=r") +- (zero_extend:HI ++ (zero_extend:HI + (truncate:QI (match_operand:DI 1 "register_operand" "r"))))] + "TARGET_64BIT" + "andi\t%0,%1,0xff" +@@ -1655,142 +1433,77 @@ + ;; + ;; .................... + +-;; Extension insns. +-;; Those for integer source operand are ordered widest source type first. +- +-;; When TARGET_64BIT, all SImode integer and accumulator registers +-;; should already be in sign-extended form (see TARGET_TRULY_NOOP_TRUNCATION +-;; and truncdisi2). We can therefore get rid of register->register +-;; instructions if we constrain the source to be in the same register as +-;; the destination. +-;; +-;; Only the pre-reload scheduler sees the type of the register alternatives; +-;; we split them into nothing before the post-reload scheduler runs. 
+-;; These alternatives therefore have type "move" in order to reflect +-;; what happens if the two pre-reload operands cannot be tied, and are +-;; instead allocated two separate GPRs. We don't distinguish between +-;; the GPR and LO cases because we don't usually know during pre-reload +-;; scheduling whether an operand will be LO or not. + (define_insn_and_split "extendsidi2" +- [(set (match_operand:DI 0 "register_operand" "=r,r,r") +- (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "0,ZC,m")))] ++ [(set (match_operand:DI 0 "register_operand" "=r,r,r,r") ++ (sign_extend:DI ++ (match_operand:SI 1 "nonimmediate_operand" "0,ZC,m,k")))] + "TARGET_64BIT" +- "@ +- # +- ldptr.w\t%0,%1 +- ld.w\t%0,%1" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "#"; ++ case 1: ++ { ++ rtx offset = XEXP (operands[1], 0); ++ if (GET_CODE (offset) == PLUS) ++ offset = XEXP (offset, 1); ++ else ++ offset = const0_rtx; ++ if (const_arith_operand (offset, Pmode) || (offset == const0_rtx)) ++ return "ld.w\t%0,%1"; ++ else ++ return "ldptr.w\t%0,%1"; ++ } ++ case 2: ++ return "ld.w\t%0,%1"; ++ case 3: ++ return "ldx.w\t%0,%1"; ++ default: ++ gcc_unreachable (); ++ } ++} + "&& reload_completed && register_operand (operands[1], VOIDmode)" + [(const_int 0)] + { + emit_note (NOTE_INSN_DELETED); + DONE; + } +- [(set_attr "move_type" "move,load,load") ++ [(set_attr "move_type" "move,load,load,load") + (set_attr "mode" "DI")]) + +-(define_expand "extend2" +- [(set (match_operand:GPR 0 "register_operand") +- (sign_extend:GPR (match_operand:SHORT 1 "nonimmediate_operand")))] +- "") +- +- +-(define_insn "*extend2_se" +- [(set (match_operand:GPR 0 "register_operand" "=r,r") +- (sign_extend:GPR +- (match_operand:SHORT 1 "nonimmediate_operand" "r,m")))] ++(define_insn "extend2" ++ [(set (match_operand:GPR 0 "register_operand" "=r,r,r") ++ (sign_extend:GPR ++ (match_operand:SHORT 1 "nonimmediate_operand" "r,m,k")))] + "" + "@ + ext.w.\t%0,%1 +- ld.\t%0,%1" +- [(set_attr "move_type" "signext,load") ++ ld.\t%0,%1 ++ ldx.\t%0,%1" ++ [(set_attr "move_type" "signext,load,load") + (set_attr "mode" "")]) + +-(define_expand "extendqihi2" +- [(set (match_operand:HI 0 "register_operand") +- (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand")))] +- "") +- +-(define_insn "*extendqihi2_seb" +- [(set (match_operand:HI 0 "register_operand" "=r,r") +- (sign_extend:HI +- (match_operand:QI 1 "nonimmediate_operand" "r,m")))] ++(define_insn "extendqihi2" ++ [(set (match_operand:HI 0 "register_operand" "=r,r,r") ++ (sign_extend:HI ++ (match_operand:QI 1 "nonimmediate_operand" "r,m,k")))] + "" + "@ + ext.w.b\t%0,%1 +- ld.b\t%0,%1" +- [(set_attr "move_type" "signext,load") +- (set_attr "mode" "SI")]) +- +-;; Combiner patterns for truncate/sign_extend combinations. The SI versions +-;; use the shift/truncate patterns. 
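A short illustration of the sign-extension patterns above (sketch only; function names invented): because SImode values are kept sign-extended in 64-bit registers on this port, the register case of extendsidi2 splits away to nothing after reload, while the narrower modes use ext.w.h/ext.w.b or a sign-extending load.

  /* Illustrative; comments show plausible, not guaranteed, output.  */
  long from_int   (int x)   { return x; }   /* no instruction after the split */
  long from_short (short x) { return x; }   /* ext.w.h */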
+- +-(define_insn_and_split "*extenddi_truncate" +- [(set (match_operand:DI 0 "register_operand" "=r") +- (sign_extend:DI +- (truncate:SHORT (match_operand:DI 1 "register_operand" "r"))))] +- "TARGET_64BIT" +- "#" +- "&& reload_completed" +- [(set (match_dup 2) +- (ashift:DI (match_dup 1) +- (match_dup 3))) +- (set (match_dup 0) +- (ashiftrt:DI (match_dup 2) +- (match_dup 3)))] +-{ +- operands[2] = gen_lowpart (DImode, operands[0]); +- operands[3] = GEN_INT (BITS_PER_WORD - GET_MODE_BITSIZE (mode)); +-} +- [(set_attr "move_type" "shift_shift") +- (set_attr "mode" "DI")]) +- +-(define_insn_and_split "*extendsi_truncate" +- [(set (match_operand:SI 0 "register_operand" "=r") +- (sign_extend:SI +- (truncate:SHORT (match_operand:DI 1 "register_operand" "r"))))] +- "TARGET_64BIT" +- "#" +- "&& reload_completed" +- [(set (match_dup 2) +- (ashift:DI (match_dup 1) +- (match_dup 3))) +- (set (match_dup 0) +- (truncate:SI (ashiftrt:DI (match_dup 2) +- (match_dup 3))))] +-{ +- operands[2] = gen_lowpart (DImode, operands[0]); +- operands[3] = GEN_INT (BITS_PER_WORD - GET_MODE_BITSIZE (mode)); +-} +- [(set_attr "move_type" "shift_shift") +- (set_attr "mode" "SI")]) +- +-(define_insn_and_split "*extendhi_truncateqi" +- [(set (match_operand:HI 0 "register_operand" "=r") +- (sign_extend:HI +- (truncate:QI (match_operand:DI 1 "register_operand" "r"))))] +- "TARGET_64BIT" +- "#" +- "&& reload_completed" +- [(set (match_dup 2) +- (ashift:DI (match_dup 1) +- (const_int 56))) +- (set (match_dup 0) +- (truncate:HI (ashiftrt:DI (match_dup 2) +- (const_int 56))))] +-{ +- operands[2] = gen_lowpart (DImode, operands[0]); +-} +- [(set_attr "move_type" "shift_shift") ++ ld.b\t%0,%1 ++ ldx.b\t%0,%1" ++ [(set_attr "move_type" "signext,load,load") + (set_attr "mode" "SI")]) + + (define_insn "extendsfdf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (float_extend:DF (match_operand:SF 1 "register_operand" "f")))] +- "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT" ++ "TARGET_DOUBLE_FLOAT" + "fcvt.d.s\t%0,%1" +- [(set_attr "type" "fcvt") +- (set_attr "cnv_mode" "S2D") +- (set_attr "mode" "DF")]) ++ [(set_attr "type" "fcvt") ++ (set_attr "cnv_mode" "S2D") ++ (set_attr "mode" "DF")]) + + ;; + ;; .................... +@@ -1799,104 +1512,60 @@ + ;; + ;; .................... 
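For the float extension pattern above and the ftintrz-based fix_trunc patterns that follow, a hedged C reference (not from the patch; names invented, output shown is plausible rather than guaranteed):

  /* Illustrative conversions on a hard-float LoongArch toolchain.  */
  double widen   (float f)  { return f; }          /* fcvt.d.s */
  long   to_long (double d) { return (long) d; }   /* ftintrz.l.d + movfr2gr.d */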
+ +-(define_expand "fix_truncdfsi2" +- [(set (match_operand:SI 0 "register_operand") +- (fix:SI (match_operand:DF 1 "register_operand")))] +- "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT" +-"" +-) +- +-(define_insn "fix_truncdfsi2_insn" +- [(set (match_operand:SI 0 "register_operand" "=f") +- (fix:SI (match_operand:DF 1 "register_operand" "f")))] +- "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT" +- "ftintrz.w.d %0,%1" +- [(set_attr "type" "fcvt") +- (set_attr "mode" "DF") +- (set_attr "cnv_mode" "D2I")]) +- +- +-(define_expand "fix_truncsfsi2" +- [(set (match_operand:SI 0 "register_operand") +- (fix:SI (match_operand:SF 1 "register_operand")))] +- "TARGET_HARD_FLOAT" +-"" +-) +- +-(define_insn "fix_truncsfsi2_insn" +- [(set (match_operand:SI 0 "register_operand" "=f") +- (fix:SI (match_operand:SF 1 "register_operand" "f")))] +- "TARGET_HARD_FLOAT" +- "ftintrz.w.s %0,%1" +- [(set_attr "type" "fcvt") +- (set_attr "mode" "SF") +- (set_attr "cnv_mode" "S2I")]) +- +- +-(define_insn "fix_truncdfdi2" +- [(set (match_operand:DI 0 "register_operand" "=f") +- (fix:DI (match_operand:DF 1 "register_operand" "f")))] +- "TARGET_HARD_FLOAT && TARGET_FLOAT64 && TARGET_DOUBLE_FLOAT" +- "ftintrz.l.d %0,%1" +- [(set_attr "type" "fcvt") +- (set_attr "mode" "DF") +- (set_attr "cnv_mode" "D2I")]) ++;; conversion of a floating-point value to a integer + ++(define_insn "fix_trunc2" ++ [(set (match_operand:GPR 0 "register_operand" "=f") ++ (fix:GPR (match_operand:ANYF 1 "register_operand" "f")))] ++ "" ++ "ftintrz..\t%0,%1" ++ [(set_attr "type" "fcvt") ++ (set_attr "mode" "")]) + +-(define_insn "fix_truncsfdi2" +- [(set (match_operand:DI 0 "register_operand" "=f") +- (fix:DI (match_operand:SF 1 "register_operand" "f")))] +- "TARGET_HARD_FLOAT && TARGET_FLOAT64 && TARGET_DOUBLE_FLOAT" +- "ftintrz.l.s %0,%1" +- [(set_attr "type" "fcvt") +- (set_attr "mode" "SF") +- (set_attr "cnv_mode" "S2I")]) +- ++;; conversion of an integral (or boolean) value to a floating-point value + + (define_insn "floatsidf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (float:DF (match_operand:SI 1 "register_operand" "f")))] +- "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT" ++ "TARGET_DOUBLE_FLOAT" + "ffint.d.w\t%0,%1" +- [(set_attr "type" "fcvt") +- (set_attr "mode" "DF") ++ [(set_attr "type" "fcvt") ++ (set_attr "mode" "DF") + (set_attr "cnv_mode" "I2D")]) + +- + (define_insn "floatdidf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (float:DF (match_operand:DI 1 "register_operand" "f")))] +- "TARGET_HARD_FLOAT && TARGET_FLOAT64 && TARGET_DOUBLE_FLOAT" ++ "TARGET_DOUBLE_FLOAT" + "ffint.d.l\t%0,%1" +- [(set_attr "type" "fcvt") +- (set_attr "mode" "DF") +- (set_attr "cnv_mode" "I2D")]) +- ++ [(set_attr "type" "fcvt") ++ (set_attr "mode" "DF") ++ (set_attr "cnv_mode" "I2D")]) + + (define_insn "floatsisf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (float:SF (match_operand:SI 1 "register_operand" "f")))] + "TARGET_HARD_FLOAT" + "ffint.s.w\t%0,%1" +- [(set_attr "type" "fcvt") +- (set_attr "mode" "SF") ++ [(set_attr "type" "fcvt") ++ (set_attr "mode" "SF") + (set_attr "cnv_mode" "I2S")]) + +- + (define_insn "floatdisf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (float:SF (match_operand:DI 1 "register_operand" "f")))] +- "TARGET_HARD_FLOAT && TARGET_FLOAT64 && TARGET_DOUBLE_FLOAT" ++ "TARGET_DOUBLE_FLOAT" + "ffint.s.l\t%0,%1" +- [(set_attr "type" "fcvt") +- (set_attr "mode" "SF") ++ [(set_attr "type" "fcvt") ++ (set_attr "mode" "SF") + (set_attr "cnv_mode" "I2S")]) + ++;; Convert a floating-point value to an 
unsigned integer. + + (define_expand "fixuns_truncdfsi2" + [(set (match_operand:SI 0 "register_operand") + (unsigned_fix:SI (match_operand:DF 1 "register_operand")))] +- "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT" ++ "TARGET_DOUBLE_FLOAT" + { + rtx reg1 = gen_reg_rtx (DFmode); + rtx reg2 = gen_reg_rtx (DFmode); +@@ -1908,41 +1577,38 @@ + + real_2expN (&offset, 31, DFmode); + +- if (reg1) /* Turn off complaints about unreached code. */ +- { +- loongarch_emit_move (reg1, const_double_from_real_value (offset, DFmode)); +- do_pending_stack_adjust (); ++ loongarch_emit_move (reg1, ++ const_double_from_real_value (offset, DFmode)); ++ do_pending_stack_adjust (); + +- test = gen_rtx_GE (VOIDmode, operands[1], reg1); +- emit_jump_insn (gen_cbranchdf4 (test, operands[1], reg1, label1)); ++ test = gen_rtx_GE (VOIDmode, operands[1], reg1); ++ emit_jump_insn (gen_cbranchdf4 (test, operands[1], reg1, label1)); + +- emit_insn (gen_fix_truncdfsi2 (operands[0], operands[1])); +- emit_jump_insn (gen_rtx_SET (pc_rtx, +- gen_rtx_LABEL_REF (VOIDmode, label2))); +- emit_barrier (); ++ emit_insn (gen_fix_truncdfsi2 (operands[0], operands[1])); ++ emit_jump_insn (gen_rtx_SET (pc_rtx, ++ gen_rtx_LABEL_REF (VOIDmode, label2))); ++ emit_barrier (); + +- emit_label (label1); +- loongarch_emit_move (reg2, gen_rtx_MINUS (DFmode, operands[1], reg1)); +- loongarch_emit_move (reg3, GEN_INT (trunc_int_for_mode +- (BITMASK_HIGH, SImode))); ++ emit_label (label1); ++ loongarch_emit_move (reg2, gen_rtx_MINUS (DFmode, operands[1], reg1)); ++ loongarch_emit_move (reg3, GEN_INT (trunc_int_for_mode ++ (BITMASK_HIGH, SImode))); + +- emit_insn (gen_fix_truncdfsi2 (operands[0], reg2)); +- emit_insn (gen_iorsi3 (operands[0], operands[0], reg3)); ++ emit_insn (gen_fix_truncdfsi2 (operands[0], reg2)); ++ emit_insn (gen_iorsi3 (operands[0], operands[0], reg3)); + +- emit_label (label2); ++ emit_label (label2); + +- /* Allow REG_NOTES to be set on last insn (labels don't have enough +- fields, and can't be used for REG_NOTES anyway). */ +- emit_use (stack_pointer_rtx); +- DONE; +- } ++ /* Allow REG_NOTES to be set on last insn (labels don't have enough ++ fields, and can't be used for REG_NOTES anyway). */ ++ emit_use (stack_pointer_rtx); ++ DONE; + }) + +- + (define_expand "fixuns_truncdfdi2" + [(set (match_operand:DI 0 "register_operand") + (unsigned_fix:DI (match_operand:DF 1 "register_operand")))] +- "TARGET_HARD_FLOAT && TARGET_64BIT && TARGET_DOUBLE_FLOAT" ++ "TARGET_DOUBLE_FLOAT" + { + rtx reg1 = gen_reg_rtx (DFmode); + rtx reg2 = gen_reg_rtx (DFmode); +@@ -1980,7 +1646,6 @@ + DONE; + }) + +- + (define_expand "fixuns_truncsfsi2" + [(set (match_operand:SI 0 "register_operand") + (unsigned_fix:SI (match_operand:SF 1 "register_operand")))] +@@ -2022,11 +1687,10 @@ + DONE; + }) + +- + (define_expand "fixuns_truncsfdi2" + [(set (match_operand:DI 0 "register_operand") + (unsigned_fix:DI (match_operand:SF 1 "register_operand")))] +- "TARGET_HARD_FLOAT && TARGET_64BIT && TARGET_DOUBLE_FLOAT" ++ "TARGET_DOUBLE_FLOAT" + { + rtx reg1 = gen_reg_rtx (SFmode); + rtx reg2 = gen_reg_rtx (SFmode); +@@ -2067,35 +1731,35 @@ + ;; + ;; .................... + ;; +-;; DATA MOVEMENT ++;; EXTRACT AND INSERT + ;; + ;; .................... 
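The fixuns_truncdfsi2 expander earlier in this hunk open-codes unsigned conversion on top of the signed ftintrz path. Ignoring NaN and out-of-range inputs, its control flow corresponds roughly to the following C sketch (illustrative only; the function name is invented):

  /* Rough model of the expander: values below 2^31 take the plain signed
     conversion; larger values are biased down by 2^31, converted, and the
     top bit is OR-ed back in (BITMASK_HIGH).  */
  unsigned int dtou32 (double x)
  {
    if (x < 2147483648.0)
      return (unsigned int) (int) x;
    return ((unsigned int) (int) (x - 2147483648.0)) | 0x80000000u;
  }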
+ + (define_expand "extzv" +- [(set (match_operand:GPR 0 "register_operand") +- (zero_extract:GPR (match_operand:GPR 1 "register_operand") +- (match_operand 2 "const_int_operand") +- (match_operand 3 "const_int_operand")))] ++ [(set (match_operand:X 0 "register_operand") ++ (zero_extract:X (match_operand:X 1 "register_operand") ++ (match_operand 2 "const_int_operand") ++ (match_operand 3 "const_int_operand")))] + "" + { + if (!loongarch_use_ins_ext_p (operands[1], INTVAL (operands[2]), +- INTVAL (operands[3]))) ++ INTVAL (operands[3]))) + FAIL; + }) + + (define_insn "*extzv" +- [(set (match_operand:GPR 0 "register_operand" "=r") +- (zero_extract:GPR (match_operand:GPR 1 "register_operand" "r") +- (match_operand 2 "const_int_operand" "") +- (match_operand 3 "const_int_operand" "")))] ++ [(set (match_operand:X 0 "register_operand" "=r") ++ (zero_extract:X (match_operand:X 1 "register_operand" "r") ++ (match_operand 2 "const_int_operand" "") ++ (match_operand 3 "const_int_operand" "")))] + "loongarch_use_ins_ext_p (operands[1], INTVAL (operands[2]), +- INTVAL (operands[3]))" ++ INTVAL (operands[3]))" + { +- operands[2] = GEN_INT (INTVAL (operands[2]) + INTVAL (operands[3]) -1 ); ++ operands[2] = GEN_INT (INTVAL (operands[2]) + INTVAL (operands[3]) - 1); + return "bstrpick.\t%0,%1,%2,%3"; + } +- [(set_attr "type" "arith") +- (set_attr "mode" "")]) ++ [(set_attr "type" "arith") ++ (set_attr "mode" "")]) + + (define_expand "insv" + [(set (zero_extract:GPR (match_operand:GPR 0 "register_operand") +@@ -2105,7 +1769,7 @@ + "" + { + if (!loongarch_use_ins_ext_p (operands[0], INTVAL (operands[1]), +- INTVAL (operands[2]))) ++ INTVAL (operands[2]))) + FAIL; + }) + +@@ -2115,26 +1779,20 @@ + (match_operand:SI 2 "const_int_operand" "")) + (match_operand:GPR 3 "reg_or_0_operand" "rJ"))] + "loongarch_use_ins_ext_p (operands[0], INTVAL (operands[1]), +- INTVAL (operands[2]))" ++ INTVAL (operands[2]))" + { +- operands[1] = GEN_INT (INTVAL (operands[1]) + INTVAL (operands[2]) -1 ); ++ operands[1] = GEN_INT (INTVAL (operands[1]) + INTVAL (operands[2]) - 1); + return "bstrins.\t%0,%z3,%1,%2"; + } +- [(set_attr "type" "arith") +- (set_attr "mode" "")]) +- +-;; Allow combine to split complex const_int load sequences, using operand 2 +-;; to store the intermediate results. See move_operand for details. +-(define_split +- [(set (match_operand:GPR 0 "register_operand") +- (match_operand:GPR 1 "splittable_const_int_operand")) +- (clobber (match_operand:GPR 2 "register_operand"))] +- "" +- [(const_int 0)] +-{ +- loongarch_move_integer (operands[2], operands[0], INTVAL (operands[1])); +- DONE; +-}) ++ [(set_attr "type" "arith") ++ (set_attr "mode" "")]) ++ ++;; ++;; .................... ++;; ++;; DATA MOVEMENT ++;; ++;; .................... 
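As a reference for the extzv/insv patterns just above (illustrative C, not from the patch; the struct layout and names are invented): when loongarch_use_ins_ext_p accepts the field position, a bit-field read is expected to become a single bstrpick.d and a bit-field update a bstrins.d on the containing word.

  struct flags { unsigned long lo : 7, mid : 9, rest : 48; };
  unsigned long get_mid (struct flags f)          { return f.mid; }  /* bstrpick.d */
  void set_mid (struct flags *f, unsigned long v) { f->mid = v; }    /* ld.d + bstrins.d + st.d */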
+ + ;; 64-bit integer moves + +@@ -2151,152 +1809,46 @@ + DONE; + }) + +- + (define_insn "*movdi_32bit" +- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,ZC,r,m,*f,*f,*r,*m") +- (match_operand:DI 1 "move_operand" "r,i,ZC,r,m,r,*J*r,*m,*f,*f"))] ++ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,w,*f,*f,*r,*m") ++ (match_operand:DI 1 "move_operand" "r,i,w,r,*J*r,*m,*f,*f"))] + "!TARGET_64BIT + && (register_operand (operands[0], DImode) + || reg_or_0_operand (operands[1], DImode))" + { return loongarch_output_move (operands[0], operands[1]); } +- [(set_attr "move_type" "move,const,load,store,load,store,mgtf,fpload,mftg,fpstore") +- (set (attr "mode") +- (if_then_else (eq_attr "move_type" "imul") +- (const_string "SI") +- (const_string "DI")))]) +- ++ [(set_attr "move_type" "move,const,load,store,mgtf,fpload,mftg,fpstore") ++ (set_attr "mode" "DI")]) + + (define_insn "*movdi_64bit" +- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,ZC,r,m,*f,*f,*r,*m") +- (match_operand:DI 1 "move_operand" "r,Yd,ZC,rJ,m,rJ,*r*J,*m,*f,*f"))] ++ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,w,*f,*f,*r,*m") ++ (match_operand:DI 1 "move_operand" "r,Yd,w,rJ,*r*J,*m,*f,*f"))] + "TARGET_64BIT + && (register_operand (operands[0], DImode) +- || reg_or_0_operand (operands[1], DImode)) +- && !((GET_CODE (operands[1]) == SYMBOL_REF || GET_CODE (operands[1]) == LABEL_REF) +- && symbolic_operand (operands[1], VOIDmode) +- && (loongarch_cmodel_var == LARCH_CMODEL_EXTREME))" ++ || reg_or_0_operand (operands[1], DImode))" + { return loongarch_output_move (operands[0], operands[1]); } +- [(set_attr "move_type" "move,const,load,store,load,store,mgtf,fpload,mftg,fpstore") ++ [(set_attr "move_type" "move,const,load,store,mgtf,fpload,mftg,fpstore") + (set_attr "mode" "DI")]) + +-(define_insn "movdi_extreme" +- [(parallel [(set (match_operand:DI 0 "register_operand" "=r") +- (unspec_volatile:DI [(match_operand:DI 1 "symbolic_operand" "")] +- UNSPECV_MOVE_EXTREME)) +- (use (match_operand:DI 2 "register_operand" "=&r"))])] +- "TARGET_64BIT && (loongarch_cmodel_var == LARCH_CMODEL_EXTREME)" +- { +- if (!loongarch_global_symbol_p (operands[1]) +- || loongarch_symbol_binds_local_p (operands[1])) +- return "la.local\t%0,%2,%1"; +- else +- return "la.global\t%0,%2,%1"; +- } +- [(set_attr "move_type" "const") +- (set_attr "mode" "DI")]) + ;; 32-bit Integer moves + +-;; Unlike most other insns, the move insns can't be split with +-;; different predicates, because register spilling and other parts of +-;; the compiler, have memoized the insn number already. +- +-(define_expand "mov" +- [(set (match_operand:IMOVE32 0 "") +- (match_operand:IMOVE32 1 ""))] +- "" +-{ +- if (loongarch_legitimize_move (mode, operands[0], operands[1])) +- DONE; +-}) +- +-;; The difference between these two is whether or not ints are allowed +-;; in FP registers (off by default, use -mdebugh to enable). 
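As a reference for the movdi alternatives above (illustrative C, not from the patch; the name is invented): a 64-bit constant that the "Yd" move_operand path has to synthesize is plausibly emitted as the usual lu12i.w + ori + lu32i.d + lu52i.d sequence, with shorter forms when parts of the constant are zero or already sign-extended.

  /* Illustrative: a constant needing all four pieces.  */
  long long big_const (void) { return 0x123456789abcdef0LL; }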
+- +-(define_insn "*mov_internal" +- [(set (match_operand:IMOVE32 0 "nonimmediate_operand" "=r,r,r,ZC,r,m,*f,*f,*r,*m,*r,*z") +- (match_operand:IMOVE32 1 "move_operand" "r,Yd,ZC,rJ,m,rJ,*r*J,*m,*f,*f,*z,*r"))] +- "(register_operand (operands[0], mode) +- || reg_or_0_operand (operands[1], mode))" +- { return loongarch_output_move (operands[0], operands[1]); } +- [(set_attr "move_type" "move,const,load,store,load,store,mgtf,fpload,mftg,fpstore,mftg,mgtf") +- (set_attr "compression" "all,*,*,*,*,*,*,*,*,*,*,*") +- (set_attr "mode" "SI")]) +- +- +- +-;; LARCH supports loading and storing a floating point register from +-;; the sum of two general registers. We use two versions for each of +-;; these four instructions: one where the two general registers are +-;; SImode, and one where they are DImode. This is because general +-;; registers will be in SImode when they hold 32-bit values, but, +-;; since the 32-bit values are always sign extended, the [ls][wd]xc1 +-;; instructions will still work correctly. +- +-;; ??? Perhaps it would be better to support these instructions by +-;; modifying TARGET_LEGITIMATE_ADDRESS_P and friends. However, since +-;; these instructions can only be used to load and store floating +-;; point registers, that would probably cause trouble in reload. +- +-(define_insn "*_" +- [(set (match_operand:ANYF 0 "register_operand" "=f") +- (mem:ANYF (plus:P (match_operand:P 1 "register_operand" "r") +- (match_operand:P 2 "register_operand" "r"))))] +- "" +- "\t%0,%1,%2" +- [(set_attr "type" "fpidxload") +- (set_attr "mode" "")]) +- +-(define_insn "*_" +- [(set (mem:ANYF (plus:P (match_operand:P 1 "register_operand" "r") +- (match_operand:P 2 "register_operand" "r"))) +- (match_operand:ANYF 0 "register_operand" "f"))] +- "TARGET_HARD_FLOAT" +- "\t%0,%1,%2" +- [(set_attr "type" "fpidxstore") +- (set_attr "mode" "")]) +- +-;; Loongson index address load and store. +-(define_insn "*_" +- [(set (match_operand:GPR 0 "register_operand" "=r") +- (mem:GPR +- (plus:P (match_operand:P 1 "register_operand" "r") +- (match_operand:P 2 "register_operand" "r"))))] +- "" +- "\t%0,%1,%2" +- [(set_attr "type" "load") +- (set_attr "mode" "")]) +- +-(define_insn "*_" +- [(set (mem:GPR (plus:P (match_operand:P 1 "register_operand" "r") +- (match_operand:P 2 "register_operand" "r"))) +- (match_operand:GPR 0 "register_operand" "r"))] +- "" +- "\t%0,%1,%2" +- [(set_attr "type" "store") +- (set_attr "mode" "")]) +- +-;; SHORT mode sign_extend. 
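The hand-written indexed load/store patterns removed above are superseded in this sync by the "k" (base register plus index register) memory alternatives added to the ordinary move and extension patterns. A hedged illustration of the kind of access that now goes through those alternatives (C sketch; names invented):

  /* Illustrative: a reg+reg address; a plausible result is ldx.b $r4,$r4,$r5.  */
  char pick (char *base, long idx) { return base[idx]; }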
+-(define_insn "*extend__" +- [(set (match_operand:GPR 0 "register_operand" "=r") +- (sign_extend:GPR +- (mem:SHORT +- (plus:P (match_operand:P 1 "register_operand" "r") +- (match_operand:P 2 "register_operand" "r")))))] ++(define_expand "movsi" ++ [(set (match_operand:SI 0 "") ++ (match_operand:SI 1 ""))] + "" +- "\t%0,%1,%2" +- [(set_attr "type" "load") +- (set_attr "mode" "")]) ++{ ++ if (loongarch_legitimize_move (SImode, operands[0], operands[1])) ++ DONE; ++}) + +-(define_insn "*extend_" +- [(set (mem:SHORT (plus:P (match_operand:P 1 "register_operand" "r") +- (match_operand:P 2 "register_operand" "r"))) +- (match_operand:SHORT 0 "register_operand" "r"))] +- "" +- "\t%0,%1,%2" +- [(set_attr "type" "store") ++(define_insn "*movsi_internal" ++ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,w,*f,*f,*r,*m,*r,*z") ++ (match_operand:SI 1 "move_operand" "r,Yd,w,rJ,*r*J,*m,*f,*f,*z,*r"))] ++ "(register_operand (operands[0], SImode) ++ || reg_or_0_operand (operands[1], SImode))" ++ { return loongarch_output_move (operands[0], operands[1]); } ++ [(set_attr "move_type" "move,const,load,store,mgtf,fpload,mftg,fpstore,mftg,mgtf") + (set_attr "mode" "SI")]) + +- + ;; 16-bit Integer moves + + ;; Unlike most other insns, the move insns can't be split with +@@ -2314,13 +1866,12 @@ + }) + + (define_insn "*movhi_internal" +- [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,r,m") +- (match_operand:HI 1 "move_operand" "r,Yd,I,m,rJ"))] ++ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,r,m,r,k") ++ (match_operand:HI 1 "move_operand" "r,Yd,I,m,rJ,k,rJ"))] + "(register_operand (operands[0], HImode) + || reg_or_0_operand (operands[1], HImode))" + { return loongarch_output_move (operands[0], operands[1]); } +- [(set_attr "move_type" "move,const,const,load,store") +- (set_attr "compression" "all,all,*,*,*") ++ [(set_attr "move_type" "move,const,const,load,store,load,store") + (set_attr "mode" "HI")]) + + ;; 8-bit Integer moves +@@ -2340,13 +1891,12 @@ + }) + + (define_insn "*movqi_internal" +- [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,m") +- (match_operand:QI 1 "move_operand" "r,I,m,rJ"))] ++ [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,m,r,k") ++ (match_operand:QI 1 "move_operand" "r,I,m,rJ,k,rJ"))] + "(register_operand (operands[0], QImode) + || reg_or_0_operand (operands[1], QImode))" + { return loongarch_output_move (operands[0], operands[1]); } +- [(set_attr "move_type" "move,const,load,store") +- (set_attr "compression" "all,*,*,*") ++ [(set_attr "move_type" "move,const,load,store,load,store") + (set_attr "mode" "QI")]) + + ;; 32-bit floating point moves +@@ -2361,13 +1911,13 @@ + }) + + (define_insn "*movsf_hardfloat" +- [(set (match_operand:SF 0 "nonimmediate_operand" "=f,f,f,m,m,*f,*r,*r,*r,*m") +- (match_operand:SF 1 "move_operand" "f,G,m,f,G,*r,*f,*G*r,*m,*r"))] ++ [(set (match_operand:SF 0 "nonimmediate_operand" "=f,f,f,m,f,k,m,*f,*r,*r,*r,*m") ++ (match_operand:SF 1 "move_operand" "f,G,m,f,k,f,G,*r,*f,*G*r,*m,*r"))] + "TARGET_HARD_FLOAT + && (register_operand (operands[0], SFmode) + || reg_or_0_operand (operands[1], SFmode))" + { return loongarch_output_move (operands[0], operands[1]); } +- [(set_attr "move_type" "fmove,mgtf,fpload,fpstore,store,mgtf,mftg,move,load,store") ++ [(set_attr "move_type" "fmove,mgtf,fpload,fpstore,fpload,fpstore,store,mgtf,mftg,move,load,store") + (set_attr "mode" "SF")]) + + (define_insn "*movsf_softfloat" +@@ -2380,7 +1930,6 @@ + [(set_attr "move_type" "move,load,store") + (set_attr "mode" "SF")]) + +- + ;; 64-bit 
floating point moves + + (define_expand "movdf" +@@ -2393,13 +1942,13 @@ + }) + + (define_insn "*movdf_hardfloat" +- [(set (match_operand:DF 0 "nonimmediate_operand" "=f,f,f,m,m,*f,*r,*r,*r,*m") +- (match_operand:DF 1 "move_operand" "f,G,m,f,G,*r,*f,*r*G,*m,*r"))] +- "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT ++ [(set (match_operand:DF 0 "nonimmediate_operand" "=f,f,f,m,f,k,m,*f,*r,*r,*r,*m") ++ (match_operand:DF 1 "move_operand" "f,G,m,f,k,f,G,*r,*f,*r*G,*m,*r"))] ++ "TARGET_DOUBLE_FLOAT + && (register_operand (operands[0], DFmode) + || reg_or_0_operand (operands[1], DFmode))" + { return loongarch_output_move (operands[0], operands[1]); } +- [(set_attr "move_type" "fmove,mgtf,fpload,fpstore,store,mgtf,mftg,move,load,store") ++ [(set_attr "move_type" "fmove,mgtf,fpload,fpstore,fpload,fpstore,store,mgtf,mftg,move,load,store") + (set_attr "mode" "DF")]) + + (define_insn "*movdf_softfloat" +@@ -2433,11 +1982,10 @@ + { return loongarch_output_move (operands[0], operands[1]); } + [(set_attr "move_type" "move,const,load,store") + (set (attr "mode") +- (if_then_else (eq_attr "move_type" "imul") ++ (if_then_else (eq_attr "move_type" "imul") + (const_string "SI") + (const_string "TI")))]) + +- + ;; 128-bit floating point moves + + (define_expand "movtf" +@@ -2460,11 +2008,10 @@ + [(set_attr "move_type" "move,load,store,mgtf,mftg,fpload,fpstore") + (set_attr "mode" "TF")]) + +- + (define_split + [(set (match_operand:MOVE64 0 "nonimmediate_operand") + (match_operand:MOVE64 1 "move_operand"))] +- "reload_completed && loongarch_split_move_insn_p (operands[0], operands[1], insn)" ++ "reload_completed && loongarch_split_move_insn_p (operands[0], operands[1])" + [(const_int 0)] + { + loongarch_split_move_insn (operands[0], operands[1], curr_insn); +@@ -2474,7 +2021,7 @@ + (define_split + [(set (match_operand:MOVE128 0 "nonimmediate_operand") + (match_operand:MOVE128 1 "move_operand"))] +- "reload_completed && loongarch_split_move_insn_p (operands[0], operands[1], insn)" ++ "reload_completed && loongarch_split_move_insn_p (operands[0], operands[1])" + [(const_int 0)] + { + loongarch_split_move_insn (operands[0], operands[1], curr_insn); +@@ -2484,7 +2031,7 @@ + ;; Emit a doubleword move in which exactly one of the operands is + ;; a floating-point register. We can't just emit two normal moves + ;; because of the constraints imposed by the FPU register model; +-;; see loongarch_cannot_change_mode_class for details. Instead, we keep ++;; see loongarch_can_change_mode_class for details. Instead, we keep + ;; the FPR whole and use special patterns to refer to each word of + ;; the other operand. + +@@ -2516,6 +2063,108 @@ + DONE; + }) + ++;; Clear one FCC register ++ ++(define_insn "movfcc" ++ [(set (match_operand:FCC 0 "register_operand" "=z") ++ (const_int 0))] ++ "" ++ "movgr2cf\t%0,$r0") ++ ++;; Conditional move instructions. ++ ++(define_insn "*sel_using_" ++ [(set (match_operand:GPR 0 "register_operand" "=r,r") ++ (if_then_else:GPR ++ (equality_op:GPR2 (match_operand:GPR2 1 "register_operand" "r,r") ++ (const_int 0)) ++ (match_operand:GPR 2 "reg_or_0_operand" "r,J") ++ (match_operand:GPR 3 "reg_or_0_operand" "J,r")))] ++ "register_operand (operands[2], mode) ++ != register_operand (operands[3], mode)" ++ "@ ++ \t%0,%2,%1 ++ \t%0,%3,%1" ++ [(set_attr "type" "condmove") ++ (set_attr "mode" "")]) ++ ++;; fsel copies the 3rd argument when the 1st is non-zero and the 2nd ++;; argument if the 1st is zero. This means operand 2 and 3 are ++;; inverted in the instruction. 
++ ++(define_insn "*sel" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (if_then_else:ANYF ++ (equality_op:FCC (match_operand:FCC 1 "register_operand" "z") ++ (const_int 0)) ++ (match_operand:ANYF 2 "reg_or_0_operand" "f") ++ (match_operand:ANYF 3 "reg_or_0_operand" "f")))] ++ "TARGET_HARD_FLOAT" ++ "fsel\t%0,,%1" ++ [(set_attr "type" "condmove") ++ (set_attr "mode" "")]) ++ ++;; These are the main define_expand's used to make conditional moves. ++ ++(define_expand "movcc" ++ [(set (match_operand:GPR 0 "register_operand") ++ (if_then_else:GPR (match_operator 1 "comparison_operator" ++ [(match_operand:GPR 2 "reg_or_0_operand") ++ (match_operand:GPR 3 "reg_or_0_operand")])))] ++ "TARGET_COND_MOVE_INT" ++{ ++ if(loongarch_expand_conditional_move_la464 (operands)) ++ DONE; ++ else ++ FAIL; ++}) ++ ++(define_expand "movcc" ++ [(set (match_operand:ANYF 0 "register_operand") ++ (if_then_else:ANYF (match_operator 1 "comparison_operator" ++ [(match_operand:ANYF 2 "reg_or_0_operand") ++ (match_operand:ANYF 3 "reg_or_0_operand")])))] ++ "TARGET_COND_MOVE_FLOAT" ++{ ++ ++ if(loongarch_expand_conditional_move_la464 (operands)) ++ DONE; ++ else ++ FAIL; ++}) ++ ++(define_insn "lu32i_d" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (ior:DI ++ (zero_extend:DI ++ (subreg:SI (match_operand:DI 1 "register_operand" "0") 0)) ++ (match_operand:DI 2 "const_lu32i_operand" "u")))] ++ "TARGET_64BIT" ++ "lu32i.d\t%0,%X2>>32" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "DI")]) ++ ++(define_insn "lu52i_d" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (ior:DI ++ (and:DI (match_operand:DI 1 "register_operand" "r") ++ (match_operand 2 "lu52i_mask_operand")) ++ (match_operand 3 "const_lu52i_operand" "v")))] ++ "TARGET_64BIT" ++ "lu52i.d\t%0,%1,%X3>>52" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "DI")]) ++ ++;; Convert floating-point numbers to integers ++(define_insn "frint_" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")] ++ UNSPEC_FRINT))] ++ "" ++ "frint.\t%0,%1" ++ [(set_attr "type" "fcvt") ++ (set_attr "mode" "")]) ++ + ;; Load the low word of operand 0 with operand 1. 
+ (define_insn "load_low" + [(set (match_operand:SPLITF 0 "register_operand" "=f,f") +@@ -2559,47 +2208,149 @@ + [(set_attr "move_type" "mftg,fpstore") + (set_attr "mode" "")]) + +-;; Move operand 1 to the high word of operand 0 using movgr2frh, preserving the ++;; Thread-Local Storage ++ ++(define_insn "got_load_tls_gd" ++ [(set (match_operand:P 0 "register_operand" "=r") ++ (unspec:P ++ [(match_operand:P 1 "symbolic_operand" "")] ++ UNSPEC_TLS_GD))] ++ "" ++ "la.tls.gd\t%0,%1" ++ [(set_attr "got" "load") ++ (set_attr "mode" "")]) ++ ++(define_insn "got_load_tls_ld" ++ [(set (match_operand:P 0 "register_operand" "=r") ++ (unspec:P ++ [(match_operand:P 1 "symbolic_operand" "")] ++ UNSPEC_TLS_LD))] ++ "" ++ "la.tls.ld\t%0,%1" ++ [(set_attr "got" "load") ++ (set_attr "mode" "")]) ++ ++(define_insn "got_load_tls_le" ++ [(set (match_operand:P 0 "register_operand" "=r") ++ (unspec:P ++ [(match_operand:P 1 "symbolic_operand" "")] ++ UNSPEC_TLS_LE))] ++ "" ++ "la.tls.le\t%0,%1" ++ [(set_attr "got" "load") ++ (set_attr "mode" "")]) ++ ++(define_insn "got_load_tls_ie" ++ [(set (match_operand:P 0 "register_operand" "=r") ++ (unspec:P ++ [(match_operand:P 1 "symbolic_operand" "")] ++ UNSPEC_TLS_IE))] ++ "" ++ "la.tls.ie\t%0,%1" ++ [(set_attr "got" "load") ++ (set_attr "mode" "")]) ++ ++;; Move operand 1 to the high word of operand 0 using movgr2frh.w, preserving the + ;; value in the low word. + (define_insn "movgr2frh" + [(set (match_operand:SPLITF 0 "register_operand" "=f") + (unspec:SPLITF [(match_operand: 1 "reg_or_0_operand" "rJ") +- (match_operand:SPLITF 2 "register_operand" "0")] +- UNSPEC_MOVGR2FRH))] +- "TARGET_HARD_FLOAT && TARGET_FLOAT64" +- "movgr2frh.w\t%z1,%0" ++ (match_operand:SPLITF 2 "register_operand" "0")] ++ UNSPEC_MOVGR2FRH))] ++ "TARGET_DOUBLE_FLOAT" ++ "movgr2frh.w\t%0,%z1" + [(set_attr "move_type" "mgtf") + (set_attr "mode" "")]) + +-;; Move high word of operand 1 to operand 0 using movfrh2gr. ++(define_insn "movsgr2fr" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (unspec:ANYF [(match_operand:SI 1 "register_operand" "r")] ++ UNSPEC_MOVGR2FR))] ++ "TARGET_DOUBLE_FLOAT" ++ "movgr2fr.w\t%0,%1" ++ ) ++(define_insn "movdgr2fr" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (unspec:ANYF [(match_operand:DI 1 "register_operand" "r")] ++ UNSPEC_MOVGR2FR))] ++ "TARGET_DOUBLE_FLOAT" ++ "movgr2fr.d\t%0,%1" ++ ) ++ ++;; Move high word of operand 1 to operand 0 using movfrh2gr.s. 
+ (define_insn "movfrh2gr" + [(set (match_operand: 0 "register_operand" "=r") + (unspec: [(match_operand:SPLITF 1 "register_operand" "f")] + UNSPEC_MOVFRH2GR))] +- "TARGET_HARD_FLOAT && TARGET_FLOAT64" ++ "TARGET_DOUBLE_FLOAT" + "movfrh2gr.s\t%0,%1" + [(set_attr "move_type" "mftg") + (set_attr "mode" "")]) + ++(define_insn "movsfr2gr" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (unspec:GPR [(match_operand:SF 1 "register_operand" "f")] ++ UNSPEC_MOVFR2GR))] ++ "TARGET_DOUBLE_FLOAT" ++ "movfr2gr.s\t%0,%1" ++ ) ++(define_insn "movdfr2gr" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (unspec:GPR [(match_operand:DF 1 "register_operand" "f")] ++ UNSPEC_MOVFR2GR))] ++ "TARGET_DOUBLE_FLOAT" ++ "movfr2gr.d\t%0,%1" ++ ) ++ ++(define_insn "movfr2fcc" ++ [(set (match_operand:FCC 0 "register_operand" "=z") ++ (unspec:FCC [(match_operand:ANYF 1 "register_operand" "f")] ++ UNSPEC_MOVFR2FCC))] ++ "TARGET_HARD_FLOAT" ++ "movfr2cf\t%0,%1" ++ [(set_attr "mode" "")]) ++ ++(define_insn "movgr2fcc" ++ [(set (match_operand:FCC 0 "register_operand" "=z") ++ (unspec:FCC [(match_operand:GPR 1 "register_operand" "r")] ++ UNSPEC_MOVGR2FCC))] ++ "TARGET_HARD_FLOAT" ++ "movgr2cf\t%0,%1" ++ [(set_attr "mode" "")]) ++ ++(define_insn "movfcc2gr" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (unspec:GPR [(match_operand:FCC 1 "register_operand" "z")] ++ UNSPEC_MOVFCC2GR))] ++ "TARGET_HARD_FLOAT" ++ "movcf2gr\t%0,%1" ++ [ (set_attr "mode" "")]) ++ ++ + ;; Expand in-line code to clear the instruction cache between operand[0] and + ;; operand[1]. + (define_expand "clear_cache" + [(match_operand 0 "pmode_register_operand") + (match_operand 1 "pmode_register_operand")] + "" +- " + { +- emit_insn (gen_ibar (const0_rtx)); ++ emit_insn (gen_loongarch_ibar (const0_rtx)); + DONE; +-}") ++}) + +-(define_insn "ibar" +- [(unspec_volatile:SI [(match_operand 0 "const_uimm15_operand")] UNSPEC_IBAR)] ++(define_insn "loongarch_ibar" ++ [(unspec_volatile:SI ++ [(match_operand 0 "const_uimm15_operand")] ++ UNSPECV_IBAR) ++ (clobber (mem:BLK (scratch)))] + "" + "ibar\t%0") + +-(define_insn "dbar" +- [(unspec_volatile:SI [(match_operand 0 "const_uimm15_operand")] UNSPEC_DBAR)] ++(define_insn "loongarch_dbar" ++ [(unspec_volatile:SI ++ [(match_operand 0 "const_uimm15_operand")] ++ UNSPECV_DBAR) ++ (clobber (mem:BLK (scratch)))] + "" + "dbar\t%0") + +@@ -2607,118 +2358,142 @@ + + ;; Privileged state instruction + +-(define_insn "cpucfg" ++(define_insn "loongarch_cpucfg" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "r")] +- UNSPEC_CPUCFG))] ++ UNSPECV_CPUCFG))] + "" + "cpucfg\t%0,%1" +- [(set_attr "type" "load") +- (set_attr "mode" "SI")]) ++ [(set_attr "type" "load") ++ (set_attr "mode" "SI")]) ++ ++(define_insn "loongarch_syscall" ++ [(unspec_volatile:SI ++ [(match_operand 0 "const_uimm15_operand")] ++ UNSPECV_SYSCALL) ++ (clobber (mem:BLK (scratch)))] ++ "" ++ "syscall\t%0") ++ ++(define_insn "loongarch_break" ++ [(unspec_volatile:SI ++ [(match_operand 0 "const_uimm15_operand")] ++ UNSPECV_BREAK) ++ (clobber (mem:BLK (scratch)))] ++ "" ++ "break\t%0") + +-(define_insn "asrtle_d" +- [(unspec_volatile:DI [(match_operand:DI 0 "register_operand" "r") +- (match_operand:DI 1 "register_operand" "r")] +- UNSPEC_ASRTLE_D)] ++(define_insn "loongarch_asrtle_d" ++ [(unspec_volatile:DI [(match_operand:DI 0 "register_operand" "r") ++ (match_operand:DI 1 "register_operand" "r")] ++ UNSPECV_ASRTLE_D)] + "TARGET_64BIT" + "asrtle.d\t%0,%1" +- [(set_attr 
"type" "load") +- (set_attr "mode" "DI")]) ++ [(set_attr "type" "load") ++ (set_attr "mode" "DI")]) + +-(define_insn "asrtgt_d" +- [(unspec_volatile:DI [(match_operand:DI 0 "register_operand" "r") +- (match_operand:DI 1 "register_operand" "r")] +- UNSPEC_ASRTGT_D)] ++(define_insn "loongarch_asrtgt_d" ++ [(unspec_volatile:DI [(match_operand:DI 0 "register_operand" "r") ++ (match_operand:DI 1 "register_operand" "r")] ++ UNSPECV_ASRTGT_D)] + "TARGET_64BIT" + "asrtgt.d\t%0,%1" +- [(set_attr "type" "load") +- (set_attr "mode" "DI")]) ++ [(set_attr "type" "load") ++ (set_attr "mode" "DI")]) + +-(define_insn "
csrrd" ++(define_insn "loongarch_csrrd_" + [(set (match_operand:GPR 0 "register_operand" "=r") + (unspec_volatile:GPR [(match_operand 1 "const_uimm14_operand")] +- UNSPEC_CSRRD))] ++ UNSPECV_CSRRD)) ++ (clobber (mem:BLK (scratch)))] + "" + "csrrd\t%0,%1" +- [(set_attr "type" "load") +- (set_attr "mode" "")]) ++ [(set_attr "type" "load") ++ (set_attr "mode" "")]) + +-(define_insn "
csrwr" ++(define_insn "loongarch_csrwr_" + [(set (match_operand:GPR 0 "register_operand" "=r") +- (unspec_volatile:GPR +- [(match_operand:GPR 1 "register_operand" "0") +- (match_operand 2 "const_uimm14_operand")] +- UNSPEC_CSRWR))] ++ (unspec_volatile:GPR ++ [(match_operand:GPR 1 "register_operand" "0") ++ (match_operand 2 "const_uimm14_operand")] ++ UNSPECV_CSRWR)) ++ (clobber (mem:BLK (scratch)))] + "" + "csrwr\t%0,%2" +- [(set_attr "type" "store") +- (set_attr "mode" "")]) ++ [(set_attr "type" "store") ++ (set_attr "mode" "")]) + +-(define_insn "
csrxchg" ++(define_insn "loongarch_csrxchg_" + [(set (match_operand:GPR 0 "register_operand" "=r") +- (unspec_volatile:GPR +- [(match_operand:GPR 1 "register_operand" "0") +- (match_operand:GPR 2 "register_operand" "q") +- (match_operand 3 "const_uimm14_operand")] +- UNSPEC_CSRXCHG))] ++ (unspec_volatile:GPR ++ [(match_operand:GPR 1 "register_operand" "0") ++ (match_operand:GPR 2 "register_operand" "q") ++ (match_operand 3 "const_uimm14_operand")] ++ UNSPECV_CSRXCHG)) ++ (clobber (mem:BLK (scratch)))] + "" + "csrxchg\t%0,%2,%3" +- [(set_attr "type" "load") +- (set_attr "mode" "")]) ++ [(set_attr "type" "load") ++ (set_attr "mode" "")]) + +-(define_insn "iocsrrd_" ++(define_insn "loongarch_iocsrrd_" + [(set (match_operand:QHWD 0 "register_operand" "=r") +- (unspec_volatile:QHWD [(match_operand:SI 1 "register_operand" "r")] +- UNSPEC_IOCSRRD))] ++ (unspec_volatile:QHWD [(match_operand:SI 1 "register_operand" "r")] ++ UNSPECV_IOCSRRD)) ++ (clobber (mem:BLK (scratch)))] + "" + "iocsrrd.\t%0,%1" +- [(set_attr "type" "load") +- (set_attr "mode" "")]) ++ [(set_attr "type" "load") ++ (set_attr "mode" "")]) + +-(define_insn "iocsrwr_" ++(define_insn "loongarch_iocsrwr_" + [(unspec_volatile:QHWD [(match_operand:QHWD 0 "register_operand" "r") +- (match_operand:SI 1 "register_operand" "r")] +- UNSPEC_IOCSRWR)] ++ (match_operand:SI 1 "register_operand" "r")] ++ UNSPECV_IOCSRWR) ++ (clobber (mem:BLK (scratch)))] + "" + "iocsrwr.\t%0,%1" +- [(set_attr "type" "load") +- (set_attr "mode" "")]) ++ [(set_attr "type" "load") ++ (set_attr "mode" "")]) + +-(define_insn "
cacop" ++(define_insn "loongarch_cacop_" + [(unspec_volatile:X [(match_operand 0 "const_uimm5_operand") +- (match_operand:X 1 "register_operand" "r") +- (match_operand 2 "const_imm12_operand")] +- UNSPEC_CACOP)] ++ (match_operand:X 1 "register_operand" "r") ++ (match_operand 2 "const_imm12_operand")] ++ UNSPECV_CACOP) ++ (clobber (mem:BLK (scratch)))] + "" + "cacop\t%0,%1,%2" +- [(set_attr "type" "load") +- (set_attr "mode" "")]) ++ [(set_attr "type" "load") ++ (set_attr "mode" "")]) + +-(define_insn "
lddir" ++(define_insn "loongarch_lddir_" + [(unspec_volatile:X [(match_operand:X 0 "register_operand" "r") +- (match_operand:X 1 "register_operand" "r") +- (match_operand 2 "const_uimm5_operand")] +- UNSPEC_LDDIR)] ++ (match_operand:X 1 "register_operand" "r") ++ (match_operand 2 "const_uimm5_operand")] ++ UNSPECV_LDDIR) ++ (clobber (mem:BLK (scratch)))] + "" + "lddir\t%0,%1,%2" +- [(set_attr "type" "load") +- (set_attr "mode" "")]) ++ [(set_attr "type" "load") ++ (set_attr "mode" "")]) + +-(define_insn "
ldpte" ++(define_insn "loongarch_ldpte_" + [(unspec_volatile:X [(match_operand:X 0 "register_operand" "r") + (match_operand 1 "const_uimm5_operand")] +- UNSPEC_LDPTE)] ++ UNSPECV_LDPTE) ++ (clobber (mem:BLK (scratch)))] + "" + "ldpte\t%0,%1" +- [(set_attr "type" "load") +- (set_attr "mode" "")]) ++ [(set_attr "type" "load") ++ (set_attr "mode" "")]) + + + ;; Block moves, see loongarch.c for more details. +-;; Argument 0 is the destination +-;; Argument 1 is the source +-;; Argument 2 is the length +-;; Argument 3 is the alignment ++;; Argument 0 is the destination. ++;; Argument 1 is the source. ++;; Argument 2 is the length. ++;; Argument 3 is the alignment. + + (define_expand "movmemsi" + [(parallel [(set (match_operand:BLK 0 "general_operand") +@@ -2740,30 +2515,19 @@ + ;; + ;; .................... + +-(define_expand "3" +- [(set (match_operand:GPR 0 "register_operand") +- (any_shift:GPR (match_operand:GPR 1 "register_operand") +- (match_operand:SI 2 "arith_operand")))] +- "" +-{ +-}) +- +-(define_insn "*3" ++(define_insn "3" + [(set (match_operand:GPR 0 "register_operand" "=r") + (any_shift:GPR (match_operand:GPR 1 "register_operand" "r") + (match_operand:SI 2 "arith_operand" "rI")))] + "" + { + if (CONST_INT_P (operands[2])) +- { + operands[2] = GEN_INT (INTVAL (operands[2]) + & (GET_MODE_BITSIZE (mode) - 1)); +- return "i.\t%0,%1,%2"; +- } else +- return ".\t%0,%1,%2"; ++ ++ return "%i2.\t%0,%1,%2"; + } + [(set_attr "type" "shift") +- (set_attr "compression" "none") + (set_attr "mode" "")]) + + (define_insn "*si3_extend" +@@ -2774,86 +2538,68 @@ + "TARGET_64BIT" + { + if (CONST_INT_P (operands[2])) +- { + operands[2] = GEN_INT (INTVAL (operands[2]) & 0x1f); +- return "i.w\t%0,%1,%2"; +- } else +- return ".w\t%0,%1,%2"; ++ ++ return "%i2.w\t%0,%1,%2"; + } + [(set_attr "type" "shift") + (set_attr "mode" "SI")]) + +-(define_insn "zero_extend_ashift1" +- [ (set (match_operand:DI 0 "register_operand" "=r") +- (and:DI (ashift:DI (subreg:DI (match_operand:SI 1 "register_operand" "r") 0) +- (match_operand 2 "const_immlsa_operand" "")) +- (match_operand 3 "shift_mask_operand" "")))] +-"" +-"bstrpick.d\t%0,%1,31,0\n\talsl.d\t%0,%0,$r0,%2" +-[(set_attr "type" "arith") +- (set_attr "mode" "DI") +- (set_attr "insn_count" "2")]) +- +-(define_insn "zero_extend_ashift2" +- [ (set (match_operand:DI 0 "register_operand" "=r") +- (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r") +- (match_operand 2 "const_immlsa_operand" "")) +- (match_operand 3 "shift_mask_operand" "")))] +-"" +-"bstrpick.d\t%0,%1,31,0\n\talsl.d\t%0,%0,$r0,%2" +-[(set_attr "type" "arith") +- (set_attr "mode" "DI") +- (set_attr "insn_count" "2")]) +- +-(define_insn "alsl_paired1" +- [(set (match_operand:DI 0 "register_operand" "=&r") +- (plus:DI (and:DI (ashift:DI (subreg:DI (match_operand:SI 1 "register_operand" "r") 0) +- (match_operand 2 "const_immlsa_operand" "")) +- (match_operand 3 "shift_mask_operand" "")) +- (match_operand:DI 4 "register_operand" "r")))] +- "" +- "bstrpick.d\t%0,%1,31,0\n\talsl.d\t%0,%0,%4,%2" +- [(set_attr "type" "arith") +- (set_attr "mode" "DI") +- (set_attr "insn_count" "2")]) +- +-(define_insn "alsl_paired2" +- [(set (match_operand:DI 0 "register_operand" "=&r") +- (plus:DI (match_operand:DI 1 "register_operand" "r") +- (and:DI (ashift:DI (match_operand:DI 2 "register_operand" "r") +- (match_operand 3 "const_immlsa_operand" "")) +- (match_operand 4 "shift_mask_operand" ""))))] +- "" +- "bstrpick.d\t%0,%2,31,0\n\talsl.d\t%0,%0,%1,%3" +- [(set_attr "type" "arith") +- (set_attr "mode" "DI") +- 
(set_attr "insn_count" "2")]) +- +-(define_insn "alsl_" +- [(set (match_operand:GPR 0 "register_operand" "=r") +- (plus:GPR (ashift:GPR (match_operand:GPR 1 "register_operand" "r") +- (match_operand 2 "const_immlsa_operand" "")) +- (match_operand:GPR 3 "register_operand" "r")))] +- "ISA_HAS_LSA" +- "alsl.\t%0,%1,%3,%2" +- [(set_attr "type" "arith") +- (set_attr "mode" "")]) +- + (define_insn "rotr3" ++ [(set (match_operand:GPR 0 "register_operand" "=r,r") ++ (rotatert:GPR (match_operand:GPR 1 "register_operand" "r,r") ++ (match_operand:SI 2 "arith_operand" "r,I")))] ++ "" ++ "rotr%i2.\t%0,%1,%2" ++ [(set_attr "type" "shift,shift") ++ (set_attr "mode" "")]) ++ ++;; The following templates were added to generate "bstrpick.d + alsl.d" ++;; instruction pairs. ++;; It is required that the values of const_immalsl_operand and ++;; immediate_operand must have the following correspondence: ++;; ++;; (immediate_operand >> const_immalsl_operand) == 0xffffffff ++ ++(define_insn "zero_extend_ashift" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r") ++ (match_operand 2 "const_immalsl_operand" "")) ++ (match_operand 3 "immediate_operand" "")))] ++ "TARGET_64BIT ++ && ((INTVAL (operands[3]) >> INTVAL (operands[2])) == 0xffffffff)" ++ "bstrpick.d\t%0,%1,31,0\n\talsl.d\t%0,%0,$r0,%2" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "DI") ++ (set_attr "insn_count" "2")]) ++ ++(define_insn "bstrpick_alsl_paired" ++ [(set (match_operand:DI 0 "register_operand" "=&r") ++ (plus:DI (match_operand:DI 1 "register_operand" "r") ++ (and:DI (ashift:DI (match_operand:DI 2 "register_operand" "r") ++ (match_operand 3 "const_immalsl_operand" "")) ++ (match_operand 4 "immediate_operand" ""))))] ++ "TARGET_64BIT ++ && ((INTVAL (operands[4]) >> INTVAL (operands[3])) == 0xffffffff)" ++ "bstrpick.d\t%0,%2,31,0\n\talsl.d\t%0,%0,%1,%3" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "DI") ++ (set_attr "insn_count" "2")]) ++ ++(define_insn "alsl3" + [(set (match_operand:GPR 0 "register_operand" "=r") +- (rotatert:GPR (match_operand:GPR 1 "register_operand" "r") +- (match_operand:SI 2 "arith_operand" "rI")))] ++ (plus:GPR (ashift:GPR (match_operand:GPR 1 "register_operand" "r") ++ (match_operand 2 "const_immalsl_operand" "")) ++ (match_operand:GPR 3 "register_operand" "r")))] + "" +-{ +- if (CONST_INT_P (operands[2])) +- { +- return "rotri.\t%0,%1,%2"; +- } else +- return "rotr.\t%0,%1,%2"; +-} +- [(set_attr "type" "shift") ++ "alsl.\t%0,%1,%3,%2" ++ [(set_attr "type" "arith") + (set_attr "mode" "")]) + ++ ++ ++;; Reverse the order of bytes of operand 1 and store the result in operand 0. 
++ + (define_insn "bswaphi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (bswap:HI (match_operand:HI 1 "register_operand" "r")))] +@@ -2867,7 +2613,7 @@ + "" + "#" + "" +- [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_WSBH)) ++ [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_REVB_2H)) + (set (match_dup 0) (rotatert:SI (match_dup 0) (const_int 16)))] + "" + [(set_attr "insn_count" "2")]) +@@ -2878,28 +2624,28 @@ + "TARGET_64BIT" + "#" + "" +- [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_DSBH)) +- (set (match_dup 0) (unspec:DI [(match_dup 0)] UNSPEC_DSHD))] ++ [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_REVB_4H)) ++ (set (match_dup 0) (unspec:DI [(match_dup 0)] UNSPEC_REVH_D))] + "" + [(set_attr "insn_count" "2")]) + +-(define_insn "wsbh" ++(define_insn "revb_2h" + [(set (match_operand:SI 0 "register_operand" "=r") +- (unspec:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_WSBH))] ++ (unspec:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_REVB_2H))] + "" + "revb.2h\t%0,%1" + [(set_attr "type" "shift")]) + +-(define_insn "dsbh" ++(define_insn "revb_4h" + [(set (match_operand:DI 0 "register_operand" "=r") +- (unspec:DI [(match_operand:DI 1 "register_operand" "r")] UNSPEC_DSBH))] ++ (unspec:DI [(match_operand:DI 1 "register_operand" "r")] UNSPEC_REVB_4H))] + "TARGET_64BIT" + "revb.4h\t%0,%1" + [(set_attr "type" "shift")]) + +-(define_insn "dshd" ++(define_insn "revh_d" + [(set (match_operand:DI 0 "register_operand" "=r") +- (unspec:DI [(match_operand:DI 1 "register_operand" "r")] UNSPEC_DSHD))] ++ (unspec:DI [(match_operand:DI 1 "register_operand" "r")] UNSPEC_REVH_D))] + "TARGET_64BIT" + "revh.d\t%0,%1" + [(set_attr "type" "shift")]) +@@ -2911,37 +2657,37 @@ + ;; + ;; .................... + +-;; Conditional branches on floating-point equality tests. 
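For the byte-swap patterns above, a small C reference (illustrative, not from the patch): per the bswapsi2/bswapdi2 splitters, plausible sequences are revb.2h followed by rotri.w 16 for a 32-bit swap and revb.4h followed by revh.d for a 64-bit swap.

  unsigned int       swap32 (unsigned int x)       { return __builtin_bswap32 (x); }
  unsigned long long swap64 (unsigned long long x) { return __builtin_bswap64 (x); }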
++;; Conditional branches + +-(define_insn "*branch_fp_fcc" ++(define_insn "*branch_fp_FCCmode" + [(set (pc) +- (if_then_else +- (match_operator 1 "equality_operator" +- [(match_operand:FCC 2 "register_operand" "z") +- (const_int 0)]) +- (label_ref (match_operand 0 "" "")) +- (pc)))] ++ (if_then_else ++ (match_operator 1 "equality_operator" ++ [(match_operand:FCC 2 "register_operand" "z") ++ (const_int 0)]) ++ (label_ref (match_operand 0 "" "")) ++ (pc)))] + "TARGET_HARD_FLOAT" + { + return loongarch_output_conditional_branch (insn, operands, +- LARCH_BRANCH ("b%F1", "%Z2%0"), +- LARCH_BRANCH ("b%W1", "%Z2%0")); ++ LARCH_BRANCH ("b%F1", "%Z2%0"), ++ LARCH_BRANCH ("b%W1", "%Z2%0")); + } + [(set_attr "type" "branch")]) + +-(define_insn "*branch_fp_inverted_fcc" ++(define_insn "*branch_fp_inverted_FCCmode" + [(set (pc) +- (if_then_else +- (match_operator 1 "equality_operator" +- [(match_operand:FCC 2 "register_operand" "z") +- (const_int 0)]) +- (pc) +- (label_ref (match_operand 0 "" ""))))] ++ (if_then_else ++ (match_operator 1 "equality_operator" ++ [(match_operand:FCC 2 "register_operand" "z") ++ (const_int 0)]) ++ (pc) ++ (label_ref (match_operand 0 "" ""))))] + "TARGET_HARD_FLOAT" + { + return loongarch_output_conditional_branch (insn, operands, +- LARCH_BRANCH ("b%W1", "%Z2%0"), +- LARCH_BRANCH ("b%F1", "%Z2%0")); ++ LARCH_BRANCH ("b%W1", "%Z2%0"), ++ LARCH_BRANCH ("b%F1", "%Z2%0")); + } + [(set_attr "type" "branch")]) + +@@ -2951,28 +2697,26 @@ + [(set (pc) + (if_then_else + (match_operator 1 "order_operator" +- [(match_operand:GPR 2 "register_operand" "r,r") +- (match_operand:GPR 3 "reg_or_0_operand" "J,r")]) ++ [(match_operand:X 2 "register_operand" "r,r") ++ (match_operand:X 3 "reg_or_0_operand" "J,r")]) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + { return loongarch_output_order_conditional_branch (insn, operands, false); } + [(set_attr "type" "branch") +- (set_attr "compact_form" "maybe,always") + (set_attr "hazard" "forbidden_slot")]) + + (define_insn "*branch_order_inverted" + [(set (pc) + (if_then_else + (match_operator 1 "order_operator" +- [(match_operand:GPR 2 "register_operand" "r,r") +- (match_operand:GPR 3 "reg_or_0_operand" "J,r")]) ++ [(match_operand:X 2 "register_operand" "r,r") ++ (match_operand:X 3 "reg_or_0_operand" "J,r")]) + (pc) + (label_ref (match_operand 0 "" ""))))] + "" + { return loongarch_output_order_conditional_branch (insn, operands, true); } + [(set_attr "type" "branch") +- (set_attr "compact_form" "maybe,always") + (set_attr "hazard" "forbidden_slot")]) + + ;; Conditional branch on equality comparison. 
+@@ -2981,14 +2725,13 @@ + [(set (pc) + (if_then_else + (match_operator 1 "equality_operator" +- [(match_operand:GPR 2 "register_operand" "r") +- (match_operand:GPR 3 "reg_or_0_operand" "rJ")]) ++ [(match_operand:X 2 "register_operand" "r") ++ (match_operand:X 3 "reg_or_0_operand" "rJ")]) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + { return loongarch_output_equal_conditional_branch (insn, operands, false); } + [(set_attr "type" "branch") +- (set_attr "compact_form" "maybe") + (set_attr "hazard" "forbidden_slot")]) + + +@@ -2996,22 +2739,21 @@ + [(set (pc) + (if_then_else + (match_operator 1 "equality_operator" +- [(match_operand:GPR 2 "register_operand" "r") +- (match_operand:GPR 3 "reg_or_0_operand" "rJ")]) ++ [(match_operand:X 2 "register_operand" "r") ++ (match_operand:X 3 "reg_or_0_operand" "rJ")]) + (pc) + (label_ref (match_operand 0 "" ""))))] + "" + { return loongarch_output_equal_conditional_branch (insn, operands, true); } + [(set_attr "type" "branch") +- (set_attr "compact_form" "maybe") + (set_attr "hazard" "forbidden_slot")]) + + + (define_expand "cbranch4" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" +- [(match_operand:GPR 1 "register_operand") +- (match_operand:GPR 2 "nonmemory_operand")]) ++ [(match_operand:GPR 1 "register_operand") ++ (match_operand:GPR 2 "nonmemory_operand")]) + (label_ref (match_operand 3 "")) + (pc)))] + "" +@@ -3023,8 +2765,8 @@ + (define_expand "cbranch4" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" +- [(match_operand:SCALARF 1 "register_operand") +- (match_operand:SCALARF 2 "register_operand")]) ++ [(match_operand:ANYF 1 "register_operand") ++ (match_operand:ANYF 2 "register_operand")]) + (label_ref (match_operand 3 "")) + (pc)))] + "" +@@ -3062,71 +2804,63 @@ + DONE; + }) + +-(define_insn "*seq_zero_" +- [(set (match_operand:GPR2 0 "register_operand" "=r") +- (eq:GPR2 (match_operand:GPR 1 "register_operand" "r") ++(define_insn "*seq_zero_" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (eq:GPR (match_operand:X 1 "register_operand" "r") + (const_int 0)))] + "" + "sltui\t%0,%1,1" + [(set_attr "type" "slt") +- (set_attr "mode" "")]) ++ (set_attr "mode" "")]) + + +-(define_insn "*sne_zero_" +- [(set (match_operand:GPR2 0 "register_operand" "=r") +- (ne:GPR2 (match_operand:GPR 1 "register_operand" "r") ++(define_insn "*sne_zero_" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (ne:GPR (match_operand:X 1 "register_operand" "r") + (const_int 0)))] + "" + "sltu\t%0,%.,%1" + [(set_attr "type" "slt") +- (set_attr "mode" "")]) ++ (set_attr "mode" "")]) + +-(define_insn "*sgt_" +- [(set (match_operand:GPR2 0 "register_operand" "=r") +- (any_gt:GPR2 (match_operand:GPR 1 "register_operand" "r") +- (match_operand:GPR 2 "reg_or_0_operand" "rJ")))] ++(define_insn "*sgt_" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (any_gt:GPR (match_operand:X 1 "register_operand" "r") ++ (match_operand:X 2 "reg_or_0_operand" "rJ")))] + "" + "slt\t%0,%z2,%1" + [(set_attr "type" "slt") +- (set_attr "mode" "")]) ++ (set_attr "mode" "")]) + +- +-(define_insn "*sge_" +- [(set (match_operand:GPR2 0 "register_operand" "=r") +- (any_ge:GPR2 (match_operand:GPR 1 "register_operand" "r") ++(define_insn "*sge_" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (any_ge:GPR (match_operand:X 1 "register_operand" "r") + (const_int 1)))] + "" + "slti\t%0,%.,%1" + [(set_attr "type" "slt") +- (set_attr "mode" "")]) ++ (set_attr "mode" "")]) + +-(define_insn "*slt_" +- [(set (match_operand:GPR2 0 
"register_operand" "=r") +- (any_lt:GPR2 (match_operand:GPR 1 "register_operand" "r") +- (match_operand:GPR 2 "arith_operand" "rI")))] ++(define_insn "*slt_" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (any_lt:GPR (match_operand:X 1 "register_operand" "r") ++ (match_operand:X 2 "arith_operand" "rI")))] + "" +-{ +- if (CONST_INT_P (operands[2])) +- { +- return "slti\t%0,%1,%2"; +- } else +- return "slt\t%0,%1,%2"; +-} ++ "slt%i2\t%0,%1,%2"; + [(set_attr "type" "slt") +- (set_attr "mode" "")]) ++ (set_attr "mode" "")]) + +- +-(define_insn "*sle_" +- [(set (match_operand:GPR2 0 "register_operand" "=r") +- (any_le:GPR2 (match_operand:GPR 1 "register_operand" "r") +- (match_operand:GPR 2 "sle_operand" "")))] ++(define_insn "*sle_" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (any_le:GPR (match_operand:X 1 "register_operand" "r") ++ (match_operand:X 2 "sle_operand" "")))] + "" + { + operands[2] = GEN_INT (INTVAL (operands[2]) + 1); + return "slti\t%0,%1,%2"; + } + [(set_attr "type" "slt") +- (set_attr "mode" "")]) ++ (set_attr "mode" "")]) + + + ;; +@@ -3136,23 +2870,15 @@ + ;; + ;; .................... + +-(define_insn "s__using_fcc" ++(define_insn "s__using_FCCmode" + [(set (match_operand:FCC 0 "register_operand" "=z") +- (fcond:FCC (match_operand:SCALARF 1 "register_operand" "f") +- (match_operand:SCALARF 2 "register_operand" "f")))] ++ (fcond:FCC (match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f")))] + "" + "fcmp..\t%Z0%1,%2" + [(set_attr "type" "fcmp") + (set_attr "mode" "FCC")]) + +-(define_insn "s__using_fcc" +- [(set (match_operand:FCC 0 "register_operand" "=z") +- (swapped_fcond:FCC (match_operand:SCALARF 1 "register_operand" "f") +- (match_operand:SCALARF 2 "register_operand" "f")))] +- "" +- "fcmp..\t%Z0%2,%1" +- [(set_attr "type" "fcmp") +- (set_attr "mode" "FCC")]) + + ;; + ;; .................... +@@ -3170,24 +2896,20 @@ + (define_insn "*jump_absolute" + [(set (pc) + (label_ref (match_operand 0)))] +- "TARGET_ABSOLUTE_JUMPS" ++ "!flag_pic" + { +- return LARCH_ABSOLUTE_JUMP ("b\t%l0"); ++ return "b\t%l0"; + } +- [(set_attr "type" "branch") +- (set_attr "compact_form" "maybe")]) ++ [(set_attr "type" "branch")]) + + (define_insn "*jump_pic" + [(set (pc) + (label_ref (match_operand 0)))] +- "!TARGET_ABSOLUTE_JUMPS" ++ "flag_pic" + { + return "b\t%0"; + } +- [(set_attr "type" "branch") +- (set_attr "compact_form" "maybe")]) +- +- ++ [(set_attr "type" "branch")]) + + (define_expand "indirect_jump" + [(set (pc) (match_operand 0 "register_operand"))] +@@ -3198,12 +2920,10 @@ + DONE; + }) + +-(define_insn "indirect_jump_" ++(define_insn "indirect_jump" + [(set (pc) (match_operand:P 0 "register_operand" "r"))] + "" +- { +- return "jr\t%0"; +- } ++ "jr\t%0" + [(set_attr "type" "jump") + (set_attr "mode" "none")]) + +@@ -3214,25 +2934,25 @@ + "" + { + if (flag_pic) +- operands[0] = expand_simple_binop (Pmode, PLUS, operands[0], +- gen_rtx_LABEL_REF (Pmode, operands[1]), +- NULL_RTX, 0, OPTAB_DIRECT); ++ operands[0] = expand_simple_binop (Pmode, PLUS, operands[0], ++ gen_rtx_LABEL_REF (Pmode, ++ operands[1]), ++ NULL_RTX, 0, OPTAB_DIRECT); + emit_jump_insn (PMODE_INSN (gen_tablejump, (operands[0], operands[1]))); + DONE; + }) + +-(define_insn "tablejump_" ++(define_insn "tablejump" + [(set (pc) + (match_operand:P 0 "register_operand" "r")) + (use (label_ref (match_operand 1 "" "")))] + "" +- { +- return "jr\t%0"; +- } ++ "jr\t%0" + [(set_attr "type" "jump") + (set_attr "mode" "none")]) + + ++ + ;; + ;; .................... 
+ ;; +@@ -3254,22 +2974,25 @@ + ;; saved or used to pass arguments. + + (define_insn "blockage" +- [(unspec_volatile [(const_int 0)] UNSPEC_BLOCKAGE)] ++ [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)] + "" + "" + [(set_attr "type" "ghost") + (set_attr "mode" "none")]) + +-(define_insn "probe_stack_range_" ++(define_insn "probe_stack_range" + [(set (match_operand:P 0 "register_operand" "=r") + (unspec_volatile:P [(match_operand:P 1 "register_operand" "0") + (match_operand:P 2 "register_operand" "r") +- (match_operand:P 3 "register_operand" "r")] +- UNSPEC_PROBE_STACK_RANGE))] ++ (match_operand:P 3 "register_operand" "r")] ++ UNSPECV_PROBE_STACK_RANGE))] + "" +- { return loongarch_output_probe_stack_range (operands[0], operands[2], operands[3]); } ++{ ++ return loongarch_output_probe_stack_range (operands[0], ++ operands[2], ++ operands[3]); ++} + [(set_attr "type" "unknown") +- (set_attr "can_delay" "no") + (set_attr "mode" "")]) + + (define_expand "epilogue" +@@ -3304,12 +3027,12 @@ + (define_insn "*" + [(any_return)] + "" +- { +- operands[0] = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); +- return "jr\t%0"; +- } +- [(set_attr "type" "jump") +- (set_attr "mode" "none")]) ++{ ++ operands[0] = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); ++ return "jr\t%0"; ++} ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none")]) + + ;; Normal return. + +@@ -3317,46 +3040,18 @@ + [(any_return) + (use (match_operand 0 "pmode_register_operand" ""))] + "" +- { +- return "jr\t%0"; +- } +- [(set_attr "type" "jump") +- (set_attr "mode" "none")]) +- +-;; Exception return. +-(define_insn "loongarch_ertn" +- [(return) +- (unspec_volatile [(const_int 0)] UNSPEC_ERTN)] +- "" +- "ertn" +- [(set_attr "type" "trap") +- (set_attr "mode" "none")]) +- +-;; Disable interrupts. +-(define_insn "loongarch_di" +- [(unspec_volatile [(const_int 0)] UNSPEC_DI)] +- "" +- "di" +- [(set_attr "type" "trap") +- (set_attr "mode" "none")]) +- +-;; Execution hazard barrier. +-(define_insn "loongarch_ehb" +- [(unspec_volatile [(const_int 0)] UNSPEC_EHB)] +- "" +- "ehb" +- [(set_attr "type" "trap") +- (set_attr "mode" "none")]) ++ "jr\t%0" ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none")]) + +-;; Read GPR from previous shadow register set. +-(define_insn "loongarch_rdpgpr_" +- [(set (match_operand:P 0 "register_operand" "=r") +- (unspec_volatile:P [(match_operand:P 1 "register_operand" "r")] +- UNSPEC_RDPGPR))] ++;; Exception return. ++(define_insn "loongarch_ertn" ++ [(return) ++ (unspec_volatile [(const_int 0)] UNSPECV_ERTN)] + "" +- "rdpgpr\t%0,%1" +- [(set_attr "type" "move") +- (set_attr "mode" "")]) ++ "ertn" ++ [(set_attr "type" "trap") ++ (set_attr "mode" "none")]) + + ;; This is used in compiling the unwind routines. + (define_expand "eh_return" +@@ -3366,22 +3061,22 @@ + if (GET_MODE (operands[0]) != word_mode) + operands[0] = convert_to_mode (word_mode, operands[0], 0); + if (TARGET_64BIT) +- emit_insn (gen_eh_set_lr_di (operands[0])); ++ emit_insn (gen_eh_set_ra_di (operands[0])); + else +- emit_insn (gen_eh_set_lr_si (operands[0])); ++ emit_insn (gen_eh_set_ra_si (operands[0])); + DONE; + }) + + ;; Clobber the return address on the stack. We can't expand this + ;; until we know where it will be put in the stack frame. + +-(define_insn "eh_set_lr_si" ++(define_insn "eh_set_ra_si" + [(unspec [(match_operand:SI 0 "register_operand" "r")] UNSPEC_EH_RETURN) + (clobber (match_scratch:SI 1 "=&r"))] + "! 
TARGET_64BIT" + "#") + +-(define_insn "eh_set_lr_di" ++(define_insn "eh_set_ra_di" + [(unspec [(match_operand:DI 0 "register_operand" "r")] UNSPEC_EH_RETURN) + (clobber (match_scratch:DI 1 "=&r"))] + "TARGET_64BIT" +@@ -3406,23 +3101,14 @@ + ;; + ;; .................... + +- + ;; Sibling calls. All these patterns use jump instructions. + +-;; If TARGET_SIBCALLS, call_insn_operand will only accept constant +-;; addresses if a direct jump is acceptable. Since the 'S' constraint +-;; is defined in terms of call_insn_operand, the same is true of the +-;; constraints. +- +-;; When we use an indirect jump, we need a register that will be +-;; preserved by the epilogue. +- + (define_expand "sibcall" + [(parallel [(call (match_operand 0 "") + (match_operand 1 "")) + (use (match_operand 2 "")) ;; next_arg_reg + (use (match_operand 3 ""))])] ;; struct_value_size_rtx +- "TARGET_SIBCALLS" ++ "" + { + rtx target = loongarch_legitimize_call_address (XEXP (operands[0], 0)); + +@@ -3433,172 +3119,170 @@ + (define_insn "sibcall_internal" + [(call (mem:SI (match_operand 0 "call_insn_operand" "j,c,a,t,h")) + (match_operand 1 "" ""))] +- "TARGET_SIBCALLS && SIBLING_CALL_P (insn)" ++ "SIBLING_CALL_P (insn)" + { + switch (which_alternative) + { + case 0: + return "jr\t%0"; + case 1: +- if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) +- return "pcaddu18i\t$r12,(%%pcrel(%0+0x20000))>>18\n\t" +- "jirl\t$r0,$r12,%%pcrel(%0+4)-(%%pcrel(%0+4+0x20000)>>18<<18)"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.local\t$r12,$r13,%0\n\tjr\t$r12"; ++ if (TARGET_CMODEL_LARGE) ++ return "pcaddu18i\t$r12,(%%pcrel(%0+0x20000))>>18\n\t" ++ "jirl\t$r0,$r12,%%pcrel(%0+4)-(%%pcrel(%0+4+0x20000)>>18<<18)"; ++ else if (TARGET_CMODEL_EXTREME) ++ return "la.local\t$r12,$r13,%0\n\tjr\t$r12"; + else +- return "b\t%0"; ++ return "b\t%0"; + case 2: +- if (loongarch_cmodel_var == LARCH_CMODEL_TINY_STATIC) +- return "b\t%0"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.global\t$r12,$r13,%0\n\tjr\t$r12"; ++ if (TARGET_CMODEL_TINY_STATIC) ++ return "b\t%0"; ++ else if (TARGET_CMODEL_EXTREME) ++ return "la.global\t$r12,$r13,%0\n\tjr\t$r12"; + else +- return "la.global\t$r12,%0\n\tjr\t$r12"; ++ return "la.global\t$r12,%0\n\tjr\t$r12"; + case 3: +- if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.global\t$r12,$r13,%0\n\tjr\t$r12"; ++ if (TARGET_CMODEL_EXTREME) ++ return "la.global\t$r12,$r13,%0\n\tjr\t$r12"; + else +- return "la.global\t$r12,%0\n\tjr\t$r12"; ++ return "la.global\t$r12,%0\n\tjr\t$r12"; + case 4: +- if (loongarch_cmodel_var == LARCH_CMODEL_NORMAL || loongarch_cmodel_var == LARCH_CMODEL_TINY) +- return "b\t%%plt(%0)"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) +- return "pcaddu18i\t$r12,(%%plt(%0)+0x20000)>>18\n\t" +- "jirl\t$r0,$r12,%%plt(%0)+4-((%%plt(%0)+(4+0x20000))>>18<<18)"; ++ if (TARGET_CMODEL_NORMAL || TARGET_CMODEL_TINY) ++ return "b\t%%plt(%0)"; ++ else if (TARGET_CMODEL_LARGE) ++ return "pcaddu18i\t$r12,(%%plt(%0)+0x20000)>>18\n\t" ++ "jirl\t$r0,$r12,%%plt(%0)+4-((%%plt(%0)+(4+0x20000))>>18<<18)"; + else +- sorry ("cmodel extreme and tiny static not support plt."); ++ /* Code model "extreme" and "tiny-static" do not support plt. 
*/ ++ gcc_unreachable (); + default: + gcc_unreachable (); + } + } +- [(set_attr "jal" "indirect,direct,direct,direct,direct")]) ++ [(set_attr "jirl" "indirect,direct,direct,direct,direct")]) + + (define_expand "sibcall_value" + [(parallel [(set (match_operand 0 "") + (call (match_operand 1 "") + (match_operand 2 ""))) + (use (match_operand 3 ""))])] ;; next_arg_reg +- "TARGET_SIBCALLS" ++ "" + { + rtx target = loongarch_legitimize_call_address (XEXP (operands[1], 0)); + +- /* Handle return values created by loongarch_return_fpr_pair. */ ++ /* Handle return values created by loongarch_pass_fpr_pair. */ + if (GET_CODE (operands[0]) == PARALLEL && XVECLEN (operands[0], 0) == 2) + { +- emit_call_insn (gen_sibcall_value_multiple_internal (XEXP (XVECEXP (operands[0], 0, 0), 0), +- target, operands[2], XEXP (XVECEXP (operands[0], 0, 1), 0))); ++ rtx arg1 = XEXP (XVECEXP (operands[0],0, 0), 0); ++ rtx arg2 = XEXP (XVECEXP (operands[0],0, 1), 0); ++ ++ emit_call_insn (gen_sibcall_value_multiple_internal (arg1, target, ++ operands[2], ++ arg2)); + } + else + { +- /* Handle return values created by loongarch_return_fpr_single. */ ++ /* Handle return values created by loongarch_return_fpr_single. */ + if (GET_CODE (operands[0]) == PARALLEL && XVECLEN (operands[0], 0) == 1) +- operands[0] = XEXP (XVECEXP (operands[0], 0, 0), 0); +- +- emit_call_insn (gen_sibcall_value_internal (operands[0], target, operands[2])); ++ operands[0] = XEXP (XVECEXP (operands[0], 0, 0), 0); ++ ++ emit_call_insn (gen_sibcall_value_internal (operands[0], target, ++ operands[2])); + } + DONE; + }) + + (define_insn "sibcall_value_internal" + [(set (match_operand 0 "register_operand" "") +- (call (mem:SI (match_operand 1 "call_insn_operand" "j,c,a,t,h")) +- (match_operand 2 "" "")))] +- "TARGET_SIBCALLS && SIBLING_CALL_P (insn)" ++ (call (mem:SI (match_operand 1 "call_insn_operand" "j,c,a,t,h")) ++ (match_operand 2 "" "")))] ++ "SIBLING_CALL_P (insn)" + { + switch (which_alternative) + { + case 0: + return "jr\t%1"; + case 1: +- if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) +- return "pcaddu18i\t$r12,%%pcrel(%1+0x20000)>>18\n\t" +- "jirl\t$r0,$r12,%%pcrel(%1+4)-((%%pcrel(%1+4+0x20000))>>18<<18)"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.local\t$r12,$r13,%1\n\t" +- "jr\t$r12"; ++ if (TARGET_CMODEL_LARGE) ++ return "pcaddu18i\t$r12,%%pcrel(%1+0x20000)>>18\n\t" ++ "jirl\t$r0,$r12,%%pcrel(%1+4)-((%%pcrel(%1+4+0x20000))>>18<<18)"; ++ else if (TARGET_CMODEL_EXTREME) ++ return "la.local\t$r12,$r13,%1\n\tjr\t$r12"; + else +- return "b\t%1"; ++ return "b\t%1"; + case 2: +- if (loongarch_cmodel_var == LARCH_CMODEL_TINY_STATIC) +- return "b\t%1"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.global\t$r12,$r13,%1\n\t" +- "jr\t$r12"; ++ if (TARGET_CMODEL_TINY_STATIC) ++ return "b\t%1"; ++ else if (TARGET_CMODEL_EXTREME) ++ return "la.global\t$r12,$r13,%1\n\tjr\t$r12"; + else +- return "la.global\t$r12,%1\n\t" +- "jr\t$r12"; ++ return "la.global\t$r12,%1\n\tjr\t$r12"; + case 3: +- if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.global\t$r12,$r13,%1\n\t" +- "jr\t$r12"; ++ if (TARGET_CMODEL_EXTREME) ++ return "la.global\t$r12,$r13,%1\n\tjr\t$r12"; + else +- return "la.global\t$r12,%1\n\t" +- "jr\t$r12"; ++ return "la.global\t$r12,%1\n\tjr\t$r12"; + case 4: +- if (loongarch_cmodel_var == LARCH_CMODEL_NORMAL || loongarch_cmodel_var == LARCH_CMODEL_TINY) +- return " b\t%%plt(%1)"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) +- return 
"pcaddu18i\t$r12,(%%plt(%1)+0x20000)>>18\n\t" +- "jirl\t$r0,$r12,%%plt(%1)+4-((%%plt(%1)+(4+0x20000))>>18<<18)"; ++ if (TARGET_CMODEL_NORMAL || TARGET_CMODEL_TINY) ++ return " b\t%%plt(%1)"; ++ else if (TARGET_CMODEL_LARGE) ++ return "pcaddu18i\t$r12,(%%plt(%1)+0x20000)>>18\n\t" ++ "jirl\t$r0,$r12,%%plt(%1)+4-((%%plt(%1)+(4+0x20000))>>18<<18)"; + else +- sorry ("loongarch cmodel extreme and tiny-static not support plt."); ++ /* Code model "extreme" and "tiny-static" do not support plt. */ ++ gcc_unreachable (); + default: + gcc_unreachable (); + } + } +- [(set_attr "jal" "indirect,direct,direct,direct,direct")]) ++ [(set_attr "jirl" "indirect,direct,direct,direct,direct")]) + + (define_insn "sibcall_value_multiple_internal" + [(set (match_operand 0 "register_operand" "") +- (call (mem:SI (match_operand 1 "call_insn_operand" "j,c,a,t,h")) +- (match_operand 2 "" ""))) ++ (call (mem:SI (match_operand 1 "call_insn_operand" "j,c,a,t,h")) ++ (match_operand 2 "" ""))) + (set (match_operand 3 "register_operand" "") + (call (mem:SI (match_dup 1)) + (match_dup 2)))] +- "TARGET_SIBCALLS && SIBLING_CALL_P (insn)" ++ "SIBLING_CALL_P (insn)" + { + switch (which_alternative) + { + case 0: + return "jr\t%1"; + case 1: +- if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) +- return "pcaddu18i\t$r12,%%pcrel(%1+0x20000)>>18\n\t" +- "jirl\t$r0,$r12,%%pcrel(%1+4)-(%%pcrel(%1+4+0x20000)>>18<<18)"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.local\t$r12,$r13,%1\n\t" +- "jr\t$r12"; ++ if (TARGET_CMODEL_LARGE) ++ return "pcaddu18i\t$r12,%%pcrel(%1+0x20000)>>18\n\t" ++ "jirl\t$r0,$r12,%%pcrel(%1+4)-(%%pcrel(%1+4+0x20000)>>18<<18)"; ++ else if (TARGET_CMODEL_EXTREME) ++ return "la.local\t$r12,$r13,%1\n\tjr\t$r12"; + else +- return "b\t%1"; ++ return "b\t%1"; + case 2: +- if (loongarch_cmodel_var == LARCH_CMODEL_TINY_STATIC) +- return "b\t%1"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.global\t$r12,$r13,%1\n\t" +- "jr\t$r12"; ++ if (TARGET_CMODEL_TINY_STATIC) ++ return "b\t%1"; ++ else if (TARGET_CMODEL_EXTREME) ++ return "la.global\t$r12,$r13,%1\n\tjr\t$r12"; + else +- return "la.global\t$r12,%1\n\t" +- "jr\t$r12"; ++ return "la.global\t$r12,%1\n\tjr\t$r12"; + case 3: +- if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.global\t$r12,$r13,%1\n\t" +- "jr\t$r12"; ++ if (TARGET_CMODEL_EXTREME) ++ return "la.global\t$r12,$r13,%1\n\tjr\t$r12"; + else +- return "la.global\t$r12,%1\n\t" +- "jr\t$r12"; ++ return "la.global\t$r12,%1\n\tjr\t$r12"; + case 4: +- if (loongarch_cmodel_var == LARCH_CMODEL_NORMAL || loongarch_cmodel_var == LARCH_CMODEL_TINY) +- return "b\t%%plt(%1)"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) +- return "pcaddu18i\t$r12,(%%plt(%1)+0x20000)>>18\n\t" +- "jirl\t$r0,$r12,%%plt(%1)+4-((%%plt(%1)+(4+0x20000))>>18<<18)"; ++ if (TARGET_CMODEL_NORMAL || TARGET_CMODEL_TINY) ++ return "b\t%%plt(%1)"; ++ else if (TARGET_CMODEL_LARGE) ++ return "pcaddu18i\t$r12,(%%plt(%1)+0x20000)>>18\n\t" ++ "jirl\t$r0,$r12,%%plt(%1)+4-((%%plt(%1)+(4+0x20000))>>18<<18)"; + else +- sorry ("loongarch cmodel extreme and tiny-static not support plt."); ++ /* Code model "extreme" and "tiny-static" do not support plt. 
*/ ++ gcc_unreachable (); + default: + gcc_unreachable (); + } + } +- [(set_attr "jal" "indirect,direct,direct,direct,direct")]) ++ [(set_attr "jirl" "indirect,direct,direct,direct,direct")]) + + (define_expand "call" + [(parallel [(call (match_operand 0 "") +@@ -3612,22 +3296,6 @@ + emit_call_insn (gen_call_internal (target, operands[1])); + DONE; + }) +-;; In the last case, we can generate the individual instructions with +-;; a define_split. There are several things to be wary of: +-;; +-;; - We can't expose the load of $gp before reload. If we did, +-;; it might get removed as dead, but reload can introduce new +-;; uses of $gp by rematerializing constants. +-;; +-;; - We shouldn't restore $gp after calls that never return. +-;; It isn't valid to insert instructions between a noreturn +-;; call and the following barrier. +-;; +-;; - The splitter deliberately changes the liveness of $gp. The unsplit +-;; instruction preserves $gp and so have no effect on its liveness. +-;; But once we generate the separate insns, it becomes obvious that +-;; $gp is not live on entry to the call. +-;; + + (define_insn "call_internal" + [(call (mem:SI (match_operand 0 "call_insn_operand" "e,c,a,t,h")) +@@ -3640,46 +3308,41 @@ + case 0: + return "jirl\t$r1,%0,0"; + case 1: +- if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) +- return "pcaddu18i\t$r1,%%pcrel(%0+0x20000)>>18\n\t" +- "jirl\t$r1,$r1,%%pcrel(%0+4)-(%%pcrel(%0+4+0x20000)>>18<<18)"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.local\t$r1,$r12,%0\n\t" +- "jirl\t$r1,$r1,0"; ++ if (TARGET_CMODEL_LARGE) ++ return "pcaddu18i\t$r1,%%pcrel(%0+0x20000)>>18\n\t" ++ "jirl\t$r1,$r1,%%pcrel(%0+4)-(%%pcrel(%0+4+0x20000)>>18<<18)"; ++ else if (TARGET_CMODEL_EXTREME) ++ return "la.local\t$r1,$r12,%0\n\tjirl\t$r1,$r1,0"; + else +- return "bl\t%0"; ++ return "bl\t%0"; + case 2: +- if (loongarch_cmodel_var == LARCH_CMODEL_TINY_STATIC) +- return "bl\t%0"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.global\t$r1,$r12,%0\n\t" +- "jirl\t$r1,$r1,0"; ++ if (TARGET_CMODEL_TINY_STATIC) ++ return "bl\t%0"; ++ else if (TARGET_CMODEL_EXTREME) ++ return "la.global\t$r1,$r12,%0\n\tjirl\t$r1,$r1,0"; + else +- return "la.global\t$r1,%0\n\t" +- "jirl\t$r1,$r1,0"; ++ return "la.global\t$r1,%0\n\tjirl\t$r1,$r1,0"; + case 3: +- if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.global\t$r1,$r12,%0\n\t" +- "jirl\t$r1,$r1,0"; ++ if (TARGET_CMODEL_EXTREME) ++ return "la.global\t$r1,$r12,%0\n\tjirl\t$r1,$r1,0"; + else +- return "la.global\t$r1,%0\n\t" +- "jirl\t$r1,$r1,0"; ++ return "la.global\t$r1,%0\n\tjirl\t$r1,$r1,0"; + case 4: +- if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) +- return "pcaddu18i\t$r1,(%%plt(%0)+0x20000)>>18\n\t" +- "jirl\t$r1,$r1,%%plt(%0)+4-((%%plt(%0)+(4+0x20000))>>18<<18)"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_NORMAL || loongarch_cmodel_var == LARCH_CMODEL_TINY) +- return "bl\t%%plt(%0)"; ++ if (TARGET_CMODEL_LARGE) ++ return "pcaddu18i\t$r1,(%%plt(%0)+0x20000)>>18\n\t" ++ "jirl\t$r1,$r1,%%plt(%0)+4-((%%plt(%0)+(4+0x20000))>>18<<18)"; ++ else if (TARGET_CMODEL_NORMAL || TARGET_CMODEL_TINY) ++ return "bl\t%%plt(%0)"; + else +- sorry ("cmodel extreme and tiny-static not support plt."); ++ /* Code model "extreme" and "tiny-static" do not support plt. 
*/ ++ gcc_unreachable (); + default: + gcc_unreachable (); + } + } +- [(set_attr "jal" "indirect,direct,direct,direct,direct") ++ [(set_attr "jirl" "indirect,direct,direct,direct,direct") + (set_attr "insn_count" "1,2,3,3,2")]) + +- + (define_expand "call_value" + [(parallel [(set (match_operand 0 "") + (call (match_operand 1 "") +@@ -3688,26 +3351,31 @@ + "" + { + rtx target = loongarch_legitimize_call_address (XEXP (operands[1], 0)); +- /* Handle return values created by loongarch_return_fpr_pair. */ ++ /* Handle return values created by loongarch_pass_fpr_pair. */ + if (GET_CODE (operands[0]) == PARALLEL && XVECLEN (operands[0], 0) == 2) +- emit_call_insn (gen_call_value_multiple_internal (XEXP (XVECEXP (operands[0], 0, 0), 0), +- target, operands[2], XEXP (XVECEXP (operands[0], 0, 1), 0))); ++ { ++ rtx arg1 = XEXP (XVECEXP (operands[0], 0, 0), 0); ++ rtx arg2 = XEXP (XVECEXP (operands[0], 0, 1), 0); ++ ++ emit_call_insn (gen_call_value_multiple_internal (arg1, target, ++ operands[2], arg2)); ++ } + else + { +- /* Handle return values created by loongarch_return_fpr_single. */ ++ /* Handle return values created by loongarch_return_fpr_single. */ + if (GET_CODE (operands[0]) == PARALLEL && XVECLEN (operands[0], 0) == 1) +- operands[0] = XEXP (XVECEXP (operands[0], 0, 0), 0); +- +- emit_call_insn (gen_call_value_internal (operands[0], target, operands[2])); ++ operands[0] = XEXP (XVECEXP (operands[0], 0, 0), 0); ++ ++ emit_call_insn (gen_call_value_internal (operands[0], target, ++ operands[2])); + } + DONE; + }) + +-;; See comment for call_internal. + (define_insn "call_value_internal" + [(set (match_operand 0 "register_operand" "") +- (call (mem:SI (match_operand 1 "call_insn_operand" "e,c,a,t,h")) +- (match_operand 2 "" ""))) ++ (call (mem:SI (match_operand 1 "call_insn_operand" "e,c,a,t,h")) ++ (match_operand 2 "" ""))) + (clobber (reg:SI RETURN_ADDR_REGNUM))] + "" + { +@@ -3716,50 +3384,45 @@ + case 0: + return "jirl\t$r1,%1,0"; + case 1: +- if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) +- return "pcaddu18i\t$r1,%%pcrel(%1+0x20000)>>18\n\t" +- "jirl\t$r1,$r1,%%pcrel(%1+4)-(%%pcrel(%1+4+0x20000)>>18<<18)"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.local\t$r1,$r12,%1\n\t" +- "jirl\t$r1,$r1,0"; ++ if (TARGET_CMODEL_LARGE) ++ return "pcaddu18i\t$r1,%%pcrel(%1+0x20000)>>18\n\t" ++ "jirl\t$r1,$r1,%%pcrel(%1+4)-(%%pcrel(%1+4+0x20000)>>18<<18)"; ++ else if (TARGET_CMODEL_EXTREME) ++ return "la.local\t$r1,$r12,%1\n\tjirl\t$r1,$r1,0"; + else +- return "bl\t%1"; ++ return "bl\t%1"; + case 2: +- if (loongarch_cmodel_var == LARCH_CMODEL_TINY_STATIC) +- return "bl\t%1"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.global\t$r1,$r12,%1\n\t" +- "jirl\t$r1,$r1,0"; ++ if (TARGET_CMODEL_TINY_STATIC) ++ return "bl\t%1"; ++ else if (TARGET_CMODEL_EXTREME) ++ return "la.global\t$r1,$r12,%1\n\tjirl\t$r1,$r1,0"; + else +- return "la.global\t$r1,%1\n\t" +- "jirl\t$r1,$r1,0"; ++ return "la.global\t$r1,%1\n\tjirl\t$r1,$r1,0"; + case 3: +- if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.global\t$r1,$r12,%1\n\t" +- "jirl\t$r1,$r1,0"; ++ if (TARGET_CMODEL_EXTREME) ++ return "la.global\t$r1,$r12,%1\n\tjirl\t$r1,$r1,0"; + else +- return "la.global\t$r1,%1\n\t" +- "jirl\t$r1,$r1,0"; ++ return "la.global\t$r1,%1\n\tjirl\t$r1,$r1,0"; + case 4: +- if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) +- return "pcaddu18i\t$r1,(%%plt(%1)+0x20000)>>18\n\t" +- "jirl\t$r1,$r1,%%plt(%1)+4-((%%plt(%1)+(4+0x20000))>>18<<18)"; +- else if (loongarch_cmodel_var 
== LARCH_CMODEL_NORMAL || loongarch_cmodel_var == LARCH_CMODEL_TINY) +- return "bl\t%%plt(%1)"; ++ if (TARGET_CMODEL_LARGE) ++ return "pcaddu18i\t$r1,(%%plt(%1)+0x20000)>>18\n\t" ++ "jirl\t$r1,$r1,%%plt(%1)+4-((%%plt(%1)+(4+0x20000))>>18<<18)"; ++ else if (TARGET_CMODEL_NORMAL || TARGET_CMODEL_TINY) ++ return "bl\t%%plt(%1)"; + else +- sorry ("loongarch cmodel extreme and tiny-static not support plt."); ++ /* Code model "extreme" and "tiny-static" do not support plt. */ ++ gcc_unreachable (); + default: + gcc_unreachable (); + } + } +- [(set_attr "jal" "indirect,direct,direct,direct,direct") ++ [(set_attr "jirl" "indirect,direct,direct,direct,direct") + (set_attr "insn_count" "1,2,3,3,2")]) + +-;; See comment for call_internal. + (define_insn "call_value_multiple_internal" + [(set (match_operand 0 "register_operand" "") +- (call (mem:SI (match_operand 1 "call_insn_operand" "e,c,a,t,h")) +- (match_operand 2 "" ""))) ++ (call (mem:SI (match_operand 1 "call_insn_operand" "e,c,a,t,h")) ++ (match_operand 2 "" ""))) + (set (match_operand 3 "register_operand" "") + (call (mem:SI (match_dup 1)) + (match_dup 2))) +@@ -3771,48 +3434,43 @@ + case 0: + return "jirl\t$r1,%1,0"; + case 1: +- if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) +- return "pcaddu18i\t$r1,%%pcrel(%1+0x20000)>>18\n\t" +- "jirl\t$r1,$r1,%%pcrel(%1+4)-(%%pcrel(%1+4+0x20000)>>18<<18)"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.local\t$r1,$r12,%1\n\t" +- "jirl\t$r1,$r1,0"; ++ if (TARGET_CMODEL_LARGE) ++ return "pcaddu18i\t$r1,%%pcrel(%1+0x20000)>>18\n\t" ++ "jirl\t$r1,$r1,%%pcrel(%1+4)-(%%pcrel(%1+4+0x20000)>>18<<18)"; ++ else if (TARGET_CMODEL_EXTREME) ++ return "la.local\t$r1,$r12,%1\n\tjirl\t$r1,$r1,0"; + else +- return "bl\t%1"; ++ return "bl\t%1"; + case 2: +- if (loongarch_cmodel_var == LARCH_CMODEL_TINY_STATIC) +- return "bl\t%1"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.global\t$r1,$r12,%1\n\t" +- "jirl\t$r1,$r1,0 "; ++ if (TARGET_CMODEL_TINY_STATIC) ++ return "bl\t%1"; ++ else if (TARGET_CMODEL_EXTREME) ++ return "la.global\t$r1,$r12,%1\n\tjirl\t$r1,$r1,0 "; + else +- return "la.global\t$r1,%1\n\t" +- "jirl\t$r1,$r1,0"; ++ return "la.global\t$r1,%1\n\tjirl\t$r1,$r1,0"; + case 3: +- if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.global\t$r1,$r12,%1\n\t" +- "jirl\t$r1,$r1,0"; ++ if (TARGET_CMODEL_EXTREME) ++ return "la.global\t$r1,$r12,%1\n\tjirl\t$r1,$r1,0"; + else +- return "la.global\t$r1,%1\n\t" +- "jirl\t$r1,$r1,0"; ++ return "la.global\t$r1,%1\n\tjirl\t$r1,$r1,0"; + case 4: +- if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) +- return "pcaddu18i\t$r1,(%%plt(%1)+0x20000)>>18\n\t" +- "jirl\t$r1,$r1,%%plt(%1)+4-((%%plt(%1)+(4+0x20000))>>18<<18)"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_NORMAL || loongarch_cmodel_var == LARCH_CMODEL_TINY) +- return "bl\t%%plt(%1)"; ++ if (TARGET_CMODEL_LARGE) ++ return "pcaddu18i\t$r1,(%%plt(%1)+0x20000)>>18\n\t" ++ "jirl\t$r1,$r1,%%plt(%1)+4-((%%plt(%1)+(4+0x20000))>>18<<18)"; ++ else if (TARGET_CMODEL_NORMAL || TARGET_CMODEL_TINY) ++ return "bl\t%%plt(%1)"; + else +- sorry ("loongarch cmodel extreme and tiny-static not support plt."); ++ /* Code model "extreme" and "tiny-static" do not support plt. */ ++ gcc_unreachable (); + default: + gcc_unreachable (); + } + } +- [(set_attr "jal" "indirect,direct,direct,direct,direct") ++ [(set_attr "jirl" "indirect,direct,direct,direct,direct") + (set_attr "insn_count" "1,2,3,3,2")]) + + + ;; Call subroutine returning any type. 
+- + (define_expand "untyped_call" + [(parallel [(call (match_operand 0 "") + (const_int 0)) +@@ -3842,105 +3500,109 @@ + ;; .................... + ;; + ++(define_insn "prefetch" ++ [(prefetch (match_operand 0 "address_operand" "p") ++ (match_operand 1 "const_int_operand" "n") ++ (match_operand 2 "const_int_operand" "n"))] ++ "" ++ { ++ operands[1] = loongarch_prefetch_cookie (operands[1], operands[2]); ++ return "preld\t%1,%a0"; ++ } ++ [(set_attr "type" "prefetch")]) + + (define_insn "*prefetch_indexed_" +- [(prefetch (plus:P (match_operand:P 0 "register_operand" "r") +- (match_operand:P 1 "register_operand" "r")) +- (match_operand 2 "const_int_operand" "n") +- (match_operand 3 "const_int_operand" "n"))] +- "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT" +-{ +- operands[2] = loongarch_prefetch_cookie (operands[2], operands[3]); +- return "prefx\t%2,%1(%0)"; +-} ++ [(prefetch (plus:P (match_operand 0 "register_operand" "r") ++ (match_operand 1 "register_operand" "r")) ++ (match_operand 2 "const_int_operand" "n") ++ (match_operand 3 "const_int_operand" "n"))] ++ "" ++ { ++ operands[2] = loongarch_prefetch_cookie (operands[2], operands[3]); ++ return "preldx\t%2,%1,%0"; ++ } + [(set_attr "type" "prefetchx")]) + + (define_insn "nop" + [(const_int 0)] + "" + "nop" +- [(set_attr "type" "nop") +- (set_attr "mode" "none")]) +- +-;; Like nop, but commented out when outside a .set noreorder block. +-(define_insn "hazard_nop" +- [(const_int 1)] +- "" +- { +- return "#nop"; +- } +- [(set_attr "type" "nop")]) ++ [(set_attr "type" "nop") ++ (set_attr "mode" "none")]) + +-;; The `.insn' pseudo-op. +-(define_insn "insn_pseudo" +- [(unspec_volatile [(const_int 0)] UNSPEC_INSN_PSEUDO)] +- "" +- ".insn" +- [(set_attr "mode" "none") +- (set_attr "insn_count" "0")]) +- +-;; Conditional move instructions. ++;; __builtin_loongarch_movfcsr2gr: move the FCSR into operand 0. ++(define_insn "loongarch_movfcsr2gr" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec_volatile:SI [(match_operand 1 "const_uimm5_operand")] ++ UNSPECV_MOVFCSR2GR))] ++ "TARGET_HARD_FLOAT" ++ "movfcsr2gr\t%0,$r%1") + +-(define_insn "*sel_using_" +- [(set (match_operand:GPR 0 "register_operand" "=r,r") +- (if_then_else:GPR +- (equality_op:GPR2 (match_operand:GPR2 1 "register_operand" "r,r") +- (const_int 0)) +- (match_operand:GPR 2 "reg_or_0_operand" "r,J") +- (match_operand:GPR 3 "reg_or_0_operand" "J,r")))] +- "register_operand (operands[2], mode) +- != register_operand (operands[3], mode)" +- "@ +- \t%0,%2,%1 +- \t%0,%3,%1" +- [(set_attr "type" "condmove") +- (set_attr "mode" "")]) ++;; __builtin_loongarch_movgr2fcsr: move operand 0 into the FCSR. ++(define_insn "loongarch_movgr2fcsr" ++ [(unspec_volatile [(match_operand 0 "const_uimm5_operand") ++ (match_operand:SI 1 "register_operand" "r")] ++ UNSPECV_MOVGR2FCSR)] ++ "TARGET_HARD_FLOAT" ++ "movgr2fcsr\t$r%0,%1") + +-;; sel.fmt copies the 3rd argument when the 1st is non-zero and the 2nd +-;; argument if the 1st is zero. This means operand 2 and 3 are +-;; inverted in the instruction. 
++(define_insn "fclass_" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")] ++ UNSPEC_FCLASS))] ++ "TARGET_HARD_FLOAT" ++ "fclass.\t%0,%1" ++ [(set_attr "type" "unknown") ++ (set_attr "mode" "")]) + +-(define_insn "*sel" +- [(set (match_operand:SCALARF 0 "register_operand" "=f") +- (if_then_else:SCALARF +- (ne:FCC (match_operand:FCC 1 "register_operand" "z") +- (const_int 0)) +- (match_operand:SCALARF 2 "reg_or_0_operand" "f") +- (match_operand:SCALARF 3 "reg_or_0_operand" "f")))] ++(define_insn "bytepick_w" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r") ++ (match_operand:SI 3 "const_0_to_3_operand" "n")] ++ UNSPEC_BYTEPICK_W))] + "" +- "fsel\t%0,%3,%2,%1" +- [(set_attr "type" "condmove") +- (set_attr "mode" "")]) ++ "bytepick.w\t%0,%1,%2,%z3" ++ [(set_attr "mode" "SI")]) + +-;; These are the main define_expand's used to make conditional moves. ++(define_insn "bytepick_d" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec:DI [(match_operand:DI 1 "register_operand" "r") ++ (match_operand:DI 2 "register_operand" "r") ++ (match_operand:DI 3 "const_0_to_7_operand" "n")] ++ UNSPEC_BYTEPICK_D))] ++ "" ++ "bytepick.d\t%0,%1,%2,%z3" ++ [(set_attr "mode" "DI")]) + +-(define_expand "movcc" +- [(set (match_operand:GPR 0 "register_operand") +- (if_then_else:GPR (match_operator 1 "comparison_operator" +- [(match_operand:GPR 2 "reg_or_0_operand") +- (match_operand:GPR 3 "reg_or_0_operand")])))] +- "TARGET_COND_MOVE_INT" +-{ +- if (!INTEGRAL_MODE_P (GET_MODE (XEXP (operands[1], 0)))) +- FAIL; ++(define_insn "bitrev_4b" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "r")] ++ UNSPEC_BITREV_4B))] ++ "" ++ "bitrev.4b\t%0,%1" ++ [(set_attr "type" "unknown") ++ (set_attr "mode" "SI")]) + +- loongarch_expand_conditional_move (operands); +- DONE; +-}) ++(define_insn "bitrev_8b" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec:DI [(match_operand:DI 1 "register_operand" "r")] ++ UNSPEC_BITREV_8B))] ++ "" ++ "bitrev.8b\t%0,%1" ++ [(set_attr "type" "unknown") ++ (set_attr "mode" "DI")]) + +-(define_expand "movcc" +- [(set (match_operand:SCALARF 0 "register_operand") +- (if_then_else:SCALARF (match_operator 1 "comparison_operator" +- [(match_operand:SCALARF 2 "reg_or_0_operand") +- (match_operand:SCALARF 3 "reg_or_0_operand")])))] +- "TARGET_COND_MOVE_FLOAT" +-{ +- if (!FLOAT_MODE_P (GET_MODE (XEXP (operands[1], 0)))) +- FAIL; ++(define_insn "stack_tie" ++ [(set (mem:BLK (scratch)) ++ (unspec:BLK [(match_operand:X 0 "register_operand" "r") ++ (match_operand:X 1 "register_operand" "r")] ++ UNSPEC_TIE))] ++ "" ++ "" ++ [(set_attr "length" "0") ++ (set_attr "type" "ghost")]) + +- loongarch_expand_conditional_move (operands); +- DONE; +-}) + + (define_split + [(match_operand 0 "small_data_pattern")] +@@ -3948,97 +3610,30 @@ + [(match_dup 0)] + { operands[0] = loongarch_rewrite_small_data (operands[0]); }) + +-;; Thread-Local Storage +- +-(define_insn "got_load_tls_gd" +- [(set (match_operand:P 0 "register_operand" "=r") +- (unspec:P +- [(match_operand:P 1 "symbolic_operand" "")] +- UNSPEC_TLS_GD))] +- "" +- "la.tls.gd\t%0,%1" +- [(set_attr "got" "load") +- (set_attr "mode" "")]) +- +-(define_insn "got_load_tls_ld" +- [(set (match_operand:P 0 "register_operand" "=r") +- (unspec:P +- [(match_operand:P 1 "symbolic_operand" "")] +- UNSPEC_TLS_LD))] +- "" +- 
"la.tls.ld\t%0,%1" +- [(set_attr "got" "load") +- (set_attr "mode" "")]) +- +-(define_insn "got_load_tls_le" +- [(set (match_operand:P 0 "register_operand" "=r") +- (unspec:P +- [(match_operand:P 1 "symbolic_operand" "")] +- UNSPEC_TLS_LE))] +- "" +- "la.tls.le\t%0,%1" +- [(set_attr "got" "load") +- (set_attr "mode" "")]) +- +-(define_insn "got_load_tls_ie" +- [(set (match_operand:P 0 "register_operand" "=r") +- (unspec:P +- [(match_operand:P 1 "symbolic_operand" "")] +- UNSPEC_TLS_IE))] +- "" +- "la.tls.ie\t%0,%1" +- [(set_attr "got" "load") +- (set_attr "mode" "")]) +- +-(define_insn "loongarch_movfcsr2gr" +- [(set (match_operand:SI 0 "register_operand" "=r") +- (unspec_volatile:SI [(match_operand 1 "const_uimm5_operand")] UNSPEC_MOVFCSR2GR))] +- "TARGET_HARD_FLOAT" +- "movfcsr2gr\t%0,$r%1") +- +-(define_insn "loongarch_movgr2fcsr" +- [(unspec_volatile [(match_operand 0 "const_uimm5_operand") +- (match_operand:SI 1 "register_operand" "r")] +- UNSPEC_MOVGR2FCSR)] +- "TARGET_HARD_FLOAT" +- "movgr2fcsr\t$r%0,%1") +- + + ;; Match paired HI/SI/SF/DFmode load/stores. + (define_insn "*join2_load_store" +- [(set (match_operand:JOIN_MODE 0 "nonimmediate_operand" "=r,f,m,m,r,ZC") ++ [(set (match_operand:JOIN_MODE 0 "nonimmediate_operand" ++ "=&r,f,m,m,&r,ZC") + (match_operand:JOIN_MODE 1 "nonimmediate_operand" "m,m,r,f,ZC,r")) +- (set (match_operand:JOIN_MODE 2 "nonimmediate_operand" "=r,f,m,m,r,ZC") ++ (set (match_operand:JOIN_MODE 2 "nonimmediate_operand" ++ "=r,f,m,m,r,ZC") + (match_operand:JOIN_MODE 3 "nonimmediate_operand" "m,m,r,f,ZC,r"))] + "reload_completed" + { +- bool load_p = (which_alternative == 0 || which_alternative == 1); +- /* Reg-renaming pass reuses base register if it is dead after bonded loads. +- Hardware does not bond those loads, even when they are consecutive. +- However, order of the loads need to be checked for correctness. */ +- if (!load_p || !reg_overlap_mentioned_p (operands[0], operands[1])) +- { +- output_asm_insn (loongarch_output_move (operands[0], operands[1]), +- operands); +- output_asm_insn (loongarch_output_move (operands[2], operands[3]), +- &operands[2]); +- } +- else +- { +- output_asm_insn (loongarch_output_move (operands[2], operands[3]), +- &operands[2]); +- output_asm_insn (loongarch_output_move (operands[0], operands[1]), +- operands); +- } ++ /* The load destination does not overlap the source. */ ++ gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])); ++ output_asm_insn (loongarch_output_move (operands[0], operands[1]), ++ operands); ++ output_asm_insn (loongarch_output_move (operands[2], operands[3]), ++ &operands[2]); + return ""; + } +- [(set_attr "move_type" "load,fpload,store,fpstore,load,store") ++ [(set_attr "move_type" ++ "load,fpload,store,fpstore,load,store") + (set_attr "insn_count" "2,2,2,2,2,2")]) + +-;; 2 HI/SI/SF/DF loads are joined. +-;; P5600 does not support bonding of two LBs, hence QI mode is not included. +-;; The loads must be non-volatile as they might be reordered at the time of asm +-;; generation. ++;; 2 HI/SI/SF/DF loads are bonded. + (define_peephole2 + [(set (match_operand:JOIN_MODE 0 "register_operand") + (match_operand:JOIN_MODE 1 "non_volatile_mem_operand")) +@@ -4051,8 +3646,7 @@ + (match_dup 3))])] + "") + +-;; 2 HI/SI/SF/DF stores are joined. +-;; P5600 does not support bonding of two SBs, hence QI mode is not included. ++;; 2 HI/SI/SF/DF stores are bonded. 
+ (define_peephole2 + [(set (match_operand:JOIN_MODE 0 "memory_operand") + (match_operand:JOIN_MODE 1 "register_operand")) +@@ -4067,25 +3661,16 @@ + + ;; Match paired HImode loads. + (define_insn "*join2_loadhi" +- [(set (match_operand:SI 0 "register_operand" "=r") ++ [(set (match_operand:SI 0 "register_operand" "=&r") + (any_extend:SI (match_operand:HI 1 "non_volatile_mem_operand" "m"))) + (set (match_operand:SI 2 "register_operand" "=r") + (any_extend:SI (match_operand:HI 3 "non_volatile_mem_operand" "m")))] + "reload_completed" + { +- /* Reg-renaming pass reuses base register if it is dead after bonded loads. +- Hardware does not bond those loads, even when they are consecutive. +- However, order of the loads need to be checked for correctness. */ +- if (!reg_overlap_mentioned_p (operands[0], operands[1])) +- { +- output_asm_insn ("ld.h\t%0,%1", operands); +- output_asm_insn ("ld.h\t%2,%3", operands); +- } +- else +- { +- output_asm_insn ("ld.h\t%2,%3", operands); +- output_asm_insn ("ld.h\t%0,%1", operands); +- } ++ /* The load destination does not overlap the source. */ ++ gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])); ++ output_asm_insn ("ld.h\t%0,%1", operands); ++ output_asm_insn ("ld.h\t%2,%3", operands); + + return ""; + } +@@ -4093,7 +3678,7 @@ + (set_attr "insn_count" "2")]) + + +-;; 2 HI loads are joined. ++;; 2 HI loads are bonded. + (define_peephole2 + [(set (match_operand:SI 0 "register_operand") + (any_extend:SI (match_operand:HI 1 "non_volatile_mem_operand"))) +@@ -4107,153 +3692,10 @@ + "") + + +-;; Logical AND NOT. +-(define_insn "loongson_gsandn" +- [(set (match_operand:GPR 0 "register_operand" "=r") +- (and:GPR +- (not:GPR (match_operand:GPR 1 "register_operand" "r")) +- (match_operand:GPR 2 "register_operand" "r")))] +- "" +- "andn\t%0,%2,%1" +- [(set_attr "type" "logical")]) +- +-;; Logical AND NOT. 
+-(define_insn "loongson_gsorn" +- [(set (match_operand:GPR 0 "register_operand" "=r") +- (ior:GPR +- (not:GPR (match_operand:GPR 1 "register_operand" "r")) +- (match_operand:GPR 2 "register_operand" "r")))] +- "" +- "orn\t%0,%2,%1" +- [(set_attr "type" "logical")]) +- +-(define_insn "smax3" +- [(set (match_operand:SCALARF 0 "register_operand" "=f") +- (smax:SCALARF (match_operand:SCALARF 1 "register_operand" "f") +- (match_operand:SCALARF 2 "register_operand" "f")))] +- "TARGET_HARD_FLOAT" +- "fmax.\t%0,%1,%2" +- [(set_attr "type" "fmove") +- (set_attr "mode" "")]) +- +-(define_insn "smin3" +- [(set (match_operand:SCALARF 0 "register_operand" "=f") +- (smin:SCALARF (match_operand:SCALARF 1 "register_operand" "f") +- (match_operand:SCALARF 2 "register_operand" "f")))] +- "TARGET_HARD_FLOAT" +- "fmin.\t%0,%1,%2" +- [(set_attr "type" "fmove") +- (set_attr "mode" "")]) +- +-(define_insn "smaxa3" +- [(set (match_operand:SCALARF 0 "register_operand" "=f") +- (if_then_else:SCALARF +- (gt (abs:SCALARF (match_operand:SCALARF 1 "register_operand" "f")) +- (abs:SCALARF (match_operand:SCALARF 2 "register_operand" "f"))) +- (match_dup 1) +- (match_dup 2)))] +- "TARGET_HARD_FLOAT" +- "fmaxa.\t%0,%1,%2" +- [(set_attr "type" "fmove") +- (set_attr "mode" "")]) +- +-(define_insn "smina3" +- [(set (match_operand:SCALARF 0 "register_operand" "=f") +- (if_then_else:SCALARF +- (lt (abs:SCALARF (match_operand:SCALARF 1 "register_operand" "f")) +- (abs:SCALARF (match_operand:SCALARF 2 "register_operand" "f"))) +- (match_dup 1) +- (match_dup 2)))] +- "TARGET_HARD_FLOAT" +- "fmina.\t%0,%1,%2" +- [(set_attr "type" "fmove") +- (set_attr "mode" "")]) +- +-(define_insn "frint_" +- [(set (match_operand:SCALARF 0 "register_operand" "=f") +- (unspec:SCALARF [(match_operand:SCALARF 1 "register_operand" "f")] +- UNSPEC_FRINT))] +- "" +- "frint.\t%0,%1" +- [(set_attr "type" "fcvt") +- (set_attr "mode" "")]) +- +-(define_insn "fclass_" +- [(set (match_operand:SCALARF 0 "register_operand" "=f") +- (unspec:SCALARF [(match_operand:SCALARF 1 "register_operand" "f")] +- UNSPEC_FCLASS))] +- "" +- "fclass.\t%0,%1" +- [(set_attr "type" "unknown") +- (set_attr "mode" "")]) +- +-(define_insn "bytepick_w" +- [(set (match_operand:SI 0 "register_operand" "=r") +- (unspec:SI [(match_operand:SI 1 "register_operand" "r") +- (match_operand:SI 2 "register_operand" "r") +- (match_operand:SI 3 "const_0_to_3_operand" "n")] +- UNSPEC_BYTEPICK_W))] +- "" +- "bytepick.w\t%0,%1,%2,%z3" +- [(set_attr "type" "dspalu") +- (set_attr "mode" "SI")]) +- +-(define_insn "bytepick_d" +- [(set (match_operand:DI 0 "register_operand" "=r") +- (unspec:DI [(match_operand:DI 1 "register_operand" "r") +- (match_operand:DI 2 "register_operand" "r") +- (match_operand:DI 3 "const_0_to_7_operand" "n")] +- UNSPEC_BYTEPICK_D))] +- "" +- "bytepick.d\t%0,%1,%2,%z3" +- [(set_attr "type" "dspalu") +- (set_attr "mode" "DI")]) +- +-(define_insn "bitrev_4b" +- [(set (match_operand:SI 0 "register_operand" "=r") +- (unspec:SI [(match_operand:SI 1 "register_operand" "r")] +- UNSPEC_BITREV_4B))] +- "" +- "bitrev.4b\t%0,%1" +- [(set_attr "type" "unknown") +- (set_attr "mode" "SI")]) +- +-(define_insn "bitrev_8b" +- [(set (match_operand:DI 0 "register_operand" "=r") +- (unspec:DI [(match_operand:DI 1 "register_operand" "r")] +- UNSPEC_BITREV_8B))] +- "" +- "bitrev.8b\t%0,%1" +- [(set_attr "type" "unknown") +- (set_attr "mode" "DI")]) +- +- +- +-(define_insn "lu32i_d" +- [(set (match_operand:DI 0 "register_operand" "=r") +- (ior:DI +- (zero_extend:DI +- (subreg:SI (match_operand:DI 1 
"register_operand" "0") 0)) +- (match_operand:DI 2 "const_lu32i_operand" "u")))] +- "TARGET_64BIT" +- "lu32i.d\t%0,%X2>>32" +- [(set_attr "type" "arith") +- (set_attr "mode" "DI")]) +- +-(define_insn "lu52i_d" +- [(set (match_operand:DI 0 "register_operand" "=r") +- (ior:DI +- (and:DI (match_operand:DI 1 "register_operand" "r") +- (match_operand 2 "lu52i_mask_operand")) +- (match_operand 3 "const_lu52i_operand" "v")))] +- "TARGET_64BIT" +- "lu52i.d\t%0,%1,%X3>>52" +- [(set_attr "type" "arith") +- (set_attr "mode" "DI")]) + + (define_mode_iterator QHSD [QI HI SI DI]) + +-(define_insn "crc_w__w" ++(define_insn "loongarch_crc_w__w" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:QHSD 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")] +@@ -4263,7 +3705,7 @@ + [(set_attr "type" "unknown") + (set_attr "mode" "")]) + +-(define_insn "crcc_w__w" ++(define_insn "loongarch_crcc_w__w" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:QHSD 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")] +@@ -4277,6 +3719,9 @@ + + (include "sync.md") + ++(include "generic.md") ++(include "la464.md") ++ + ; The LoongArch SX Instructions. + (include "lsx.md") + +@@ -4286,35 +3731,6 @@ + ; The LoongArch ASX Instructions. + (include "lasx.md") + +-;; Is copying of this instruction disallowed? +-(define_attr "cannot_copy" "no,yes" (const_string "no")) +- +-(define_insn "stack_tie" +- [(set (mem:BLK (scratch)) +- (unspec:BLK [(match_operand:X 0 "register_operand" "r") +- (match_operand:X 1 "register_operand" "r")] +- UNSPEC_TIE))] +- "" +- "" +- [(set_attr "length" "0")] +-) +- +-(define_insn "gpr_save" +- [(unspec_volatile [(match_operand 0 "const_int_operand")] UNSPECV_GPR_SAVE) +- (clobber (reg:SI T0_REGNUM)) +- (clobber (reg:SI T1_REGNUM))] +- "" +- { return loongarch_output_gpr_save (INTVAL (operands[0])); }) +- +-(define_insn "gpr_restore" +- [(unspec_volatile [(match_operand 0 "const_int_operand")] UNSPECV_GPR_RESTORE)] +- "" +- "tail\t__loongarch_restore_%0") +- +-(define_insn "gpr_restore_return" +- [(return) +- (use (match_operand 0 "pmode_register_operand" "")) +- (const_int 0)] +- "" +- "") +- ++(define_c_enum "unspec" [ ++ UNSPEC_ADDRESS_FIRST ++]) +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index 660de3674..075a2d6c7 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -1,6 +1,14 @@ +- ++; Generated by "genstr" from the template "loongarch.opt.in" ++; and definitions from "loongarch-strings". ++; ++; Please do not edit this file directly. ++; It will be automatically updated during a gcc build ++; if you change "loongarch.opt.in" or "loongarch-strings". ++; ++; Generated by "genstr" from the template "loongarch.opt.in" ++; and definitions from "loongarch-strings". + ; +-; Copyright (C) 2005-2018 Free Software Foundation, Inc. ++; Copyright (C) 2020-2022 Free Software Foundation, Inc. + ; + ; This file is part of GCC. + ; +@@ -17,155 +25,225 @@ + ; You should have received a copy of the GNU General Public License + ; along with GCC; see the file COPYING3. If not see + ; . ++; + + HeaderInclude + config/loongarch/loongarch-opts.h + +-mabi= +-Target RejectNegative Joined Enum(loongarch_abi) Var(loongarch_abi) Init(LARCH_ABI_DEFAULT) +--mabi=ABI Generate code that conforms to the given ABI. 
++HeaderInclude ++config/loongarch/loongarch-str.h + ++TargetVariable ++unsigned int recip_mask = 0 ++ ++; ISA related options ++;; Base ISA + Enum +-Name(loongarch_abi) Type(int) +-Known Loongarch ABIs (for use with the -mabi= option): ++Name(isa_base) Type(int) ++Basic ISAs of LoongArch: + + EnumValue +-Enum(loongarch_abi) String(lp32) Value(ABILP32) ++Enum(isa_base) String(la64) Value(ISA_BASE_LA64V100) ++ ++;; ISA extensions / adjustments ++Enum ++Name(isa_ext_fpu) Type(int) ++FPU types of LoongArch: + + EnumValue +-Enum(loongarch_abi) String(lpx32) Value(ABILPX32) ++Enum(isa_ext_fpu) String(none) Value(ISA_EXT_NONE) + + EnumValue +-Enum(loongarch_abi) String(lp64) Value(ABILP64) ++Enum(isa_ext_fpu) String(32) Value(ISA_EXT_FPU32) + +-march= +-Target RejectNegative Joined Var(loongarch_arch_option) ToLower Enum(loongarch_arch_opt_value) +--march=ISA Generate code for the given ISA. ++EnumValue ++Enum(isa_ext_fpu) String(64) Value(ISA_EXT_FPU64) + +-mbranch-cost= +-Target RejectNegative Joined UInteger Var(loongarch_branch_cost) +--mbranch-cost=COST Set the cost of branches to roughly COST instructions. ++mfpu= ++Target RejectNegative Joined ToLower Enum(isa_ext_fpu) Var(la_opt_fpu) Init(M_OPT_UNSET) ++-mfpu=FPU Generate code for the given FPU. + +-mcheck-zero-division +-Target Report Mask(CHECK_ZERO_DIV) +-Trap on integer divide by zero. ++mfpu=0 ++Target RejectNegative Alias(mfpu=,none) ++ ++msoft-float ++Target Driver Defer Var(la_deferred_options) RejectNegative Negative(msingle-float) ++Prevent the use of all hardware floating-point instructions. ++ ++msingle-float ++Target Driver Defer Var(la_deferred_options) RejectNegative Negative(mdouble-float) ++Restrict the use of hardware floating-point instructions to 32-bit operations. + + mdouble-float +-Target Report RejectNegative InverseMask(SINGLE_FLOAT, DOUBLE_FLOAT) ++Target Driver Defer Var(la_deferred_options) RejectNegative Negative(msoft-float) + Allow hardware floating-point instructions to cover both 32-bit and 64-bit operations. + +-mflush-func= +-Target RejectNegative Joined Var(loongarch_cache_flush_func) Init(CACHE_FLUSH_FUNC) +--mflush-func=FUNC Use FUNC to flush the cache before calling stack trampolines. ++Enum ++Name(isa_ext_simd) Type(int) ++SIMD extension levels of LoongArch: ++ ++EnumValue ++Enum(isa_ext_simd) String(none) Value(ISA_EXT_NONE) ++ ++EnumValue ++Enum(isa_ext_simd) String(lsx) Value(ISA_EXT_SIMD_LSX) + +-Mask(64BIT) ++EnumValue ++Enum(isa_ext_simd) String(lasx) Value(ISA_EXT_SIMD_LASX) + +-Mask(FLOAT64) ++msimd= ++Target RejectNegative Joined ToLower Enum(isa_ext_simd) Var(la_opt_simd) Init(M_OPT_UNSET) ++-msimd=SIMD Generate code for the given SIMD extension. + +-mhard-float +-Target Report RejectNegative InverseMask(SOFT_FLOAT_ABI, HARD_FLOAT_ABI) +-Allow the use of hardware floating-point ABI and instructions. ++mlsx ++Target Driver Defer Var(la_deferred_options) ++Enable LoongArch SIMD Extension (LSX, 128-bit). + +-mlong-calls +-Target Report Var(TARGET_LONG_CALLS) +-Use indirect calls. ++mlasx ++Target Driver Defer Var(la_deferred_options) ++Enable LoongArch Advanced SIMD Extension (LASX, 256-bit). + +-mmemcpy +-Target Report Mask(MEMCPY) +-Don't optimize block moves. ++;; Base target models (implies ISA & tune parameters) ++Enum ++Name(cpu_type) Type(int) ++LoongArch CPU types: + +-mno-float +-Target Report RejectNegative Var(TARGET_NO_FLOAT) Condition(TARGET_SUPPORTS_NO_FLOAT) +-Prevent the use of all floating-point operations. 
++EnumValue ++Enum(cpu_type) String(native) Value(CPU_NATIVE) + +-mno-flush-func +-Target RejectNegative +-Do not use a cache-flushing function before calling stack trampolines. ++EnumValue ++Enum(cpu_type) String(abi-default) Value(CPU_ABI_DEFAULT) + +-mrelax-pic-calls +-Target Report Mask(RELAX_PIC_CALLS) +-Try to allow the linker to turn PIC calls into direct calls. ++EnumValue ++Enum(cpu_type) String(loongarch64) Value(CPU_LOONGARCH64) + +-mshared +-Target Report Var(TARGET_SHARED) Init(1) +-When generating -mabicalls code, make the code suitable for use in shared libraries. ++EnumValue ++Enum(cpu_type) String(la664) Value(CPU_LA664) + +-msingle-float +-Target Report RejectNegative Mask(SINGLE_FLOAT) +-Restrict the use of hardware floating-point instructions to 32-bit operations. ++EnumValue ++Enum(cpu_type) String(la464) Value(CPU_LA464) + +-msoft-float +-Target Report RejectNegative Mask(SOFT_FLOAT_ABI) +-Prevent the use of all hardware floating-point instructions. ++EnumValue ++Enum(cpu_type) String(la264) Value(CPU_LA264) ++ ++EnumValue ++Enum(cpu_type) String(la364) Value(CPU_LA364) + +-mlra +-Target Report Var(loongarch_lra_flag) Init(1) Save +-Use LRA instead of reload. ++march= ++Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET) ++-march=PROCESSOR Generate code for the given PROCESSOR ISA. + + mtune= +-Target RejectNegative Joined Var(loongarch_tune_option) ToLower Enum(loongarch_arch_opt_value) +--mtune=PROCESSOR Optimize the output for PROCESSOR. ++Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_tune) Init(M_OPT_UNSET) ++-mtune=PROCESSOR Generate optimized code for PROCESSOR. + +-mframe-header-opt +-Target Report Var(flag_frame_header_optimization) Optimization +-Optimize frame header. + +-noasmopt +-Driver ++; ABI related options ++; (ISA constraints on ABI are handled dynamically) + +-mstrict-align +-Target Report Mask(STRICT_ALIGN) Save +-Do not generate unaligned memory accesses. ++;; Base ABI ++Enum ++Name(abi_base) Type(int) ++Base ABI types for LoongArch: + +-mlsx +-Target Report Mask(LSX) +-Use LoongArch SX Extension instructions. ++EnumValue ++Enum(abi_base) String(lp64d) Value(ABI_BASE_LP64D) + +-mlasx +-Target Report Var(TARGET_LASX) +-Use LoongArch ASX Extension instructions. ++EnumValue ++Enum(abi_base) String(lp64f) Value(ABI_BASE_LP64F) + +-malign-llsc-target +-Target Report Var(TARGET_ALIGN_LLSC_TARGET) +-Target align llsc target. ++EnumValue ++Enum(abi_base) String(lp64s) Value(ABI_BASE_LP64S) + +-mmax-inline-memcpy-size= +-Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init(1024) +--mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024. ++mabi= ++Target RejectNegative Joined ToLower Enum(abi_base) Var(la_opt_abi_base) Init(M_OPT_UNSET) ++-mabi=BASEABI Generate code that conforms to the given BASEABI. ++ ++;; Legacy option: -mabi=lp64 ++mabi=lp64 ++Target RejectNegative Mask(LP64) ++-mabi=lp64 Legacy option that enables the lp64 integer ABI. ++ ++;; ABI Extension ++Variable ++int la_opt_abi_ext = M_OPT_UNSET ++ ++mbranch-cost= ++Target RejectNegative Joined UInteger Var(loongarch_branch_cost) ++-mbranch-cost=COST Set the cost of branches to roughly COST instructions. + + mvecarg + Target Report Var(TARGET_VECARG) Init(1) + Target pass vect arg uses vector register. + ++mmemvec-cost= ++Target RejectNegative Joined UInteger Var(loongarch_vector_access_cost) IntegerRange(1, 5) ++mmemvec-cost=COST Set the cost of vector memory access instructions. 
++ ++mveclibabi= ++Target RejectNegative Joined Var(loongarch_veclibabi_name) ++Vector library ABI to use. ++ ++mstackrealign ++Target Var(loongarch_stack_realign) Init(1) ++Realign stack in prologue. ++ ++mforce-drap ++Target Var(loongarch_force_drap) Init(0) ++Always use Dynamic Realigned Argument Pointer (DRAP) to realign stack. ++ ++mcheck-zero-division ++Target Mask(CHECK_ZERO_DIV) ++Trap on integer divide by zero. ++ + mcond-move-int +-Target Report Var(TARGET_COND_MOVE_INT) Init(1) ++Target Var(TARGET_COND_MOVE_INT) Init(1) + Conditional moves for integral are enabled. + + mcond-move-float +-Target Report Var(TARGET_COND_MOVE_FLOAT) Init(1) ++Target Var(TARGET_COND_MOVE_FLOAT) Init(1) + Conditional moves for float are enabled. + +-; The code model option names for -mcmodel. ++mmemcpy ++Target Mask(MEMCPY) ++Prevent optimizing block moves, which is also the default behavior of -Os. + ++mstrict-align ++Target Var(TARGET_STRICT_ALIGN) Init(0) ++Do not generate unaligned memory accesses. ++ ++mmax-inline-memcpy-size= ++Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init(1024) ++-mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024. ++ ++mrecip ++Target Report RejectNegative Var(loongarch_recip) ++Generate reciprocals instead of divss and sqrtss. ++ ++mrecip= ++Target Report RejectNegative Joined Var(loongarch_recip_name) ++Control generation of reciprocal estimates. ++ ++; The code model option names for -mcmodel. + Enum +-Name(cmodel) Type(enum loongarch_code_model) ++Name(cmodel) Type(int) + The code model option names for -mcmodel: + + EnumValue +-Enum(cmodel) String(normal) Value(LARCH_CMODEL_NORMAL) ++Enum(cmodel) String(normal) Value(CMODEL_NORMAL) + + EnumValue +-Enum(cmodel) String(tiny) Value(LARCH_CMODEL_TINY) ++Enum(cmodel) String(tiny) Value(CMODEL_TINY) + + EnumValue +-Enum(cmodel) String(tiny-static) Value(LARCH_CMODEL_TINY_STATIC) ++Enum(cmodel) String(tiny-static) Value(CMODEL_TINY_STATIC) + + EnumValue +-Enum(cmodel) String(large) Value(LARCH_CMODEL_LARGE) ++Enum(cmodel) String(large) Value(CMODEL_LARGE) + + EnumValue +-Enum(cmodel) String(extreme) Value(LARCH_CMODEL_EXTREME) ++Enum(cmodel) String(extreme) Value(CMODEL_EXTREME) + + mcmodel= +-Target RejectNegative Joined Enum(cmodel) Var(loongarch_cmodel_var) Init(LARCH_CMODEL_NORMAL) Save ++Target RejectNegative Joined Enum(cmodel) Var(la_opt_cmodel) Init(M_OPT_UNSET) + Specify the code model. +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 1f7034366..2b1d6f109 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -168,6 +168,9 @@ + ;; As ILSX but excludes V16QI. + (define_mode_iterator ILSX_DWH [V2DI V4SI V8HI]) + ++;; As LSX but excludes V16QI. ++(define_mode_iterator LSX_DWH [V2DF V4SF V2DI V4SI V8HI]) ++ + ;; As ILSX but excludes V2DI. 
+ (define_mode_iterator ILSX_WHB [V4SI V8HI V16QI]) + +@@ -291,6 +294,10 @@ + (V2DI "d") + (V4SI "s")]) + ++(define_mode_attr flsxfrint ++ [(V2DF "d") ++ (V4SF "s")]) ++ + (define_mode_attr ilsxfmt + [(V2DF "l") + (V4SF "w")]) +@@ -327,6 +334,38 @@ + (V4SI "uimm5") + (V2DI "uimm6")]) + ++ ++(define_int_iterator FRINT_S [UNSPEC_LSX_VFRINTRP_S ++ UNSPEC_LSX_VFRINTRZ_S ++ UNSPEC_LSX_VFRINT ++ UNSPEC_LSX_VFRINTRM_S]) ++ ++(define_int_iterator FRINT_D [UNSPEC_LSX_VFRINTRP_D ++ UNSPEC_LSX_VFRINTRZ_D ++ UNSPEC_LSX_VFRINT ++ UNSPEC_LSX_VFRINTRM_D]) ++ ++(define_int_attr frint_pattern_s ++ [(UNSPEC_LSX_VFRINTRP_S "ceil") ++ (UNSPEC_LSX_VFRINTRZ_S "btrunc") ++ (UNSPEC_LSX_VFRINT "rint") ++ (UNSPEC_LSX_VFRINTRM_S "floor")]) ++ ++(define_int_attr frint_pattern_d ++ [(UNSPEC_LSX_VFRINTRP_D "ceil") ++ (UNSPEC_LSX_VFRINTRZ_D "btrunc") ++ (UNSPEC_LSX_VFRINT "rint") ++ (UNSPEC_LSX_VFRINTRM_D "floor")]) ++ ++(define_int_attr frint_suffix ++ [(UNSPEC_LSX_VFRINTRP_S "rp") ++ (UNSPEC_LSX_VFRINTRP_D "rp") ++ (UNSPEC_LSX_VFRINTRZ_S "rz") ++ (UNSPEC_LSX_VFRINTRZ_D "rz") ++ (UNSPEC_LSX_VFRINT "") ++ (UNSPEC_LSX_VFRINTRM_S "rm") ++ (UNSPEC_LSX_VFRINTRM_D "rm")]) ++ + (define_expand "vec_init" + [(match_operand:LSX 0 "register_operand") + (match_operand:LSX 1 "")] +@@ -513,12 +552,12 @@ + DONE; + }) + +-(define_insn "lsx_vinsgr2vr_" +- [(set (match_operand:LSX 0 "register_operand" "=f") +- (vec_merge:LSX +- (vec_duplicate:LSX ++(define_insn "lsx_vinsgr2vr_" ++ [(set (match_operand:ILSX 0 "register_operand" "=f") ++ (vec_merge:ILSX ++ (vec_duplicate:ILSX + (match_operand: 1 "reg_or_0_operand" "rJ")) +- (match_operand:LSX 2 "register_operand" "0") ++ (match_operand:ILSX 2 "register_operand" "0") + (match_operand 3 "const__operand" "")))] + "ISA_HAS_LSX" + { +@@ -688,11 +727,23 @@ + DONE; + }) + +-(define_insn "lsx_vshuf_" +- [(set (match_operand:ILSX_DWH 0 "register_operand" "=f") +- (unspec:ILSX_DWH [(match_operand:ILSX_DWH 1 "register_operand" "0") +- (match_operand:ILSX_DWH 2 "register_operand" "f") +- (match_operand:ILSX_DWH 3 "register_operand" "f")] ++(define_expand "vec_perm" ++ [(match_operand:LSX 0 "register_operand") ++ (match_operand:LSX 1 "register_operand") ++ (match_operand:LSX 2 "register_operand") ++ (match_operand:LSX 3 "register_operand")] ++ "ISA_HAS_LSX" ++{ ++ loongarch_expand_vec_perm (operands[0], operands[1], ++ operands[2], operands[3]); ++ DONE; ++}) ++ ++(define_insn "lsx_vshuf_" ++ [(set (match_operand:LSX_DWH 0 "register_operand" "=f") ++ (unspec:LSX_DWH [(match_operand:LSX_DWH 1 "register_operand" "0") ++ (match_operand:LSX_DWH 2 "register_operand" "f") ++ (match_operand:LSX_DWH 3 "register_operand" "f")] + UNSPEC_LSX_VSHUF))] + "ISA_HAS_LSX" + "vshuf.\t%w0,%w2,%w3" +@@ -731,7 +782,7 @@ + [(set (match_operand:LSX 0 "nonimmediate_operand") + (match_operand:LSX 1 "move_operand"))] + "reload_completed && ISA_HAS_LSX +- && loongarch_split_move_insn_p (operands[0], operands[1], insn)" ++ && loongarch_split_move_insn_p (operands[0], operands[1])" + [(const_int 0)] + { + loongarch_split_move_insn (operands[0], operands[1], curr_insn); +@@ -996,7 +1047,25 @@ + [(set_attr "type" "simd_fmul") + (set_attr "mode" "")]) + +-(define_insn "div3" ++(define_expand "div3" ++ [(set (match_operand:FLSX 0 "register_operand") ++ (div:FLSX (match_operand:FLSX 1 "register_operand") ++ (match_operand:FLSX 2 "register_operand")))] ++ "ISA_HAS_LSX" ++{ ++ if (mode == V4SFmode ++ && TARGET_RECIP_VEC_DIV ++ && optimize_insn_for_speed_p () ++ && flag_finite_math_only && !flag_trapping_math ++ && 
flag_unsafe_math_optimizations) ++ { ++ loongarch_emit_swdivsf (operands[0], operands[1], ++ operands[2], V4SFmode); ++ DONE; ++ } ++}) ++ ++(define_insn "*div3" + [(set (match_operand:FLSX 0 "register_operand" "=f") + (div:FLSX (match_operand:FLSX 1 "register_operand" "f") + (match_operand:FLSX 2 "register_operand" "f")))] +@@ -1025,7 +1094,23 @@ + [(set_attr "type" "simd_fmadd") + (set_attr "mode" "")]) + +-(define_insn "sqrt2" ++(define_expand "sqrt2" ++ [(set (match_operand:FLSX 0 "register_operand") ++ (sqrt:FLSX (match_operand:FLSX 1 "register_operand")))] ++ "ISA_HAS_LSX" ++{ ++ if (mode == V4SFmode ++ && TARGET_RECIP_VEC_SQRT ++ && flag_unsafe_math_optimizations ++ && optimize_insn_for_speed_p () ++ && flag_finite_math_only && !flag_trapping_math) ++ { ++ loongarch_emit_swrsqrtsf (operands[0], operands[1], V4SFmode, 0); ++ DONE; ++ } ++}) ++ ++(define_insn "*sqrt2" + [(set (match_operand:FLSX 0 "register_operand" "=f") + (sqrt:FLSX (match_operand:FLSX 1 "register_operand" "f")))] + "ISA_HAS_LSX" +@@ -1362,8 +1447,8 @@ + (V2DF "V4SI")]) + + (define_insn "lsx_vreplgr2vr_" +- [(set (match_operand:LSX 0 "register_operand" "=f,f") +- (vec_duplicate:LSX ++ [(set (match_operand:ILSX 0 "register_operand" "=f,f") ++ (vec_duplicate:ILSX + (match_operand: 1 "reg_or_0_operand" "r,J")))] + "ISA_HAS_LSX" + { +@@ -1389,7 +1474,7 @@ + DONE; + }) + +-(define_insn "lsx_vflogb_" ++(define_insn "logb2" + [(set (match_operand:FLSX 0 "register_operand" "=f") + (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] + UNSPEC_LSX_VFLOGB))] +@@ -1449,6 +1534,15 @@ + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) + ++(define_insn "lsx_vfrecipe_" ++ [(set (match_operand:FLSX 0 "register_operand" "=f") ++ (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] ++ UNSPEC_RECIPE))] ++ "ISA_HAS_LSX && flag_unsafe_math_optimizations && TARGET_RECIP_VEC_DIV" ++ "vfrecipe.\t%w0,%w1" ++ [(set_attr "type" "simd_fdiv") ++ (set_attr "mode" "")]) ++ + (define_insn "lsx_vfrint_" + [(set (match_operand:FLSX 0 "register_operand" "=f") + (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] +@@ -1467,6 +1561,42 @@ + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) + ++(define_insn "lsx_vfrsqrte_" ++ [(set (match_operand:FLSX 0 "register_operand" "=f") ++ (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] ++ UNSPEC_RSQRTE))] ++ "ISA_HAS_LSX && flag_unsafe_math_optimizations && TARGET_RECIP_VEC_SQRT" ++ "vfrsqrte.\t%w0,%w1" ++ [(set_attr "type" "simd_fdiv") ++ (set_attr "mode" "")]) ++ ++(define_expand "rsqrt2" ++ [(set (match_operand:FLSX 0 "register_operand" "=f") ++ (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] ++ UNSPEC_LSX_VFRSQRT))] ++ "ISA_HAS_LSX" ++{ ++ if (mode == V4SFmode ++ && TARGET_RECIP_VEC_RSQRT ++ && flag_unsafe_math_optimizations ++ && optimize_insn_for_speed_p () ++ && flag_finite_math_only && !flag_trapping_math) ++ { ++ loongarch_emit_swrsqrtsf (operands[0], operands[1], V4SFmode, 1); ++ DONE; ++ } ++}) ++ ++(define_insn "*rsqrt2" ++ [(set (match_operand:FLSX 0 "register_operand" "=f") ++ (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] ++ UNSPEC_LSX_VFRSQRT))] ++ "ISA_HAS_LSX" ++ "vfrsqrt.\t%w0,%w1" ++ [(set_attr "type" "simd_fdiv") ++ (set_attr "mode" "")]) ++ ++ + (define_insn "lsx_vftint_s__" + [(set (match_operand: 0 "register_operand" "=f") + (unspec: [(match_operand:FLSX 1 "register_operand" "f")] +@@ -2172,8 +2302,8 @@ + + (define_insn "lsx_vreplvei__scalar" + [(set (match_operand:FLSX 0 "register_operand" "=f") +- (unspec:FLSX 
[(match_operand: 1 "register_operand" "f")] +- UNSPEC_LSX_VREPLVEI))] ++ (vec_duplicate:FLSX ++ (match_operand: 1 "register_operand" "f")))] + "ISA_HAS_LSX" + "vreplvei.\t%w0,%w1,0" + [(set_attr "type" "simd_splat") +@@ -2285,8 +2415,7 @@ + "vset.\t%Z3%w1\n\tbcnez\t%Z3%0"); + } + [(set_attr "type" "simd_branch") +- (set_attr "mode" "") +- (set_attr "compact_form" "never")]) ++ (set_attr "mode" "")]) + + (define_insn "lsx__v_" + [(set (pc) (if_then_else +@@ -2304,8 +2433,7 @@ + "vset.v\t%Z3%w1\n\tbcnez\t%Z3%0"); + } + [(set_attr "type" "simd_branch") +- (set_attr "mode" "TI") +- (set_attr "compact_form" "never")]) ++ (set_attr "mode" "TI")]) + + ;; vec_concate + (define_expand "vec_concatv2di" +@@ -2923,8 +3051,8 @@ + (set_attr "mode" "V4SF")]) + + (define_insn "lsx_vfrintrne_s" +- [(set (match_operand:V4SI 0 "register_operand" "=f") +- (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "f")] ++ [(set (match_operand:V4SF 0 "register_operand" "=f") ++ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "f")] + UNSPEC_LSX_VFRINTRNE_S))] + "ISA_HAS_LSX" + "vfrintrne.s\t%w0,%w1" +@@ -2932,8 +3060,8 @@ + (set_attr "mode" "V4SF")]) + + (define_insn "lsx_vfrintrne_d" +- [(set (match_operand:V2DI 0 "register_operand" "=f") +- (unspec:V2DI [(match_operand:V2DF 1 "register_operand" "f")] ++ [(set (match_operand:V2DF 0 "register_operand" "=f") ++ (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "f")] + UNSPEC_LSX_VFRINTRNE_D))] + "ISA_HAS_LSX" + "vfrintrne.d\t%w0,%w1" +@@ -2941,8 +3069,8 @@ + (set_attr "mode" "V2DF")]) + + (define_insn "lsx_vfrintrz_s" +- [(set (match_operand:V4SI 0 "register_operand" "=f") +- (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "f")] ++ [(set (match_operand:V4SF 0 "register_operand" "=f") ++ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "f")] + UNSPEC_LSX_VFRINTRZ_S))] + "ISA_HAS_LSX" + "vfrintrz.s\t%w0,%w1" +@@ -2950,8 +3078,8 @@ + (set_attr "mode" "V4SF")]) + + (define_insn "lsx_vfrintrz_d" +- [(set (match_operand:V2DI 0 "register_operand" "=f") +- (unspec:V2DI [(match_operand:V2DF 1 "register_operand" "f")] ++ [(set (match_operand:V2DF 0 "register_operand" "=f") ++ (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "f")] + UNSPEC_LSX_VFRINTRZ_D))] + "ISA_HAS_LSX" + "vfrintrz.d\t%w0,%w1" +@@ -2959,8 +3087,8 @@ + (set_attr "mode" "V2DF")]) + + (define_insn "lsx_vfrintrp_s" +- [(set (match_operand:V4SI 0 "register_operand" "=f") +- (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "f")] ++ [(set (match_operand:V4SF 0 "register_operand" "=f") ++ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "f")] + UNSPEC_LSX_VFRINTRP_S))] + "ISA_HAS_LSX" + "vfrintrp.s\t%w0,%w1" +@@ -2968,8 +3096,8 @@ + (set_attr "mode" "V4SF")]) + + (define_insn "lsx_vfrintrp_d" +- [(set (match_operand:V2DI 0 "register_operand" "=f") +- (unspec:V2DI [(match_operand:V2DF 1 "register_operand" "f")] ++ [(set (match_operand:V2DF 0 "register_operand" "=f") ++ (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "f")] + UNSPEC_LSX_VFRINTRP_D))] + "ISA_HAS_LSX" + "vfrintrp.d\t%w0,%w1" +@@ -2977,8 +3105,8 @@ + (set_attr "mode" "V2DF")]) + + (define_insn "lsx_vfrintrm_s" +- [(set (match_operand:V4SI 0 "register_operand" "=f") +- (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "f")] ++ [(set (match_operand:V4SF 0 "register_operand" "=f") ++ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "f")] + UNSPEC_LSX_VFRINTRM_S))] + "ISA_HAS_LSX" + "vfrintrm.s\t%w0,%w1" +@@ -2986,14 +3114,44 @@ + (set_attr "mode" "V4SF")]) + + (define_insn "lsx_vfrintrm_d" +- 
[(set (match_operand:V2DI 0 "register_operand" "=f") +- (unspec:V2DI [(match_operand:V2DF 1 "register_operand" "f")] ++ [(set (match_operand:V2DF 0 "register_operand" "=f") ++ (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "f")] + UNSPEC_LSX_VFRINTRM_D))] + "ISA_HAS_LSX" + "vfrintrm.d\t%w0,%w1" + [(set_attr "type" "simd_shift") + (set_attr "mode" "V2DF")]) + ++;; Vector versions of the floating-point frint patterns. ++;; Expands to btrunc, ceil, floor, rint. ++(define_insn "v4sf2" ++ [(set (match_operand:V4SF 0 "register_operand" "=f") ++ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "f")] ++ FRINT_S))] ++ "ISA_HAS_LSX" ++ "vfrint.s\t%w0,%w1" ++ [(set_attr "type" "simd_shift") ++ (set_attr "mode" "V4SF")]) ++ ++(define_insn "v2df2" ++ [(set (match_operand:V2DF 0 "register_operand" "=f") ++ (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "f")] ++ FRINT_D))] ++ "ISA_HAS_LSX" ++ "vfrint.d\t%w0,%w1" ++ [(set_attr "type" "simd_shift") ++ (set_attr "mode" "V2DF")]) ++ ++;; Expands to round. ++(define_insn "round2" ++ [(set (match_operand:FLSX 0 "register_operand" "=f") ++ (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] ++ UNSPEC_LSX_VFRINT))] ++ "ISA_HAS_LSX" ++ "vfrint.\t%w0,%w1" ++ [(set_attr "type" "simd_shift") ++ (set_attr "mode" "")]) ++ + ;; Offset load and broadcast + (define_expand "lsx_vldrepl_" + [(match_operand:LSX 0 "register_operand") +@@ -3019,6 +3177,18 @@ + (set_attr "mode" "") + (set_attr "length" "4")]) + ++(define_insn "lsx_vldrepl__insn_0" ++ [(set (match_operand:LSX 0 "register_operand" "=f") ++ (vec_duplicate:LSX ++ (mem: (match_operand:DI 1 "register_operand" "r"))))] ++ "ISA_HAS_LSX" ++{ ++ return "vldrepl.\t%w0,%1,0"; ++} ++ [(set_attr "type" "simd_load") ++ (set_attr "mode" "") ++ (set_attr "length" "4")]) ++ + ;; Offset store by sel + (define_expand "lsx_vstelm_" + [(match_operand:LSX 0 "register_operand") +@@ -3047,6 +3217,20 @@ + (set_attr "mode" "") + (set_attr "length" "4")]) + ++;; Offset is "0" ++(define_insn "lsx_vstelm__insn_0" ++ [(set (mem: (match_operand:DI 0 "register_operand" "r")) ++ (vec_select: ++ (match_operand:LSX 1 "register_operand" "f") ++ (parallel [(match_operand:SI 2 "const__operand")])))] ++ "ISA_HAS_LSX" ++{ ++ return "vstelm.\t%w1,%0,0,%2"; ++} ++ [(set_attr "type" "simd_store") ++ (set_attr "mode" "") ++ (set_attr "length" "4")]) ++ + (define_expand "lsx_vld" + [(match_operand:V16QI 0 "register_operand") + (match_operand 1 "pmode_register_operand") +@@ -3179,3 +3363,101 @@ + } + [(set_attr "type" "simd_fcmp") + (set_attr "mode" "FCC")]) ++ ++;; Vector reduction operation ++(define_expand "reduc_plus_scal_v2di" ++ [(match_operand:DI 0 "register_operand") ++ (match_operand:V2DI 1 "register_operand")] ++ "ISA_HAS_LSX" ++{ ++ rtx tmp = gen_reg_rtx (V2DImode); ++ emit_insn (gen_lsx_vhaddw_q_d (tmp, operands[1], operands[1])); ++ emit_insn (gen_vec_extractv2didi (operands[0], tmp, const0_rtx)); ++ DONE; ++}) ++ ++(define_expand "reduc_plus_scal_v4si" ++ [(match_operand:SI 0 "register_operand") ++ (match_operand:V4SI 1 "register_operand")] ++ "ISA_HAS_LSX" ++{ ++ rtx tmp = gen_reg_rtx (V2DImode); ++ rtx tmp1 = gen_reg_rtx (V2DImode); ++ emit_insn (gen_lsx_vhaddw_d_w (tmp, operands[1], operands[1])); ++ emit_insn (gen_lsx_vhaddw_q_d (tmp1, tmp, tmp)); ++ emit_insn (gen_vec_extractv4sisi (operands[0], gen_lowpart(V4SImode,tmp1), const0_rtx)); ++ DONE; ++}) ++ ++(define_expand "reduc_plus_scal_" ++ [(match_operand: 0 "register_operand") ++ (match_operand:FLSX 1 "register_operand")] ++ "ISA_HAS_LSX" ++{ ++ rtx tmp 
= gen_reg_rtx (mode); ++ loongarch_expand_vector_reduc (gen_add3, tmp, operands[1]); ++ emit_insn (gen_vec_extract (operands[0], tmp, ++ const0_rtx)); ++ DONE; ++}) ++ ++(define_expand "reduc__scal_" ++ [(any_bitwise: ++ (match_operand: 0 "register_operand") ++ (match_operand:ILSX 1 "register_operand"))] ++ "ISA_HAS_LSX" ++{ ++ rtx tmp = gen_reg_rtx (mode); ++ loongarch_expand_vector_reduc (gen_3, tmp, operands[1]); ++ emit_insn (gen_vec_extract (operands[0], tmp, ++ const0_rtx)); ++ DONE; ++}) ++ ++(define_expand "reduc_smax_scal_" ++ [(match_operand: 0 "register_operand") ++ (match_operand:LSX 1 "register_operand")] ++ "ISA_HAS_LSX" ++{ ++ rtx tmp = gen_reg_rtx (mode); ++ loongarch_expand_vector_reduc (gen_smax3, tmp, operands[1]); ++ emit_insn (gen_vec_extract (operands[0], tmp, ++ const0_rtx)); ++ DONE; ++}) ++ ++(define_expand "reduc_smin_scal_" ++ [(match_operand: 0 "register_operand") ++ (match_operand:LSX 1 "register_operand")] ++ "ISA_HAS_LSX" ++{ ++ rtx tmp = gen_reg_rtx (mode); ++ loongarch_expand_vector_reduc (gen_smin3, tmp, operands[1]); ++ emit_insn (gen_vec_extract (operands[0], tmp, ++ const0_rtx)); ++ DONE; ++}) ++ ++(define_expand "reduc_umax_scal_" ++ [(match_operand: 0 "register_operand") ++ (match_operand:ILSX 1 "register_operand")] ++ "ISA_HAS_LSX" ++{ ++ rtx tmp = gen_reg_rtx (mode); ++ loongarch_expand_vector_reduc (gen_umax3, tmp, operands[1]); ++ emit_insn (gen_vec_extract (operands[0], tmp, ++ const0_rtx)); ++ DONE; ++}) ++ ++(define_expand "reduc_umin_scal_" ++ [(match_operand: 0 "register_operand") ++ (match_operand:ILSX 1 "register_operand")] ++ "ISA_HAS_LSX" ++{ ++ rtx tmp = gen_reg_rtx (mode); ++ loongarch_expand_vector_reduc (gen_umin3, tmp, operands[1]); ++ emit_insn (gen_vec_extract (operands[0], tmp, ++ const0_rtx)); ++ DONE; ++}) +diff --git a/gcc/config/loongarch/lsxintrin.h b/gcc/config/loongarch/lsxintrin.h +index fe3043e3d..2d1598536 100644 +--- a/gcc/config/loongarch/lsxintrin.h ++++ b/gcc/config/loongarch/lsxintrin.h +@@ -3291,65 +3291,65 @@ __m128i __lsx_vftintrneh_l_s(__m128 _1) + /* Assembly instruction format: vd, vj. */ + /* Data types in instruction templates: V4SI, V4SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m128i __lsx_vfrintrne_s(__m128 _1) ++__m128 __lsx_vfrintrne_s(__m128 _1) + { +- return (__m128i)__builtin_lsx_vfrintrne_s((v4f32)_1); ++ return (__m128)__builtin_lsx_vfrintrne_s((v4f32)_1); + } + + /* Assembly instruction format: vd, vj. */ + /* Data types in instruction templates: V2DI, V2DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m128i __lsx_vfrintrne_d(__m128d _1) ++__m128d __lsx_vfrintrne_d(__m128d _1) + { +- return (__m128i)__builtin_lsx_vfrintrne_d((v2f64)_1); ++ return (__m128d)__builtin_lsx_vfrintrne_d((v2f64)_1); + } + + /* Assembly instruction format: vd, vj. */ + /* Data types in instruction templates: V4SI, V4SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m128i __lsx_vfrintrz_s(__m128 _1) ++__m128 __lsx_vfrintrz_s(__m128 _1) + { +- return (__m128i)__builtin_lsx_vfrintrz_s((v4f32)_1); ++ return (__m128)__builtin_lsx_vfrintrz_s((v4f32)_1); + } + + /* Assembly instruction format: vd, vj. */ + /* Data types in instruction templates: V2DI, V2DF. 
*/ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m128i __lsx_vfrintrz_d(__m128d _1) ++__m128d __lsx_vfrintrz_d(__m128d _1) + { +- return (__m128i)__builtin_lsx_vfrintrz_d((v2f64)_1); ++ return (__m128d)__builtin_lsx_vfrintrz_d((v2f64)_1); + } + + /* Assembly instruction format: vd, vj. */ + /* Data types in instruction templates: V4SI, V4SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m128i __lsx_vfrintrp_s(__m128 _1) ++__m128 __lsx_vfrintrp_s(__m128 _1) + { +- return (__m128i)__builtin_lsx_vfrintrp_s((v4f32)_1); ++ return (__m128)__builtin_lsx_vfrintrp_s((v4f32)_1); + } + + /* Assembly instruction format: vd, vj. */ + /* Data types in instruction templates: V2DI, V2DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m128i __lsx_vfrintrp_d(__m128d _1) ++__m128d __lsx_vfrintrp_d(__m128d _1) + { +- return (__m128i)__builtin_lsx_vfrintrp_d((v2f64)_1); ++ return (__m128d)__builtin_lsx_vfrintrp_d((v2f64)_1); + } + + /* Assembly instruction format: vd, vj. */ + /* Data types in instruction templates: V4SI, V4SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m128i __lsx_vfrintrm_s(__m128 _1) ++__m128 __lsx_vfrintrm_s(__m128 _1) + { +- return (__m128i)__builtin_lsx_vfrintrm_s((v4f32)_1); ++ return (__m128)__builtin_lsx_vfrintrm_s((v4f32)_1); + } + + /* Assembly instruction format: vd, vj. */ + /* Data types in instruction templates: V2DI, V2DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m128i __lsx_vfrintrm_d(__m128d _1) ++__m128d __lsx_vfrintrm_d(__m128d _1) + { +- return (__m128i)__builtin_lsx_vfrintrm_d((v2f64)_1); ++ return (__m128d)__builtin_lsx_vfrintrm_d((v2f64)_1); + } + + /* Assembly instruction format: vd, rj, si8, idx. */ +@@ -4154,19 +4154,19 @@ __m128i __lsx_vsub_q(__m128i _1, __m128i _2) + + /* Assembly instruction format: vd, rj, si12. */ + /* Data types in instruction templates: V16QI, CVPOINTER, SI. */ +-#define __lsx_vldrepl_b(/*void **/ _1, /*si12*/ _2) ((__m128i)__builtin_lsx_vldrepl_b((void *)(_1), (_2))) ++#define __lsx_vldrepl_b(/*void **/ _1, /*si12*/ _2) ((__m128i)__builtin_lsx_vldrepl_b((void const *)(_1), (_2))) + + /* Assembly instruction format: vd, rj, si11. */ + /* Data types in instruction templates: V8HI, CVPOINTER, SI. */ +-#define __lsx_vldrepl_h(/*void **/ _1, /*si11*/ _2) ((__m128i)__builtin_lsx_vldrepl_h((void *)(_1), (_2))) ++#define __lsx_vldrepl_h(/*void **/ _1, /*si11*/ _2) ((__m128i)__builtin_lsx_vldrepl_h((void const *)(_1), (_2))) + + /* Assembly instruction format: vd, rj, si10. */ + /* Data types in instruction templates: V4SI, CVPOINTER, SI. */ +-#define __lsx_vldrepl_w(/*void **/ _1, /*si10*/ _2) ((__m128i)__builtin_lsx_vldrepl_w((void *)(_1), (_2))) ++#define __lsx_vldrepl_w(/*void **/ _1, /*si10*/ _2) ((__m128i)__builtin_lsx_vldrepl_w((void const *)(_1), (_2))) + + /* Assembly instruction format: vd, rj, si9. */ + /* Data types in instruction templates: V2DI, CVPOINTER, SI. */ +-#define __lsx_vldrepl_d(/*void **/ _1, /*si9*/ _2) ((__m128i)__builtin_lsx_vldrepl_d((void *)(_1), (_2))) ++#define __lsx_vldrepl_d(/*void **/ _1, /*si9*/ _2) ((__m128i)__builtin_lsx_vldrepl_d((void const *)(_1), (_2))) + + /* Assembly instruction format: vd, vj. */ + /* Data types in instruction templates: V16QI, V16QI. */ +@@ -4470,7 +4470,7 @@ __m128i __lsx_vextl_q_d(__m128i _1) + + /* Assembly instruction format: vd, rj, si12. 
*/ + /* Data types in instruction templates: V16QI, CVPOINTER, SI. */ +-#define __lsx_vld(/*void **/ _1, /*si12*/ _2) ((__m128i)__builtin_lsx_vld((void *)(_1), (_2))) ++#define __lsx_vld(/*void **/ _1, /*si12*/ _2) ((__m128i)__builtin_lsx_vld((void const *)(_1), (_2))) + + /* Assembly instruction format: vd, rj, si12. */ + /* Data types in instruction templates: VOID, V16QI, CVPOINTER, SI. */ +@@ -4547,9 +4547,9 @@ __m128i __lsx_vshuf_b(__m128i _1, __m128i _2, __m128i _3) + /* Assembly instruction format: vd, rj, rk. */ + /* Data types in instruction templates: V16QI, CVPOINTER, DI. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m128i __lsx_vldx(void * _1, long int _2) ++__m128i __lsx_vldx(void const * _1, long int _2) + { +- return (__m128i)__builtin_lsx_vldx((void *)_1, (long int)_2); ++ return (__m128i)__builtin_lsx_vldx((void const *)_1, (long int)_2); + } + + /* Assembly instruction format: vd, rj, rk. */ +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index 20638559d..daacaf003 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -1,5 +1,7 @@ +-;; Predicate definitions for LARCH. +-;; Copyright (C) 2004-2018 Free Software Foundation, Inc. ++;; Predicate definitions for LoongArch target. ++;; Copyright (C) 2020-2022 Free Software Foundation, Inc. ++;; Contributed by Loongson Co. Ltd. ++;; Based on MIPS target for GNU compiler. + ;; + ;; This file is part of GCC. + ;; +@@ -19,7 +21,7 @@ + + (define_predicate "const_uns_arith_operand" + (and (match_code "const_int") +- (match_test "SMALL_OPERAND_UNSIGNED (INTVAL (op))"))) ++ (match_test "IMM12_OPERAND_UNSIGNED (INTVAL (op))"))) + + (define_predicate "uns_arith_operand" + (ior (match_operand 0 "const_uns_arith_operand") +@@ -45,7 +47,7 @@ + (ior (match_operand 0 "const_arith_operand") + (match_operand 0 "register_operand"))) + +-(define_predicate "const_immlsa_operand" ++(define_predicate "const_immalsl_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 1, 4)"))) + +@@ -69,9 +71,6 @@ + (and (match_code "const_int") + (match_test "UIMM6_OPERAND (INTVAL (op))"))) + +-(define_predicate "const_uimm7_operand" +- (and (match_code "const_int") +- (match_test "IN_RANGE (INTVAL (op), 0, 127)"))) + + (define_predicate "const_uimm8_operand" + (and (match_code "const_int") +@@ -85,10 +84,6 @@ + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 32767)"))) + +-(define_predicate "const_imm5_operand" +- (and (match_code "const_int") +- (match_test "IN_RANGE (INTVAL (op), -16, 15)"))) +- + (define_predicate "const_imm10_operand" + (and (match_code "const_int") + (match_test "IMM10_OPERAND (INTVAL (op))"))) +@@ -101,10 +96,6 @@ + (and (match_code "const_int") + (match_test "IMM13_OPERAND (INTVAL (op))"))) + +-(define_predicate "reg_imm10_operand" +- (ior (match_operand 0 "const_imm10_operand") +- (match_operand 0 "register_operand"))) +- + (define_predicate "aq8b_operand" + (and (match_code "const_int") + (match_test "loongarch_signed_immediate_p (INTVAL (op), 8, 0)"))) +@@ -137,6 +128,7 @@ + (and (match_code "const_int") + (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 3)"))) + ++ + (define_predicate "aq12b_operand" + (and (match_code "const_int") + (match_test "loongarch_signed_immediate_p (INTVAL (op), 12, 0)"))) +@@ -155,7 +147,7 @@ + + (define_predicate "sle_operand" + (and (match_code "const_int") +- (match_test "SMALL_OPERAND (INTVAL (op) + 1)"))) ++ (match_test 
"IMM12_OPERAND (INTVAL (op) + 1)"))) + + (define_predicate "sleu_operand" + (and (match_operand 0 "sle_operand") +@@ -223,179 +215,40 @@ + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 7)"))) + +-(define_predicate "const_4_to_7_operand" ++(define_predicate "const_4_to_7_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 4, 7)"))) +- ++ + (define_predicate "const_8_to_15_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 7)"))) +- +-(define_predicate "const_16_to_31_operand" +- (and (match_code "const_int") +- (match_test "IN_RANGE (INTVAL (op), 0, 7)"))) +- +-(define_predicate "qi_mask_operand" +- (and (match_code "const_int") +- (match_test "UINTVAL (op) == 0xff"))) + +-(define_predicate "hi_mask_operand" ++(define_predicate "const_8_to_11_operand" + (and (match_code "const_int") +- (match_test "UINTVAL (op) == 0xffff"))) ++ (match_test "IN_RANGE (INTVAL (op), 8, 11)"))) + +-(define_predicate "lu52i_mask_operand" ++(define_predicate "const_12_to_15_operand" + (and (match_code "const_int") +- (match_test "UINTVAL (op) == 0xfffffffffffff"))) ++ (match_test "IN_RANGE (INTVAL (op), 12, 15)"))) + +-(define_predicate "shift_mask_operand" ++(define_predicate "const_16_to_31_operand" + (and (match_code "const_int") +- (ior (match_test "UINTVAL (op) == 0x3fffffffc") +- (match_test "UINTVAL (op) == 0x1fffffffe") +- (match_test "UINTVAL (op) == 0x7fffffff8") +- (match_test "UINTVAL (op) == 0xffffffff0")))) +- +- ++ (match_test "IN_RANGE (INTVAL (op), 0, 7)"))) + +-(define_predicate "si_mask_operand" ++(define_predicate "lu52i_mask_operand" + (and (match_code "const_int") +- (match_test "UINTVAL (op) == 0xffffffff"))) +- +-(define_predicate "and_load_operand" +- (ior (match_operand 0 "qi_mask_operand") +- (match_operand 0 "hi_mask_operand") +- (match_operand 0 "si_mask_operand"))) ++ (match_test "UINTVAL (op) == 0xfffffffffffff"))) + + (define_predicate "low_bitmask_operand" + (and (match_code "const_int") + (match_test "low_bitmask_len (mode, INTVAL (op)) > 12"))) + +-(define_predicate "and_reg_operand" +- (ior (match_operand 0 "register_operand") +- (match_operand 0 "const_uns_arith_operand") +- (match_operand 0 "low_bitmask_operand") +- (match_operand 0 "si_mask_operand"))) +- +-(define_predicate "and_operand" +- (ior (match_operand 0 "and_load_operand") +- (match_operand 0 "and_reg_operand"))) +- +-(define_predicate "d_operand" +- (and (match_code "reg") +- (match_test "GP_REG_P (REGNO (op))"))) +- +-(define_predicate "lwsp_swsp_operand" +- (and (match_code "mem") +- (match_test "lwsp_swsp_address_p (XEXP (op, 0), mode)"))) +- +-(define_predicate "db4_operand" +- (and (match_code "const_int") +- (match_test "loongarch_unsigned_immediate_p (INTVAL (op) + 1, 4, 0)"))) +- +-(define_predicate "db7_operand" +- (and (match_code "const_int") +- (match_test "loongarch_unsigned_immediate_p (INTVAL (op) + 1, 7, 0)"))) +- +-(define_predicate "db8_operand" +- (and (match_code "const_int") +- (match_test "loongarch_unsigned_immediate_p (INTVAL (op) + 1, 8, 0)"))) +- +-(define_predicate "ib3_operand" +- (and (match_code "const_int") +- (match_test "loongarch_unsigned_immediate_p (INTVAL (op) - 1, 3, 0)"))) +- +-(define_predicate "sb4_operand" +- (and (match_code "const_int") +- (match_test "loongarch_signed_immediate_p (INTVAL (op), 4, 0)"))) +- +-(define_predicate "sb5_operand" +- (and (match_code "const_int") +- (match_test "loongarch_signed_immediate_p (INTVAL (op), 5, 0)"))) +- +-(define_predicate "sb8_operand" +- (and 
(match_code "const_int") +- (match_test "loongarch_signed_immediate_p (INTVAL (op), 8, 0)"))) +- +-(define_predicate "sd8_operand" +- (and (match_code "const_int") +- (match_test "loongarch_signed_immediate_p (INTVAL (op), 8, 3)"))) +- +-(define_predicate "ub4_operand" +- (and (match_code "const_int") +- (match_test "loongarch_unsigned_immediate_p (INTVAL (op), 4, 0)"))) +- +-(define_predicate "ub8_operand" +- (and (match_code "const_int") +- (match_test "loongarch_unsigned_immediate_p (INTVAL (op), 8, 0)"))) +- +-(define_predicate "uh4_operand" +- (and (match_code "const_int") +- (match_test "loongarch_unsigned_immediate_p (INTVAL (op), 4, 1)"))) +- +-(define_predicate "uw4_operand" +- (and (match_code "const_int") +- (match_test "loongarch_unsigned_immediate_p (INTVAL (op), 4, 2)"))) +- +-(define_predicate "uw5_operand" +- (and (match_code "const_int") +- (match_test "loongarch_unsigned_immediate_p (INTVAL (op), 5, 2)"))) +- +-(define_predicate "uw6_operand" +- (and (match_code "const_int") +- (match_test "loongarch_unsigned_immediate_p (INTVAL (op), 6, 2)"))) +- +-(define_predicate "uw8_operand" +- (and (match_code "const_int") +- (match_test "loongarch_unsigned_immediate_p (INTVAL (op), 8, 2)"))) +- +-(define_predicate "addiur2_operand" +- (and (match_code "const_int") +- (ior (match_test "INTVAL (op) == -1") +- (match_test "INTVAL (op) == 1") +- (match_test "INTVAL (op) == 4") +- (match_test "INTVAL (op) == 8") +- (match_test "INTVAL (op) == 12") +- (match_test "INTVAL (op) == 16") +- (match_test "INTVAL (op) == 20") +- (match_test "INTVAL (op) == 24")))) +- +-(define_predicate "addiusp_operand" +- (and (match_code "const_int") +- (ior (match_test "(IN_RANGE (INTVAL (op), 2, 257))") +- (match_test "(IN_RANGE (INTVAL (op), -258, -3))")))) +- +-(define_predicate "andi16_operand" +- (and (match_code "const_int") +- (ior (match_test "IN_RANGE (INTVAL (op), 1, 4)") +- (match_test "IN_RANGE (INTVAL (op), 7, 8)") +- (match_test "IN_RANGE (INTVAL (op), 15, 16)") +- (match_test "IN_RANGE (INTVAL (op), 31, 32)") +- (match_test "IN_RANGE (INTVAL (op), 63, 64)") +- (match_test "INTVAL (op) == 255") +- (match_test "INTVAL (op) == 32768") +- (match_test "INTVAL (op) == 65535")))) +- +-(define_predicate "movep_src_register" +- (and (match_code "reg") +- (ior (match_test ("IN_RANGE (REGNO (op), 2, 3)")) +- (match_test ("IN_RANGE (REGNO (op), 16, 20)"))))) +- +-(define_predicate "movep_src_operand" +- (ior (match_operand 0 "const_0_operand") +- (match_operand 0 "movep_src_register"))) +- +-(define_predicate "fcc_reload_operand" +- (and (match_code "reg,subreg") +- (match_test "ST_REG_P (true_regnum (op))"))) +- +-(define_predicate "muldiv_target_operand" +- (match_operand 0 "register_operand")) +- + (define_predicate "const_call_insn_operand" + (match_code "const,symbol_ref,label_ref") + { + enum loongarch_symbol_type symbol_type; + +- if (!loongarch_symbolic_constant_p (op, SYMBOL_CONTEXT_CALL, &symbol_type)) ++ if (!loongarch_symbolic_constant_p (op, &symbol_type)) + return false; + + switch (symbol_type) +@@ -403,9 +256,9 @@ + case SYMBOL_GOT_DISP: + /* Without explicit relocs, there is no special syntax for + loading the address of a call destination into a register. +- Using "la $25,foo; jal $25" would prevent the lazy binding +- of "foo", so keep the address of global symbols with the +- jal macro. */ ++ Using "la.global JIRL_REGS,foo; jirl JIRL_REGS" would prevent the lazy ++ binding of "foo", so keep the address of global symbols with the jirl ++ macro. 
*/ + return 1; + + default: +@@ -420,7 +273,7 @@ + (define_predicate "is_const_call_local_symbol" + (and (match_operand 0 "const_call_insn_operand") + (ior (match_test "loongarch_global_symbol_p (op) == 0") +- (match_test "loongarch_symbol_binds_local_p (op) != 0")) ++ (match_test "loongarch_symbol_binds_local_p (op) != 0")) + (match_test "CONSTANT_P (op)"))) + + (define_predicate "is_const_call_weak_symbol" +@@ -446,7 +299,6 @@ + (define_predicate "splittable_const_int_operand" + (match_code "const_int") + { +- + /* Don't handle multi-word moves this way; we don't want to introduce + the individual word-mode moves until after reload. */ + if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) +@@ -454,9 +306,8 @@ + + /* Otherwise check whether the constant can be loaded in a single + instruction. */ +-// return !LUI_INT (op) && !SMALL_INT (op) && !SMALL_INT_UNSIGNED (op); +- return !LUI_INT (op) && !SMALL_INT (op) && !SMALL_INT_UNSIGNED (op) +- && !LU52I_INT (op); ++ return !LU12I_INT (op) && !IMM12_INT (op) && !IMM12_INT_UNSIGNED (op) ++ && !LU52I_INT (op); + }) + + (define_predicate "move_operand" +@@ -504,73 +355,34 @@ + case CONST: + case SYMBOL_REF: + case LABEL_REF: +- return (loongarch_symbolic_constant_p (op, SYMBOL_CONTEXT_LEA, &symbol_type)); ++ return (loongarch_symbolic_constant_p (op, &symbol_type)); + default: + return true; + } + }) + +-(define_predicate "consttable_operand" +- (match_test "CONSTANT_P (op)")) +- + (define_predicate "symbolic_operand" + (match_code "const,symbol_ref,label_ref") + { + enum loongarch_symbol_type type; +- return loongarch_symbolic_constant_p (op, SYMBOL_CONTEXT_LEA, &type); +-}) +- +-(define_predicate "force_to_mem_operand" +- (match_code "const,symbol_ref,label_ref") +-{ +- enum loongarch_symbol_type symbol_type; +- return (loongarch_symbolic_constant_p (op, SYMBOL_CONTEXT_LEA, &symbol_type) +- && loongarch_use_pcrel_pool_p[(int) symbol_type]); +-}) +- +-(define_predicate "got_disp_operand" +- (match_code "const,symbol_ref,label_ref") +-{ +- enum loongarch_symbol_type type; +- return (loongarch_symbolic_constant_p (op, SYMBOL_CONTEXT_LEA, &type) +- && type == SYMBOL_GOT_DISP); ++ return loongarch_symbolic_constant_p (op, &type); + }) + +-(define_predicate "symbol_ref_operand" +- (match_code "symbol_ref")) +- +-(define_predicate "stack_operand" +- (and (match_code "mem") +- (match_test "loongarch_stack_address_p (XEXP (op, 0), GET_MODE (op))"))) +- +- +- + (define_predicate "equality_operator" + (match_code "eq,ne")) + +-(define_predicate "extend_operator" +- (match_code "zero_extend,sign_extend")) +- +-(define_predicate "trap_comparison_operator" +- (match_code "eq,ne,lt,ltu,ge,geu")) +- + (define_predicate "order_operator" + (match_code "lt,ltu,le,leu,ge,geu,gt,gtu")) + + ;; For NE, cstore uses sltu instructions in which the first operand is $0. + + (define_predicate "loongarch_cstore_operator" +- (ior (match_code "eq,gt,gtu,ge,geu,lt,ltu,le,leu") +- (match_code "ne"))) ++ (match_code "ne,eq,gt,gtu,ge,geu,lt,ltu,le,leu")) + + (define_predicate "small_data_pattern" + (and (match_code "set,parallel,unspec,unspec_volatile,prefetch") + (match_test "loongarch_small_data_pattern_p (op)"))) + +-(define_predicate "mem_noofs_operand" +- (and (match_code "mem") +- (match_code "reg" "0"))) +- + ;; Return 1 if the operand is in non-volatile memory. 
+ (define_predicate "non_volatile_mem_operand" + (and (match_operand 0 "memory_operand") +@@ -606,12 +418,6 @@ + return loongarch_const_vector_same_int_p (op, mode, 0, 63); + }) + +-(define_predicate "const_vector_same_uimm8_operand" +- (match_code "const_vector") +-{ +- return loongarch_const_vector_same_int_p (op, mode, 0, 255); +-}) +- + (define_predicate "par_const_vector_shf_set_operand" + (match_code "parallel") + { +diff --git a/gcc/config/loongarch/rtems.h b/gcc/config/loongarch/rtems.h +deleted file mode 100644 +index bbb70b040..000000000 +--- a/gcc/config/loongarch/rtems.h ++++ /dev/null +@@ -1,39 +0,0 @@ +-/* Definitions for rtems targeting a LARCH using ELF. +- Copyright (C) 1996-2018 Free Software Foundation, Inc. +- Contributed by Joel Sherrill (joel@OARcorp.com). +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- . */ +- +-/* Specify predefined symbols in preprocessor. */ +- +-#define TARGET_OS_CPP_BUILTINS() \ +-do { \ +- builtin_define ("__rtems__"); \ +- builtin_define ("__USE_INIT_FINI__"); \ +- builtin_assert ("system=rtems"); \ +-} while (0) +- +-/* No sdata. +- * The RTEMS BSPs expect -G0 +- */ +-#undef LARCH_DEFAULT_GVALUE +-#define LARCH_DEFAULT_GVALUE 0 +diff --git a/gcc/config/loongarch/sde.opt b/gcc/config/loongarch/sde.opt +deleted file mode 100644 +index 321217d51..000000000 +--- a/gcc/config/loongarch/sde.opt ++++ /dev/null +@@ -1,28 +0,0 @@ +-; LARCH SDE options. +-; +-; Copyright (C) 2010-2018 Free Software Foundation, Inc. +-; +-; This file is part of GCC. +-; +-; GCC is free software; you can redistribute it and/or modify it under +-; the terms of the GNU General Public License as published by the Free +-; Software Foundation; either version 3, or (at your option) any later +-; version. +-; +-; GCC is distributed in the hope that it will be useful, but WITHOUT +-; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +-; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +-; License for more details. +-; +-; You should have received a copy of the GNU General Public License +-; along with GCC; see the file COPYING3. If not see +-; . +- +-; -mcode-xonly is a traditional alias for -mcode-readable=pcrel and +-; -mno-data-in-code is a traditional alias for -mcode-readable=no. +- +-mno-data-in-code +-Target RejectNegative Alias(mcode-readable=, no) +- +-mcode-xonly +-Target RejectNegative Alias(mcode-readable=, pcrel) +diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md +index 5a16c4fa3..abc401339 100644 +--- a/gcc/config/loongarch/sync.md ++++ b/gcc/config/loongarch/sync.md +@@ -1,7 +1,7 @@ +-;; Machine description for LARCH atomic operations. 
+-;; Copyright (C) 2011-2018 Free Software Foundation, Inc. +-;; Contributed by Andrew Waterman (andrew@sifive.com). +-;; Based on LARCH target for GNU compiler. ++;; Machine description for LoongArch atomic operations. ++;; Copyright (C) 2020-2022 Free Software Foundation, Inc. ++;; Contributed by Loongson Co. Ltd. ++;; Based on MIPS and RISC-V target for GNU compiler. + + ;; This file is part of GCC. + +@@ -29,6 +29,7 @@ + UNSPEC_COMPARE_AND_SWAP_NAND + UNSPEC_SYNC_OLD_OP + UNSPEC_SYNC_EXCHANGE ++ UNSPEC_ATOMIC_LOAD + UNSPEC_ATOMIC_STORE + UNSPEC_MEMORY_BARRIER + ]) +@@ -37,21 +38,25 @@ + (define_code_attr atomic_optab + [(plus "add") (ior "or") (xor "xor") (and "and")]) + ++(define_mode_iterator AMO_BHWD [(QI "TARGET_uARCH_LA664") ++ (HI "TARGET_uARCH_LA664") ++ SI DI]) ++ + ;; This attribute gives the format suffix for atomic memory operations. +-(define_mode_attr amo [(SI "w") (DI "d")]) ++(define_mode_attr amo [(QI "b") (HI "h") (SI "w") (DI "d")]) ++ ++;; expands to the name of the atomic operand that implements a ++;; particular code. ++(define_code_attr amop [(ior "or") (xor "xor") (and "and") (plus "add")]) + +-;; expands to the name of the atomic operand that implements a particular code. +-(define_code_attr amop [(ior "or") +- (xor "xor") +- (and "and") +- (plus "add")]) + ;; Memory barriers. + + (define_expand "mem_thread_fence" + [(match_operand:SI 0 "const_int_operand" "")] ;; model + "" + { +- if (INTVAL (operands[0]) != MEMMODEL_RELAXED) ++ enum memmodel model = memmodel_from_int (INTVAL (operands[0])); ++ if (!is_mm_relaxed (model)) + { + rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (mem) = 1; +@@ -60,37 +65,79 @@ + DONE; + }) + +-;; Until the LARCH memory model (hence its mapping from C++) is finalized, ++;; Until the LoongArch memory model (hence its mapping from C++) is finalized, + ;; conservatively emit a full FENCE. + (define_insn "mem_thread_fence_1" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER)) + (match_operand:SI 1 "const_int_operand" "")] ;; model + "" +- "dbar\t0") ++{ ++ enum memmodel model = memmodel_from_int (INTVAL (operands[1])); ++ if (is_mm_consume (model)) ++ return "dbar\t0x700"; ++ else if (is_mm_acquire (model)) ++ return "dbar\t0x14"; ++ else ++ return "dbar\t0x10"; ++}) + + ;; Atomic memory operations. + ++(define_insn "atomic_load" ++ [(set (match_operand:QHWD 0 "register_operand" "=r") ++ (unspec_volatile:QHWD ++ [(match_operand:QHWD 1 "memory_operand" "+m") ++ (match_operand:SI 2 "const_int_operand")] ;; model ++ UNSPEC_ATOMIC_LOAD))] ++ "" ++{ ++ enum memmodel model = memmodel_from_int (INTVAL (operands[2])); ++ if (is_mm_relaxed (model) || is_mm_release (model)) ++ return "ld.\t%0,%1"; ++ if (is_mm_consume (model)) ++ return "ld.\t%0,%1\n\tdbar\t0x700"; ++ else ++ return "ld.\t%0,%1\n\tdbar\t0x14"; ++}) ++ + ;; Implement atomic stores with amoswap. Fall back to fences for atomic loads. 
+ (define_insn "atomic_store" +- [(set (match_operand:GPR 0 "memory_operand" "+ZB") +- (unspec_volatile:GPR +- [(match_operand:GPR 1 "reg_or_0_operand" "rJ") ++ [(set (match_operand:QHWD 0 "memory_operand" "+m") ++ (unspec_volatile:QHWD ++ [(match_operand:QHWD 1 "reg_or_0_operand" "rJ") + (match_operand:SI 2 "const_int_operand")] ;; model + UNSPEC_ATOMIC_STORE))] + "" +- "amswap%A2.\t$zero,%z1,%0" ++{ ++ enum memmodel model = memmodel_from_int (INTVAL (operands[2])); ++ if (is_mm_relaxed (model) || is_mm_acquire (model) || is_mm_consume (model)) ++ return "st.\t%z1,%0"; ++ else ++ return "dbar\t0x12\n\tst.\t%z1,%0"; ++} + [(set (attr "length") (const_int 8))]) + + (define_insn "atomic_" + [(set (match_operand:GPR 0 "memory_operand" "+ZB") + (unspec_volatile:GPR + [(any_atomic:GPR (match_dup 0) +- (match_operand:GPR 1 "reg_or_0_operand" "rJ")) ++ (match_operand:GPR 1 "reg_or_0_operand" "rJ")) + (match_operand:SI 2 "const_int_operand")] ;; model + UNSPEC_SYNC_OLD_OP))] + "" +- "am%A2.\t$zero,%z1,%0" ++ "%J2\n\tam%A2.\t$zero,%z1,%0\n\t%K2" ++ [(set (attr "length") (const_int 8))]) ++ ++(define_insn "atomic_add" ++ [(set (match_operand:SHORT 0 "memory_operand" "+ZB") ++ (unspec_volatile:SHORT ++ [(plus:SHORT (match_dup 0) ++ (match_operand:SHORT 1 "reg_or_0_operand" "rJ")) ++ (match_operand:SI 2 "const_int_operand")] ;; model ++ UNSPEC_SYNC_OLD_OP))] ++ "TARGET_uARCH_LA664" ++ "%J2\n\tamadd%A2.\t$zero,%z1,%0\n\t%K2" + [(set (attr "length") (const_int 8))]) + + (define_insn "atomic_fetch_" +@@ -99,11 +146,11 @@ + (set (match_dup 1) + (unspec_volatile:GPR + [(any_atomic:GPR (match_dup 1) +- (match_operand:GPR 2 "reg_or_0_operand" "rJ")) ++ (match_operand:GPR 2 "reg_or_0_operand" "rJ")) + (match_operand:SI 3 "const_int_operand")] ;; model + UNSPEC_SYNC_OLD_OP))] + "" +- "am%A3.\t%0,%z2,%1" ++ "%J3\n\tam%A3.\t%0,%z2,%1\n\t%K3" + [(set (attr "length") (const_int 8))]) + + (define_insn "atomic_exchange" +@@ -115,35 +162,90 @@ + (set (match_dup 1) + (match_operand:GPR 2 "register_operand" "r"))] + "" +- "amswap%A3.\t%0,%z2,%1" ++ "%J3\n\tamswap%A3.\t%0,%z2,%1\n\t%K3" ++ [(set (attr "length") (const_int 8))]) ++ ++(define_insn "atomic_exchange_1" ++ [(set (match_operand:SHORT 0 "register_operand" "=&r") ++ (unspec_volatile:SHORT ++ [(match_operand:SHORT 1 "memory_operand" "+ZB") ++ (match_operand:SI 3 "const_int_operand")] ;; model ++ UNSPEC_SYNC_EXCHANGE)) ++ (set (match_dup 1) ++ (match_operand:SHORT 2 "register_operand" "r"))] ++ "" ++ "%J3\n\tamswap%A3.\t%0,%z2,%1\n\t%K3" + [(set (attr "length") (const_int 8))]) + + (define_insn "atomic_cas_value_strong" + [(set (match_operand:GPR 0 "register_operand" "=&r") +- (match_operand:GPR 1 "memory_operand" "+ZC")) ++ (match_operand:GPR 1 "memory_operand" "+ZB")) + (set (match_dup 1) + (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") + (match_operand:GPR 3 "reg_or_0_operand" "rJ") +- (match_operand:SI 4 "const_int_operand") ;; mod_s +- (match_operand:SI 5 "const_int_operand")] ;; mod_f ++ (match_operand:SI 4 "const_int_operand")] ;; mod_s + UNSPEC_COMPARE_AND_SWAP)) +- (clobber (match_scratch:GPR 6 "=&r"))] ++ (clobber (match_scratch:GPR 5 "=&r"))] + "" + { +- return "%G5\n\t" +- "1:\n\t" +- "ll.\t%0,%1\n\t" +- "bne\t%0,%z2,2f\n\t" +- "or%i3\t%6,$zero,%3\n\t" +- "sc.\t%6,%1\n\t" +- "beq\t$zero,%6,1b\n\t" +- "b\t3f\n\t" +- "2:\n\t" +- "dbar\t0x700\n\t" +- "3:\n\t"; ++ if (TARGET_uARCH_LA664) ++ { ++ enum memmodel model = memmodel_from_int (INTVAL (operands[4])); ++ output_asm_insn ("1:",operands); ++ ++ if (is_mm_relaxed (model) || 
is_mm_consume (model) || is_mm_release (model)) ++ output_asm_insn ("ll.\t%0,%1", operands); ++ else ++ output_asm_insn ("llacq.\t%0,%1", operands); ++ ++ output_asm_insn ("bne\t%0,%z2,2f", operands); ++ output_asm_insn ("or%i3\t%5,$zero,%3", operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model)) ++ output_asm_insn ("sc.\t%5,%1", operands); ++ else ++ output_asm_insn ("screl.\t%5,%1", operands); ++ ++ output_asm_insn ("beq\t$zero,%5,1b", operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_release (model)) ++ { ++ output_asm_insn ("b\t3f", operands); ++ output_asm_insn ("2:", operands); ++ output_asm_insn ("dbar\t0x700", operands); ++ output_asm_insn ("3:", operands); ++ } ++ else ++ output_asm_insn ("2:", operands); ++ return ""; ++ } ++ else ++ return "%G4\\n\\t" ++ "1:\\n\\t" ++ "ll.\\t%0,%1\\n\\t" ++ "bne\\t%0,%z2,2f\\n\\t" ++ "or%i3\\t%5,$zero,%3\\n\\t" ++ "sc.\\t%5,%1\\n\\t" ++ "beq\\t$zero,%5,1b\\n\\t" ++ "b\\t3f\\n\\t" ++ "2:\\n\\t" ++ "dbar\\t0x700\\n\\t" ++ "3:\\n\\t"; + } + [(set (attr "length") (const_int 32))]) + ++(define_insn "atomic_cas_value_strong_3a6000" ++ [(set (match_operand:AMO_BHWD 0 "register_operand" "=&r") ++ (match_operand:AMO_BHWD 1 "memory_operand" "+ZB")) ++ (set (match_dup 1) ++ (unspec_volatile:AMO_BHWD [(match_operand:AMO_BHWD 2 "reg_or_0_operand" "rJ") ++ (match_operand:AMO_BHWD 3 "reg_or_0_operand" "rJ") ++ (match_operand:SI 4 "const_int_operand")] ;; mod_s ++ UNSPEC_COMPARE_AND_SWAP))] ++ "TARGET_uARCH_LA664" ++ "ori\t%0,%z2,0\n\t%J4\n\tamcas%A4.\t%0,%z3,%1\n\t%K4" ++ [(set (attr "length") (const_int 32))]) ++ + (define_expand "atomic_compare_and_swap" + [(match_operand:SI 0 "register_operand" "") ;; bool output + (match_operand:GPR 1 "register_operand" "") ;; val output +@@ -155,9 +257,29 @@ + (match_operand:SI 7 "const_int_operand" "")] ;; mod_f + "" + { +- emit_insn (gen_atomic_cas_value_strong (operands[1], operands[2], +- operands[3], operands[4], +- operands[6], operands[7])); ++ rtx mod_s, mod_f; ++ ++ mod_s = operands[6]; ++ mod_f = operands[7]; ++ ++ /* Normally the succ memory model must be stronger than fail, but in the ++ unlikely event of fail being ACQUIRE and succ being RELEASE we need to ++ promote succ to ACQ_REL so that we don't lose the acquire semantics. */ ++ ++ if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f))) ++ && is_mm_release (memmodel_from_int (INTVAL (mod_s)))) ++ mod_s = GEN_INT (MEMMODEL_ACQ_REL); ++ ++ operands[6] = mod_s; ++ ++ if (TARGET_uARCH_LA664) ++ emit_insn (gen_atomic_cas_value_strong_3a6000 (operands[1], operands[2], ++ operands[3], operands[4], ++ operands[6])); ++ else ++ emit_insn (gen_atomic_cas_value_strong (operands[1], operands[2], ++ operands[3], operands[4], ++ operands[6])); + + rtx compare = operands[1]; + if (operands[3] != const0_rtx) +@@ -174,7 +296,8 @@ + compare = reg; + } + +- emit_insn (gen_rtx_SET (operands[0], gen_rtx_EQ (SImode, compare, const0_rtx))); ++ emit_insn (gen_rtx_SET (operands[0], ++ gen_rtx_EQ (SImode, compare, const0_rtx))); + DONE; + }) + +@@ -185,7 +308,7 @@ + "" + { + /* We have no QImode atomics, so use the address LSBs to form a mask, +- then use an aligned SImode atomic. */ ++ then use an aligned SImode atomic. 
*/ + rtx result = operands[0]; + rtx mem = operands[1]; + rtx model = operands[2]; +@@ -221,11 +344,9 @@ + DONE; + }) + +- +- + (define_insn "atomic_cas_value_cmp_and_7_" + [(set (match_operand:GPR 0 "register_operand" "=&r") +- (match_operand:GPR 1 "memory_operand" "+ZC")) ++ (match_operand:GPR 1 "memory_operand" "+ZB")) + (set (match_dup 1) + (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") + (match_operand:GPR 3 "reg_or_0_operand" "rJ") +@@ -236,23 +357,56 @@ + (clobber (match_scratch:GPR 7 "=&r"))] + "" + { +- return "%G6\n\t" +- "1:\n\t" +- "ll.\t%0,%1\n\t" +- "and\t%7,%0,%2\n\t" +- "bne\t%7,%z4,2f\n\t" +- "and\t%7,%0,%z3\n\t" +- "or%i5\t%7,%7,%5\n\t" +- "sc.\t%7,%1\n\t" +- "beq\t$zero,%7,1b\n\t" +- "b\t3f\n\t" +- "2:\n\t" +- "dbar\t0x700\n\t" +- "3:\n\t"; ++ if (TARGET_uARCH_LA664) ++ { ++ enum memmodel model = memmodel_from_int (INTVAL (operands[6])); ++ output_asm_insn ("1:",operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_release (model)) ++ output_asm_insn ("ll.\t%0,%1", operands); ++ else ++ output_asm_insn ("llacq.\t%0,%1", operands); ++ ++ output_asm_insn ("and\t%7,%0,%2", operands); ++ output_asm_insn ("bne\t%7,%z4,2f", operands); ++ output_asm_insn ("and\t%7,%0,%z3", operands); ++ output_asm_insn ("or%i5\t%7,%7,%5", operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model)) ++ output_asm_insn ("sc.\t%7,%1", operands); ++ else ++ output_asm_insn ("screl.\t%7,%1", operands); ++ ++ output_asm_insn ("beq\t$zero,%7,1b", operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_release (model)) ++ { ++ output_asm_insn ("b\t3f", operands); ++ output_asm_insn ("2:", operands); ++ output_asm_insn ("dbar\t0x700", operands); ++ output_asm_insn ("3:", operands); ++ } ++ else ++ output_asm_insn ("2:", operands); ++ return ""; ++ } ++ else ++ return "%G6\\n\\t" ++ "1:\\n\\t" ++ "ll.\\t%0,%1\\n\\t" ++ "and\\t%7,%0,%2\\n\\t" ++ "bne\\t%7,%z4,2f\\n\\t" ++ "and\\t%7,%0,%z3\\n\\t" ++ "or%i5\\t%7,%7,%5\\n\\t" ++ "sc.\\t%7,%1\\n\\t" ++ "beq\\t$zero,%7,1b\\n\\t" ++ "b\\t3f\\n\\t" ++ "2:\\n\\t" ++ "dbar\\t0x700\\n\\t" ++ "3:\\n\\t"; + } + [(set (attr "length") (const_int 40))]) + +- + (define_expand "atomic_compare_and_swap" + [(match_operand:SI 0 "register_operand" "") ;; bool output + (match_operand:SHORT 1 "register_operand" "") ;; val output +@@ -264,43 +418,59 @@ + (match_operand:SI 7 "const_int_operand" "")] ;; mod_f + "" + { +- union loongarch_gen_fn_ptrs generator; +- generator.fn_7 = gen_atomic_cas_value_cmp_and_7_si; +- loongarch_expand_atomic_qihi (generator, +- operands[1], +- operands[2], +- operands[3], +- operands[4], +- operands[7]); ++ rtx mod_s, mod_f; + +- rtx compare = operands[1]; +- if (operands[3] != const0_rtx) +- { +- machine_mode mode = GET_MODE (operands[3]); +- rtx op1 = convert_modes (SImode, mode, operands[1], true); +- rtx op3 = convert_modes (SImode, mode, operands[3], true); +- rtx difference = gen_rtx_MINUS (SImode, op1, op3); +- compare = gen_reg_rtx (SImode); +- emit_insn (gen_rtx_SET (compare, difference)); +- } ++ mod_s = operands[6]; ++ mod_f = operands[7]; + +- if (word_mode != mode) ++ /* Normally the succ memory model must be stronger than fail, but in the ++ unlikely event of fail being ACQUIRE and succ being RELEASE we need to ++ promote succ to ACQ_REL so that we don't lose the acquire semantics. 
*/ ++ ++ if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f))) ++ && is_mm_release (memmodel_from_int (INTVAL (mod_s)))) ++ mod_s = GEN_INT (MEMMODEL_ACQ_REL); ++ ++ operands[6] = mod_s; ++ ++ if (TARGET_uARCH_LA664) ++ emit_insn (gen_atomic_cas_value_strong_3a6000 (operands[1], operands[2], ++ operands[3], operands[4], ++ operands[6])); ++ else + { +- rtx reg = gen_reg_rtx (word_mode); +- emit_insn (gen_rtx_SET (reg, gen_rtx_SIGN_EXTEND (word_mode, compare))); +- compare = reg; ++ union loongarch_gen_fn_ptrs generator; ++ generator.fn_7 = gen_atomic_cas_value_cmp_and_7_si; ++ loongarch_expand_atomic_qihi (generator, operands[1], operands[2], ++ operands[3], operands[4], operands[6]); + } + +- emit_insn (gen_rtx_SET (operands[0], gen_rtx_EQ (SImode, compare, const0_rtx))); ++ rtx compare = operands[1]; ++ if (operands[3] != const0_rtx) ++ { ++ machine_mode mode = GET_MODE (operands[3]); ++ rtx op1 = convert_modes (SImode, mode, operands[1], true); ++ rtx op3 = convert_modes (SImode, mode, operands[3], true); ++ rtx difference = gen_rtx_MINUS (SImode, op1, op3); ++ compare = gen_reg_rtx (SImode); ++ emit_insn (gen_rtx_SET (compare, difference)); ++ } ++ ++ if (word_mode != mode) ++ { ++ rtx reg = gen_reg_rtx (word_mode); ++ emit_insn (gen_rtx_SET (reg, gen_rtx_SIGN_EXTEND (word_mode, compare))); ++ compare = reg; ++ } ++ ++ emit_insn (gen_rtx_SET (operands[0], ++ gen_rtx_EQ (SImode, compare, const0_rtx))); + DONE; + }) + +- +- +- + (define_insn "atomic_cas_value_add_7_" + [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res +- (match_operand:GPR 1 "memory_operand" "+ZC")) ++ (match_operand:GPR 1 "memory_operand" "+ZB")) + (set (match_dup 1) + (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask + (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask +@@ -312,24 +482,46 @@ + (clobber (match_scratch:GPR 8 "=&r"))] + "" + { +- return "%G6\n\t" +- "1:\n\t" +- "ll.\t%0,%1\n\t" +- "and\t%7,%0,%3\n\t" +- "add.w\t%8,%0,%z5\n\t" +- "and\t%8,%8,%z2\n\t" +- "or%i8\t%7,%7,%8\n\t" +- "sc.\t%7,%1\n\t" +- "beq\t$zero,%7,1b"; ++ if (TARGET_uARCH_LA664) ++ { ++ enum memmodel model = memmodel_from_int (INTVAL (operands[6])); ++ output_asm_insn ("1:",operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_release (model)) ++ output_asm_insn ("ll.\t%0,%1", operands); ++ else ++ output_asm_insn ("llacq.\t%0,%1", operands); ++ ++ output_asm_insn ("and\t%7,%0,%3", operands); ++ output_asm_insn ("add.w\t%8,%0,%z5", operands); ++ output_asm_insn ("and\t%8,%8,%z2", operands); ++ output_asm_insn ("or%i8\t%7,%7,%8", operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model)) ++ output_asm_insn ("sc.\t%7,%1", operands); ++ else ++ output_asm_insn ("screl.\t%7,%1", operands); ++ ++ output_asm_insn ("beq\t$zero,%7,1b",operands); ++ return ""; ++ } ++ else ++ return "%G6\\n\\t" ++ "1:\\n\\t" ++ "ll.\\t%0,%1\\n\\t" ++ "and\\t%7,%0,%3\\n\\t" ++ "add.w\\t%8,%0,%z5\\n\\t" ++ "and\\t%8,%8,%z2\\n\\t" ++ "or%i8\\t%7,%7,%8\\n\\t" ++ "sc.\\t%7,%1\\n\\t" ++ "beq\\t$zero,%7,1b"; + } + + [(set (attr "length") (const_int 32))]) + +- +- + (define_insn "atomic_cas_value_sub_7_" + [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res +- (match_operand:GPR 1 "memory_operand" "+ZC")) ++ (match_operand:GPR 1 "memory_operand" "+ZB")) + (set (match_dup 1) + (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask + (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask +@@ -341,23 +533,45 @@ + (clobber 
(match_scratch:GPR 8 "=&r"))] + "" + { +- return "%G6\n\t" +- "1:\n\t" +- "ll.\t%0,%1\n\t" +- "and\t%7,%0,%3\n\t" +- "sub.w\t%8,%0,%z5\n\t" +- "and\t%8,%8,%z2\n\t" +- "or%i8\t%7,%7,%8\n\t" +- "sc.\t%7,%1\n\t" +- "beq\t$zero,%7,1b"; ++ if (TARGET_uARCH_LA664) ++ { ++ enum memmodel model = memmodel_from_int (INTVAL (operands[6])); ++ output_asm_insn ("1:",operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_release (model)) ++ output_asm_insn ("ll.\t%0,%1", operands); ++ else ++ output_asm_insn ("llacq.\t%0,%1", operands); ++ ++ output_asm_insn ("and\t%7,%0,%3", operands); ++ output_asm_insn ("sub.w\t%8,%0,%z5", operands); ++ output_asm_insn ("and\t%8,%8,%z2", operands); ++ output_asm_insn ("or%i8\t%7,%7,%8", operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model)) ++ output_asm_insn ("sc.\t%7,%1", operands); ++ else ++ output_asm_insn ("screl.\t%7,%1", operands); ++ ++ output_asm_insn ("beq\t$zero,%7,1b", operands); ++ return ""; ++ } ++ else ++ return "%G6\\n\\t" ++ "1:\\n\\t" ++ "ll.\\t%0,%1\\n\\t" ++ "and\\t%7,%0,%3\\n\\t" ++ "sub.w\\t%8,%0,%z5\\n\\t" ++ "and\\t%8,%8,%z2\\n\\t" ++ "or%i8\\t%7,%7,%8\\n\\t" ++ "sc.\\t%7,%1\\n\\t" ++ "beq\\t$zero,%7,1b"; + } + [(set (attr "length") (const_int 32))]) + +- +- + (define_insn "atomic_cas_value_and_7_" + [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res +- (match_operand:GPR 1 "memory_operand" "+ZC")) ++ (match_operand:GPR 1 "memory_operand" "+ZB")) + (set (match_dup 1) + (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask + (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask +@@ -369,21 +583,45 @@ + (clobber (match_scratch:GPR 8 "=&r"))] + "" + { +- return "%G6\n\t" +- "1:\n\t" +- "ll.\t%0,%1\n\t" +- "and\t%7,%0,%3\n\t" +- "and\t%8,%0,%z5\n\t" +- "and\t%8,%8,%z2\n\t" +- "or%i8\t%7,%7,%8\n\t" +- "sc.\t%7,%1\n\t" +- "beq\t$zero,%7,1b"; ++ if (TARGET_uARCH_LA664) ++ { ++ enum memmodel model = memmodel_from_int (INTVAL (operands[6])); ++ output_asm_insn ("1:",operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_release (model)) ++ output_asm_insn ("ll.\t%0,%1", operands); ++ else ++ output_asm_insn ("llacq.\t%0,%1", operands); ++ ++ output_asm_insn ("and\t%7,%0,%3", operands); ++ output_asm_insn ("and\t%8,%0,%z5", operands); ++ output_asm_insn ("and\t%8,%8,%z2", operands); ++ output_asm_insn ("or%i8\t%7,%7,%8", operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model)) ++ output_asm_insn ("sc.\t%7,%1", operands); ++ else ++ output_asm_insn ("screl.\t%7,%1", operands); ++ ++ output_asm_insn ("beq\t$zero,%7,1b", operands); ++ return ""; ++ } ++ else ++ return "%G6\\n\\t" ++ "1:\\n\\t" ++ "ll.\\t%0,%1\\n\\t" ++ "and\\t%7,%0,%3\\n\\t" ++ "and\\t%8,%0,%z5\\n\\t" ++ "and\\t%8,%8,%z2\\n\\t" ++ "or%i8\\t%7,%7,%8\\n\\t" ++ "sc.\\t%7,%1\\n\\t" ++ "beq\\t$zero,%7,1b"; + } + [(set (attr "length") (const_int 32))]) + + (define_insn "atomic_cas_value_xor_7_" + [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res +- (match_operand:GPR 1 "memory_operand" "+ZC")) ++ (match_operand:GPR 1 "memory_operand" "+ZB")) + (set (match_dup 1) + (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask + (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask +@@ -395,22 +633,46 @@ + (clobber (match_scratch:GPR 8 "=&r"))] + "" + { +- return "%G6\n\t" +- "1:\n\t" +- "ll.\t%0,%1\n\t" +- "and\t%7,%0,%3\n\t" +- "xor\t%8,%0,%z5\n\t" +- "and\t%8,%8,%z2\n\t" +- "or%i8\t%7,%7,%8\n\t" +- 
"sc.\t%7,%1\n\t" +- "beq\t$zero,%7,1b"; ++ if (TARGET_uARCH_LA664) ++ { ++ enum memmodel model = memmodel_from_int (INTVAL (operands[6])); ++ output_asm_insn ("1:",operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_release (model)) ++ output_asm_insn ("ll.\t%0,%1", operands); ++ else ++ output_asm_insn ("llacq.\t%0,%1", operands); ++ ++ output_asm_insn ("and\t%7,%0,%3", operands); ++ output_asm_insn ("xor\t%8,%0,%z5", operands); ++ output_asm_insn ("and\t%8,%8,%z2", operands); ++ output_asm_insn ("or%i8\t%7,%7,%8", operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model)) ++ output_asm_insn ("sc.\t%7,%1", operands); ++ else ++ output_asm_insn ("screl.\t%7,%1", operands); ++ ++ output_asm_insn ("beq\t$zero,%7,1b", operands); ++ return ""; ++ } ++ else ++ return "%G6\\n\\t" ++ "1:\\n\\t" ++ "ll.\\t%0,%1\\n\\t" ++ "and\\t%7,%0,%3\\n\\t" ++ "xor\\t%8,%0,%z5\\n\\t" ++ "and\\t%8,%8,%z2\\n\\t" ++ "or%i8\\t%7,%7,%8\\n\\t" ++ "sc.\\t%7,%1\\n\\t" ++ "beq\\t$zero,%7,1b"; + } + + [(set (attr "length") (const_int 32))]) + + (define_insn "atomic_cas_value_or_7_" + [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res +- (match_operand:GPR 1 "memory_operand" "+ZC")) ++ (match_operand:GPR 1 "memory_operand" "+ZB")) + (set (match_dup 1) + (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask + (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask +@@ -422,22 +684,46 @@ + (clobber (match_scratch:GPR 8 "=&r"))] + "" + { +- return "%G6\n\t" +- "1:\n\t" +- "ll.\t%0,%1\n\t" +- "and\t%7,%0,%3\n\t" +- "or\t%8,%0,%z5\n\t" +- "and\t%8,%8,%z2\n\t" +- "or%i8\t%7,%7,%8\n\t" +- "sc.\t%7,%1\n\t" +- "beq\t$zero,%7,1b"; ++ if (TARGET_uARCH_LA664) ++ { ++ enum memmodel model = memmodel_from_int (INTVAL (operands[6])); ++ output_asm_insn ("1:",operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_release (model)) ++ output_asm_insn ("ll.\t%0,%1", operands); ++ else ++ output_asm_insn ("llacq.\t%0,%1", operands); ++ ++ output_asm_insn ("and\t%7,%0,%3", operands); ++ output_asm_insn ("or\t%8,%0,%z5", operands); ++ output_asm_insn ("and\t%8,%8,%z2", operands); ++ output_asm_insn ("or%i8\t%7,%7,%8", operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model)) ++ output_asm_insn ("sc.\t%7,%1", operands); ++ else ++ output_asm_insn ("screl.\t%7,%1", operands); ++ ++ output_asm_insn ("beq\t$zero,%7,1b", operands); ++ return ""; ++ } ++ else ++ return "%G6\\n\\t" ++ "1:\\n\\t" ++ "ll.\\t%0,%1\\n\\t" ++ "and\\t%7,%0,%3\\n\\t" ++ "or\\t%8,%0,%z5\\n\\t" ++ "and\\t%8,%8,%z2\\n\\t" ++ "or%i8\\t%7,%7,%8\\n\\t" ++ "sc.\\t%7,%1\\n\\t" ++ "beq\\t$zero,%7,1b"; + } + + [(set (attr "length") (const_int 32))]) + + (define_insn "atomic_cas_value_nand_7_" + [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res +- (match_operand:GPR 1 "memory_operand" "+ZC")) ++ (match_operand:GPR 1 "memory_operand" "+ZB")) + (set (match_dup 1) + (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask + (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask +@@ -449,21 +735,45 @@ + (clobber (match_scratch:GPR 8 "=&r"))] + "" + { +- return "%G6\n\t" +- "1:\n\t" +- "ll.\t%0,%1\n\t" +- "and\t%7,%0,%3\n\t" +- "and\t%8,%0,%z5\n\t" +- "xor\t%8,%8,%z2\n\t" +- "or%i8\t%7,%7,%8\n\t" +- "sc.\t%7,%1\n\t" +- "beq\t$zero,%7,1b"; ++ if (TARGET_uARCH_LA664) ++ { ++ enum memmodel model = memmodel_from_int (INTVAL (operands[6])); ++ output_asm_insn ("1:",operands); ++ ++ if (is_mm_relaxed 
(model) || is_mm_consume (model) || is_mm_release (model)) ++ output_asm_insn ("ll.\t%0,%1", operands); ++ else ++ output_asm_insn ("llacq.\t%0,%1", operands); ++ ++ output_asm_insn ("and\t%7,%0,%3", operands); ++ output_asm_insn ("and\t%8,%0,%z5", operands); ++ output_asm_insn ("xor\t%8,%8,%z2", operands); ++ output_asm_insn ("or%i8\t%7,%7,%8", operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model)) ++ output_asm_insn ("sc.\t%7,%1", operands); ++ else ++ output_asm_insn ("screl.\t%7,%1", operands); ++ ++ output_asm_insn ("beq\t$zero,%7,1b", operands); ++ return ""; ++ } ++ else ++ return "%G6\\n\\t" ++ "1:\\n\\t" ++ "ll.\\t%0,%1\\n\\t" ++ "and\\t%7,%0,%3\\n\\t" ++ "and\\t%8,%0,%z5\\n\\t" ++ "xor\\t%8,%8,%z2\\n\\t" ++ "or%i8\\t%7,%7,%8\\n\\t" ++ "sc.\\t%7,%1\\n\\t" ++ "beq\\t$zero,%7,1b"; + } + [(set (attr "length") (const_int 32))]) + + (define_insn "atomic_cas_value_exchange_7_" + [(set (match_operand:GPR 0 "register_operand" "=&r") +- (match_operand:GPR 1 "memory_operand" "+ZC")) ++ (match_operand:GPR 1 "memory_operand" "+ZB")) + (set (match_dup 1) + (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") + (match_operand:GPR 3 "reg_or_0_operand" "rJ") +@@ -474,13 +784,36 @@ + (clobber (match_scratch:GPR 7 "=&r"))] + "" + { +- return "%G6\\n\\t" +- "1:\\n\\t" +- "ll.\\t%0,%1\\n\\t" +- "and\\t%7,%0,%z3\\n\\t" +- "or%i5\\t%7,%7,%5\\n\\t" +- "sc.\\t%7,%1\\n\\t" +- "beqz\\t%7,1b\\n\\t"; ++ if (TARGET_uARCH_LA664) ++ { ++ enum memmodel model = memmodel_from_int (INTVAL (operands[6])); ++ output_asm_insn ("1:",operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_release (model)) ++ output_asm_insn ("ll.\t%0,%1", operands); ++ else ++ output_asm_insn ("llacq.\t%0,%1", operands); ++ ++ output_asm_insn ("and\t%7,%0,%z3", operands); ++ output_asm_insn ("or%i5\t%7,%7,%5", operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model)) ++ output_asm_insn ("sc.\t%7,%1", operands); ++ else ++ output_asm_insn ("screl.\t%7,%1", operands); ++ ++ output_asm_insn ("beqz\t%7,1b", operands); ++ ++ return ""; ++ } ++ else ++ return "%G6\\n\\t" ++ "1:\\n\\t" ++ "ll.\\t%0,%1\\n\\t" ++ "and\\t%7,%0,%z3\\n\\t" ++ "or%i5\\t%7,%7,%5\\n\\t" ++ "sc.\\t%7,%1\\n\\t" ++ "beqz\\t%7,1b\\n\\t"; + } + [(set (attr "length") (const_int 20))]) + +@@ -494,17 +827,30 @@ + (match_operand:SHORT 2 "register_operand"))] + "" + { +- union loongarch_gen_fn_ptrs generator; +- generator.fn_7 = gen_atomic_cas_value_exchange_7_si; +- loongarch_expand_atomic_qihi (generator, +- operands[0], +- operands[1], +- const0_rtx, +- operands[2], +- operands[3]); ++ if (TARGET_uARCH_LA664) ++ emit_insn (gen_atomic_exchange_1 (operands[0], operands[1], operands[2], operands[3])); ++ else ++ { ++ union loongarch_gen_fn_ptrs generator; ++ generator.fn_7 = gen_atomic_cas_value_exchange_7_si; ++ loongarch_expand_atomic_qihi (generator, operands[0], operands[1], ++ const0_rtx, operands[2], operands[3]); ++ } + DONE; + }) + ++(define_insn "atomic_fetch_add_1" ++ [(set (match_operand:SHORT 0 "register_operand" "=&r") ++ (match_operand:SHORT 1 "memory_operand" "+ZB")) ++ (set (match_dup 1) ++ (unspec_volatile:SHORT ++ [(plus:SHORT (match_dup 1) ++ (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) ++ (match_operand:SI 3 "const_int_operand")] ;; model ++ UNSPEC_SYNC_OLD_OP))] ++ "" ++ "%J3\n\tamadd%A3.\t%0,%z2,%1\n\t%K3" ++ [(set (attr "length") (const_int 8))]) + + (define_expand "atomic_fetch_add" + [(set (match_operand:SHORT 0 "register_operand" "=&r") 
+@@ -512,19 +858,21 @@ + (set (match_dup 1) + (unspec_volatile:SHORT + [(plus:SHORT (match_dup 1) +- (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) ++ (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) + (match_operand:SI 3 "const_int_operand")] ;; model + UNSPEC_SYNC_OLD_OP))] + "" + { +- union loongarch_gen_fn_ptrs generator; +- generator.fn_7 = gen_atomic_cas_value_add_7_si; +- loongarch_expand_atomic_qihi (generator, +- operands[0], +- operands[1], +- operands[1], +- operands[2], +- operands[3]); ++ if (TARGET_uARCH_LA664) ++ emit_insn (gen_atomic_fetch_add_1 (operands[0], operands[1], ++ operands[2], operands[3])); ++ else ++ { ++ union loongarch_gen_fn_ptrs generator; ++ generator.fn_7 = gen_atomic_cas_value_add_7_si; ++ loongarch_expand_atomic_qihi (generator, operands[0], operands[1], ++ operands[1], operands[2], operands[3]); ++ } + DONE; + }) + +@@ -534,19 +882,15 @@ + (set (match_dup 1) + (unspec_volatile:SHORT + [(minus:SHORT (match_dup 1) +- (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) ++ (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) + (match_operand:SI 3 "const_int_operand")] ;; model + UNSPEC_SYNC_OLD_OP))] + "" + { + union loongarch_gen_fn_ptrs generator; + generator.fn_7 = gen_atomic_cas_value_sub_7_si; +- loongarch_expand_atomic_qihi (generator, +- operands[0], +- operands[1], +- operands[1], +- operands[2], +- operands[3]); ++ loongarch_expand_atomic_qihi (generator, operands[0], operands[1], ++ operands[1], operands[2], operands[3]); + DONE; + }) + +@@ -556,19 +900,15 @@ + (set (match_dup 1) + (unspec_volatile:SHORT + [(and:SHORT (match_dup 1) +- (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) ++ (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) + (match_operand:SI 3 "const_int_operand")] ;; model + UNSPEC_SYNC_OLD_OP))] + "" + { + union loongarch_gen_fn_ptrs generator; + generator.fn_7 = gen_atomic_cas_value_and_7_si; +- loongarch_expand_atomic_qihi (generator, +- operands[0], +- operands[1], +- operands[1], +- operands[2], +- operands[3]); ++ loongarch_expand_atomic_qihi (generator, operands[0], operands[1], ++ operands[1], operands[2], operands[3]); + DONE; + }) + +@@ -578,19 +918,15 @@ + (set (match_dup 1) + (unspec_volatile:SHORT + [(xor:SHORT (match_dup 1) +- (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) ++ (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) + (match_operand:SI 3 "const_int_operand")] ;; model + UNSPEC_SYNC_OLD_OP))] + "" + { + union loongarch_gen_fn_ptrs generator; + generator.fn_7 = gen_atomic_cas_value_xor_7_si; +- loongarch_expand_atomic_qihi (generator, +- operands[0], +- operands[1], +- operands[1], +- operands[2], +- operands[3]); ++ loongarch_expand_atomic_qihi (generator, operands[0], operands[1], ++ operands[1], operands[2], operands[3]); + DONE; + }) + +@@ -600,19 +936,15 @@ + (set (match_dup 1) + (unspec_volatile:SHORT + [(ior:SHORT (match_dup 1) +- (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) ++ (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) + (match_operand:SI 3 "const_int_operand")] ;; model + UNSPEC_SYNC_OLD_OP))] + "" + { + union loongarch_gen_fn_ptrs generator; + generator.fn_7 = gen_atomic_cas_value_or_7_si; +- loongarch_expand_atomic_qihi (generator, +- operands[0], +- operands[1], +- operands[1], +- operands[2], +- operands[3]); ++ loongarch_expand_atomic_qihi (generator, operands[0], operands[1], ++ operands[1], operands[2], operands[3]); + DONE; + }) + +@@ -622,18 +954,14 @@ + (set (match_dup 1) + (unspec_volatile:SHORT + [(not:SHORT (and:SHORT (match_dup 1) +- (match_operand:SHORT 2 "reg_or_0_operand" "rJ"))) 
++ (match_operand:SHORT 2 "reg_or_0_operand" "rJ"))) + (match_operand:SI 3 "const_int_operand")] ;; model + UNSPEC_SYNC_OLD_OP))] + "" + { + union loongarch_gen_fn_ptrs generator; + generator.fn_7 = gen_atomic_cas_value_nand_7_si; +- loongarch_expand_atomic_qihi (generator, +- operands[0], +- operands[1], +- operands[1], +- operands[2], +- operands[3]); ++ loongarch_expand_atomic_qihi (generator, operands[0], operands[1], ++ operands[1], operands[2], operands[3]); + DONE; + }) +diff --git a/gcc/config/loongarch/t-linux b/gcc/config/loongarch/t-linux +index 58f27f89d..5ecf814fa 100644 +--- a/gcc/config/loongarch/t-linux ++++ b/gcc/config/loongarch/t-linux +@@ -16,8 +16,65 @@ + # along with GCC; see the file COPYING3. If not see + # . + +-MULTILIB_OSDIRNAMES := ../lib64$(call if_multiarch,:loongarch64-linux-gnu) +-MULTIARCH_DIRNAME := $(call if_multiarch,loongarch64-linux-gnu) ++# Multilib ++MULTILIB_OPTIONS = mabi=lp64d/mabi=lp64f/mabi=lp64s ++MULTILIB_DIRNAMES = . base/lp64f base/lp64s ++ ++# The GCC driver always gets all abi-related options on the command line. ++# (see loongarch-driver.c:driver_get_normalized_m_opts) ++comma=, ++MULTILIB_REQUIRED = $(foreach mlib,$(subst $(comma), ,$(TM_MULTILIB_CONFIG)),\ ++ $(firstword $(subst /, ,$(mlib)))) + +-# haven't supported lp32 yet +-MULTILIB_EXCEPTIONS = mabi=lp32 ++SPECS = specs.install ++ ++# temporary self_spec when building libraries (e.g. libgcc) ++gen_mlib_spec = $(if $(word 2,$1),\ ++ %{$(firstword $1):$(patsubst %,-%,$(wordlist 2,$(words $1),$1))}) ++ ++# clean up the result of DRIVER_SELF_SPEC to avoid conflict ++lib_build_self_spec = % $@ ++ ++# Remove lib_build_self_specs before regression tests. ++.PHONY: remove-lib-specs ++check check-host check-target $(CHECK_TARGETS) $(lang_checks): remove-lib-specs ++remove-lib-specs: ++ -mv -f specs.install specs 2>/dev/null ++ ++# Multiarch ++ifneq ($(call if_multiarch,yes),yes) ++ # Define LA_DISABLE_MULTIARCH if multiarch is disabled. ++ tm_defines += LA_DISABLE_MULTIARCH ++else ++ # Only define MULTIARCH_DIRNAME when multiarch is enabled, ++ # or it would always introduce ${target} into the search path. ++ MULTIARCH_DIRNAME = $(LA_MULTIARCH_TRIPLET) ++endif ++ ++# Don't define MULTILIB_OSDIRNAMES if multilib is disabled. ++ifeq ($(filter LA_DISABLE_MULTILIB,$(tm_defines)),) ++ ++ MULTILIB_OSDIRNAMES = \ ++ mabi.lp64d=../lib64$\ ++ $(call if_multiarch,:loongarch64-linux-gnu) ++ ++ MULTILIB_OSDIRNAMES += \ ++ mabi.lp64f=../lib64/f32$\ ++ $(call if_multiarch,:loongarch64-linux-gnuf32) ++ ++ MULTILIB_OSDIRNAMES += \ ++ mabi.lp64s=../lib64/sf$\ ++ $(call if_multiarch,:loongarch64-linux-gnusf) ++else ++ MULTILIB_OSDIRNAMES := ../lib64 ++endif +diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch +index 5689da44a..9d32fbcf6 100644 +--- a/gcc/config/loongarch/t-loongarch ++++ b/gcc/config/loongarch/t-loongarch +@@ -16,14 +16,20 @@ + # along with GCC; see the file COPYING3. If not see + # . 
+ +-$(srcdir)/config/loongarch/loongarch-tables.opt: $(srcdir)/config/loongarch/genopt.sh \ +- $(srcdir)/config/loongarch/loongarch-cpus.def +- $(SHELL) $(srcdir)/config/loongarch/genopt.sh $(srcdir)/config/loongarch > \ +- $(srcdir)/config/loongarch/loongarch-tables.opt ++# Canonical target triplet from config.gcc ++LA_MULTIARCH_TRIPLET = $(patsubst LA_MULTIARCH_TRIPLET=%,%,$\ ++$(filter LA_MULTIARCH_TRIPLET=%,$(tm_defines))) + +-frame-header-opt.o: $(srcdir)/config/loongarch/frame-header-opt.c +- $(COMPILE) $< +- $(POSTCOMPILE) ++# String definition header ++LA_STR_H = $(srcdir)/config/loongarch/loongarch-str.h ++$(LA_STR_H): s-loongarch-str ; @true ++s-loongarch-str: $(srcdir)/config/loongarch/genopts/genstr.sh \ ++ $(srcdir)/config/loongarch/genopts/loongarch-strings ++ $(SHELL) $(srcdir)/config/loongarch/genopts/genstr.sh header \ ++ $(srcdir)/config/loongarch/genopts/loongarch-strings > \ ++ tmp-loongarch-str.h ++ $(SHELL) $(srcdir)/../move-if-change tmp-loongarch-str.h $(LA_STR_H) ++ $(STAMP) s-loongarch-str + + loongarch-c.o: $(srcdir)/config/loongarch/loongarch-c.c $(CONFIG_H) $(SYSTEM_H) \ + coretypes.h $(TM_H) $(TREE_H) output.h $(C_COMMON_H) $(TARGET_H) +@@ -31,15 +37,32 @@ loongarch-c.o: $(srcdir)/config/loongarch/loongarch-c.c $(CONFIG_H) $(SYSTEM_H) + $(srcdir)/config/loongarch/loongarch-c.c + + loongarch-builtins.o: $(srcdir)/config/loongarch/loongarch-builtins.c $(CONFIG_H) \ +- $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(TREE_H) $(RECOG_H) langhooks.h \ +- $(DIAGNOSTIC_CORE_H) $(OPTABS_H) $(srcdir)/config/loongarch/loongarch-ftypes.def \ +- $(srcdir)/config/loongarch/loongarch-modes.def ++ $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(TREE_H) $(RECOG_H) langhooks.h \ ++ $(DIAGNOSTIC_CORE_H) $(OPTABS_H) $(srcdir)/config/loongarch/loongarch-ftypes.def \ ++ $(srcdir)/config/loongarch/loongarch-modes.def + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ +- $(srcdir)/config/loongarch/loongarch-builtins.c +-loongarch-d.o: $(srcdir)/config/loongarch/loongarch-d.c +- $(COMPILE) $< +- $(POSTCOMPILE) +- +-comma=, +-MULTILIB_OPTIONS = $(subst $(comma),/, $(patsubst %, mabi=%, $(subst $(comma),$(comma)mabi=,$(TM_MULTILIB_CONFIG)))) +-MULTILIB_DIRNAMES = $(subst $(comma), ,$(TM_MULTILIB_CONFIG)) ++ $(srcdir)/config/loongarch/loongarch-builtins.c ++ ++loongarch-driver.o : $(srcdir)/config/loongarch/loongarch-driver.c $(LA_STR_H) \ ++ $(CONFIG_H) $(SYSTEM_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< ++ ++loongarch-opts.o: $(srcdir)/config/loongarch/loongarch-opts.c $(LA_STR_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< ++ ++loongarch-cpu.o: $(srcdir)/config/loongarch/loongarch-cpu.c $(LA_STR_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< ++ ++loongarch-def.o: $(srcdir)/config/loongarch/loongarch-def.c $(LA_STR_H) ++ $(CC) -c $(ALL_CFLAGS) $(INCLUDES) $< ++ ++$(srcdir)/config/loongarch/loongarch.opt: s-loongarch-opt ; @true ++s-loongarch-opt: $(srcdir)/config/loongarch/genopts/genstr.sh \ ++ $(srcdir)/config/loongarch/genopts/loongarch.opt.in ++ $(SHELL) $(srcdir)/config/loongarch/genopts/genstr.sh opt \ ++ $(srcdir)/config/loongarch/genopts/loongarch.opt.in \ ++ > tmp-loongarch.opt ++ $(SHELL) $(srcdir)/../move-if-change tmp-loongarch.opt \ ++ $(srcdir)/config/loongarch/loongarch.opt ++ $(STAMP) s-loongarch-opt ++ +diff --git a/gcc/config/loongarch/x-native b/gcc/config/loongarch/x-native +deleted file mode 100644 +index 827d21f1a..000000000 +--- a/gcc/config/loongarch/x-native ++++ /dev/null 
+@@ -1,3 +0,0 @@ +-driver-native.o : $(srcdir)/config/loongarch/driver-native.c \ +- $(CONFIG_H) $(SYSTEM_H) +- $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< +diff --git a/libgcc/config/loongarch/crtfastmath.c b/libgcc/config/loongarch/crtfastmath.c +index d7371de6d..5f7b298ac 100644 +--- a/libgcc/config/loongarch/crtfastmath.c ++++ b/libgcc/config/loongarch/crtfastmath.c +@@ -1,30 +1,32 @@ +-/* Copyright (C) 2010-2018 Free Software Foundation, Inc. ++/* Copyright (C) 2020-2022 Free Software Foundation, Inc. ++ Contributed by Loongson Ltd. ++ Based on MIPS target for GNU compiler. + +- This file is part of GCC. ++This file is part of GCC. + +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published by +- the Free Software Foundation; either version 3, or (at your option) +- any later version. ++GCC is free software; you can redistribute it and/or modify it ++under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. + +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. ++GCC is distributed in the hope that it will be useful, but WITHOUT ++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++License for more details. + +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. ++Under Section 7 of GPL version 3, you are granted additional ++permissions described in the GCC Runtime Library Exception, version ++3.1, as published by the Free Software Foundation. + +- You should have received a copy of the GNU General Public License +- and a copy of the GCC Runtime Library Exception along with this +- program; see the files COPYING3 and COPYING.RUNTIME respectively. +- If not, see . */ ++You should have received a copy of the GNU General Public License ++and a copy of the GCC Runtime Library Exception along with this ++program; see the files COPYING3 and COPYING.RUNTIME respectively. ++If not, see . */ + + #ifdef __loongarch_hard_float + + /* Rounding control. */ +-#define _FPU_RC_NEAREST 0x000 /* RECOMMENDED */ ++#define _FPU_RC_NEAREST 0x000 /* RECOMMENDED. */ + #define _FPU_RC_ZERO 0x100 + #define _FPU_RC_UP 0x200 + #define _FPU_RC_DOWN 0x300 +@@ -33,18 +35,18 @@ + #define _FPU_IEEE 0x0000001F + + /* Macros for accessing the hardware control word. */ +-#define _FPU_GETCW(cw) __asm__ ("movgr2fcsr %0,$r1" : "=r" (cw)) +-#define _FPU_SETCW(cw) __asm__ ("movfcsr2gr %0,$r1" : : "r" (cw)) ++#define _FPU_GETCW(cw) __asm__ volatile ("movfcsr2gr %0,$r0" : "=r" (cw)) ++#define _FPU_SETCW(cw) __asm__ volatile ("movgr2fcsr $r0,%0" : : "r" (cw)) + + static void __attribute__((constructor)) + set_fast_math (void) + { + unsigned int fcr; + +- /* round to nearest, IEEE exceptions disabled. */ ++ /* Flush to zero, round to nearest, IEEE exceptions disabled. 
*/ + fcr = _FPU_RC_NEAREST; + +- _FPU_SETCW(fcr); ++ _FPU_SETCW (fcr); + } + +-#endif /* __loongarch_hard_float */ ++#endif /* __loongarch_hard_float */ +diff --git a/libgcc/config/loongarch/crti.S b/libgcc/config/loongarch/crti.S +deleted file mode 100644 +index dcd05afea..000000000 +--- a/libgcc/config/loongarch/crti.S ++++ /dev/null +@@ -1,43 +0,0 @@ +-/* Copyright (C) 2001-2018 Free Software Foundation, Inc. +- +-This file is part of GCC. +- +-GCC is free software; you can redistribute it and/or modify it under +-the terms of the GNU General Public License as published by the Free +-Software Foundation; either version 3, or (at your option) any later +-version. +- +-GCC is distributed in the hope that it will be useful, but WITHOUT ANY +-WARRANTY; without even the implied warranty of MERCHANTABILITY or +-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +-for more details. +- +-Under Section 7 of GPL version 3, you are granted additional +-permissions described in the GCC Runtime Library Exception, version +-3.1, as published by the Free Software Foundation. +- +-You should have received a copy of the GNU General Public License and +-a copy of the GCC Runtime Library Exception along with this program; +-see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +-. */ +- +-/* 4 slots for argument spill area. 1 for cpreturn, 1 for stack. +- Return spill offset of 40 and 20. Aligned to 16 bytes for n32. */ +- +- .section .init,"ax",@progbits +- .globl _init +- .type _init,@function +-_init: +- addi.d $r3,$r3,-48 +- st.d $r1,$r3,40 +- addi.d $r3,$r3,48 +- jirl $r0,$r1,0 +- +- .section .fini,"ax",@progbits +- .globl _fini +- .type _fini,@function +-_fini: +- addi.d $r3,$r3,-48 +- st.d $r1,$r3,40 +- addi.d $r3,$r3,48 +- jirl $r0,$r1,0 +diff --git a/libgcc/config/loongarch/crtn.S b/libgcc/config/loongarch/crtn.S +deleted file mode 100644 +index 91d9d5e7f..000000000 +--- a/libgcc/config/loongarch/crtn.S ++++ /dev/null +@@ -1,39 +0,0 @@ +-/* Copyright (C) 2001-2018 Free Software Foundation, Inc. +- +-This file is part of GCC. +- +-GCC is free software; you can redistribute it and/or modify it under +-the terms of the GNU General Public License as published by the Free +-Software Foundation; either version 3, or (at your option) any later +-version. +- +-GCC is distributed in the hope that it will be useful, but WITHOUT ANY +-WARRANTY; without even the implied warranty of MERCHANTABILITY or +-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +-for more details. +- +-Under Section 7 of GPL version 3, you are granted additional +-permissions described in the GCC Runtime Library Exception, version +-3.1, as published by the Free Software Foundation. +- +-You should have received a copy of the GNU General Public License and +-a copy of the GCC Runtime Library Exception along with this program; +-see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +-. */ +- +-/* 4 slots for argument spill area. 1 for cpreturn, 1 for stack. +- Return spill offset of 40 and 20. Aligned to 16 bytes for n32. 
*/ +- +- +- .section .init,"ax",@progbits +-init: +- ld.d $r1,$r3,40 +- addi.d $r3,$r3,48 +- jirl $r0,$r1,0 +- +- .section .fini,"ax",@progbits +-fini: +- ld.d $r1,$r3,40 +- addi.d $r3,$r3,48 +- jirl $r0,$r1,0 +- +diff --git a/libgcc/config/loongarch/gthr-loongnixsde.h b/libgcc/config/loongarch/gthr-loongnixsde.h +deleted file mode 100644 +index f62b57318..000000000 +--- a/libgcc/config/loongarch/gthr-loongnixsde.h ++++ /dev/null +@@ -1,237 +0,0 @@ +-/* LARCH SDE threads compatibility routines for libgcc2 and libobjc. */ +-/* Compile this one with gcc. */ +-/* Copyright (C) 2006-2018 Free Software Foundation, Inc. +- Contributed by Nigel Stephens +- +-This file is part of GCC. +- +-GCC is free software; you can redistribute it and/or modify it under +-the terms of the GNU General Public License as published by the Free +-Software Foundation; either version 3, or (at your option) any later +-version. +- +-GCC is distributed in the hope that it will be useful, but WITHOUT ANY +-WARRANTY; without even the implied warranty of MERCHANTABILITY or +-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +-for more details. +- +-Under Section 7 of GPL version 3, you are granted additional +-permissions described in the GCC Runtime Library Exception, version +-3.1, as published by the Free Software Foundation. +- +-You should have received a copy of the GNU General Public License and +-a copy of the GCC Runtime Library Exception along with this program; +-see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +-. */ +- +-#ifndef GCC_GTHR_LARCHSDE_H +-#define GCC_GTHR_LARCHSDE_H +- +-/* LARCH SDE threading API specific definitions. +- Easy, since the interface is pretty much one-to-one. */ +- +-#define __GTHREADS 1 +- +-#include +-#include +- +-#ifdef __cplusplus +-extern "C" { +-#endif +- +-typedef __sdethread_key_t __gthread_key_t; +-typedef __sdethread_once_t __gthread_once_t; +-typedef __sdethread_mutex_t __gthread_mutex_t; +- +-typedef struct { +- long depth; +- __sdethread_t owner; +- __sdethread_mutex_t actual; +-} __gthread_recursive_mutex_t; +- +-#define __GTHREAD_MUTEX_INIT __SDETHREAD_MUTEX_INITIALIZER("gthr") +-#define __GTHREAD_ONCE_INIT __SDETHREAD_ONCE_INIT +-static inline int +-__gthread_recursive_mutex_init_function(__gthread_recursive_mutex_t *__mutex); +-#define __GTHREAD_RECURSIVE_MUTEX_INIT_FUNCTION __gthread_recursive_mutex_init_function +- +-#if SUPPORTS_WEAK && GTHREAD_USE_WEAK +-# define __gthrw(name) \ +- static __typeof(name) __gthrw_ ## name __attribute__ ((__weakref__(#name))); +-# define __gthrw_(name) __gthrw_ ## name +-#else +-# define __gthrw(name) +-# define __gthrw_(name) name +-#endif +- +-__gthrw(__sdethread_once) +-__gthrw(__sdethread_key_create) +-__gthrw(__sdethread_key_delete) +-__gthrw(__sdethread_getspecific) +-__gthrw(__sdethread_setspecific) +- +-__gthrw(__sdethread_self) +- +-__gthrw(__sdethread_mutex_lock) +-__gthrw(__sdethread_mutex_trylock) +-__gthrw(__sdethread_mutex_unlock) +- +-__gthrw(__sdethread_mutex_init) +- +-__gthrw(__sdethread_threading) +- +-#if SUPPORTS_WEAK && GTHREAD_USE_WEAK +- +-static inline int +-__gthread_active_p (void) +-{ +- return !!(void *)&__sdethread_threading; +-} +- +-#else /* not SUPPORTS_WEAK */ +- +-static inline int +-__gthread_active_p (void) +-{ +- return 1; +-} +- +-#endif /* SUPPORTS_WEAK */ +- +-static inline int +-__gthread_once (__gthread_once_t *__once, void (*__func) (void)) +-{ +- if (__gthread_active_p ()) +- return __gthrw_(__sdethread_once) (__once, __func); +- else +- return -1; 
+-} +- +-static inline int +-__gthread_key_create (__gthread_key_t *__key, void (*__dtor) (void *)) +-{ +- return __gthrw_(__sdethread_key_create) (__key, __dtor); +-} +- +-static inline int +-__gthread_key_delete (__gthread_key_t __key) +-{ +- return __gthrw_(__sdethread_key_delete) (__key); +-} +- +-static inline void * +-__gthread_getspecific (__gthread_key_t __key) +-{ +- return __gthrw_(__sdethread_getspecific) (__key); +-} +- +-static inline int +-__gthread_setspecific (__gthread_key_t __key, const void *__ptr) +-{ +- return __gthrw_(__sdethread_setspecific) (__key, __ptr); +-} +- +-static inline int +-__gthread_mutex_destroy (__gthread_mutex_t * UNUSED(__mutex)) +-{ +- return 0; +-} +- +-static inline int +-__gthread_mutex_lock (__gthread_mutex_t *__mutex) +-{ +- if (__gthread_active_p ()) +- return __gthrw_(__sdethread_mutex_lock) (__mutex); +- else +- return 0; +-} +- +-static inline int +-__gthread_mutex_trylock (__gthread_mutex_t *__mutex) +-{ +- if (__gthread_active_p ()) +- return __gthrw_(__sdethread_mutex_trylock) (__mutex); +- else +- return 0; +-} +- +-static inline int +-__gthread_mutex_unlock (__gthread_mutex_t *__mutex) +-{ +- if (__gthread_active_p ()) +- return __gthrw_(__sdethread_mutex_unlock) (__mutex); +- else +- return 0; +-} +- +-static inline int +-__gthread_recursive_mutex_init_function (__gthread_recursive_mutex_t *__mutex) +-{ +- __mutex->depth = 0; +- __mutex->owner = __gthrw_(__sdethread_self) (); +- return __gthrw_(__sdethread_mutex_init) (&__mutex->actual, NULL); +-} +- +-static inline int +-__gthread_recursive_mutex_lock (__gthread_recursive_mutex_t *__mutex) +-{ +- if (__gthread_active_p ()) +- { +- __sdethread_t __me = __gthrw_(__sdethread_self) (); +- +- if (__mutex->owner != __me) +- { +- __gthrw_(__sdethread_mutex_lock) (&__mutex->actual); +- __mutex->owner = __me; +- } +- +- __mutex->depth++; +- } +- return 0; +-} +- +-static inline int +-__gthread_recursive_mutex_trylock (__gthread_recursive_mutex_t *__mutex) +-{ +- if (__gthread_active_p ()) +- { +- __sdethread_t __me = __gthrw_(__sdethread_self) (); +- +- if (__mutex->owner != __me) +- { +- if (__gthrw_(__sdethread_mutex_trylock) (&__mutex->actual)) +- return 1; +- __mutex->owner = __me; +- } +- +- __mutex->depth++; +- } +- return 0; +-} +- +-static inline int +-__gthread_recursive_mutex_unlock (__gthread_recursive_mutex_t *__mutex) +-{ +- if (__gthread_active_p ()) +- { +- if (--__mutex->depth == 0) +- { +- __mutex->owner = (__sdethread_t) 0; +- __gthrw_(__sdethread_mutex_unlock) (&__mutex->actual); +- } +- } +- return 0; +-} +- +-static inline int +-__gthread_recursive_mutex_destroy (__gthread_recursive_mutex_t +- * UNUSED(__mutex)) +-{ +- return 0; +-} +- +-#ifdef __cplusplus +-} +-#endif +- +-#endif /* ! GCC_GTHR_LARCHSDE_H */ +diff --git a/libgcc/config/loongarch/linux-unwind.h b/libgcc/config/loongarch/linux-unwind.h +index d77dfb058..30603e44f 100644 +--- a/libgcc/config/loongarch/linux-unwind.h ++++ b/libgcc/config/loongarch/linux-unwind.h +@@ -1,5 +1,5 @@ +-/* DWARF2 EH unwinding support for LARCH Linux. +- Copyright (C) 2004-2018 Free Software Foundation, Inc. ++/* DWARF2 EH unwinding support for LoongArch Linux. ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. + + This file is part of GCC. + +@@ -34,26 +34,27 @@ see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see + + static _Unwind_Reason_Code + loongarch_fallback_frame_state (struct _Unwind_Context *context, +- _Unwind_FrameState *fs) ++ _Unwind_FrameState *fs) + { + u_int32_t *pc = (u_int32_t *) context->ra; + struct sigcontext *sc; + _Unwind_Ptr new_cfa; + int i; + +- /* 03822c0b dli a7, 0x8b (sigreturn) */ +- /* 002b0000 syscall 0 */ ++ /* 03822c0b li.d a7, 0x8b (sigreturn) */ ++ /* 002b0000 syscall 0 */ + if (pc[1] != 0x002b0000) + return _URC_END_OF_STACK; + if (pc[0] == 0x03822c0b) + { +- struct rt_sigframe { ++ struct rt_sigframe ++ { + u_int32_t ass[4]; /* Argument save space for o32. */ + u_int32_t trampoline[2]; + siginfo_t info; + ucontext_t uc; + } *rt_ = context->cfa; +- sc = &rt_->uc.uc_mcontext; ++ sc = (struct sigcontext *) (void *) &rt_->uc.uc_mcontext; + } + else + return _URC_END_OF_STACK; +@@ -63,17 +64,17 @@ loongarch_fallback_frame_state (struct _Unwind_Context *context, + fs->regs.cfa_reg = __LIBGCC_STACK_POINTER_REGNUM__; + fs->regs.cfa_offset = new_cfa - (_Unwind_Ptr) context->cfa; + +- for (i = 0; i < 32; i++) { +- fs->regs.reg[i].how = REG_SAVED_OFFSET; +- fs->regs.reg[i].loc.offset +- = (_Unwind_Ptr)&(sc->sc_regs[i]) - new_cfa; +- } ++ for (i = 0; i < 32; i++) ++ { ++ fs->regs.reg[i].how = REG_SAVED_OFFSET; ++ fs->regs.reg[i].loc.offset = (_Unwind_Ptr) & (sc->sc_regs[i]) - new_cfa; ++ } + + fs->signal_frame = 1; + fs->regs.reg[__LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__].how + = REG_SAVED_VAL_OFFSET; + fs->regs.reg[__LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__].loc.offset +- = (_Unwind_Ptr)(sc->sc_pc) - new_cfa; ++ = (_Unwind_Ptr) (sc->sc_pc) - new_cfa; + fs->retaddr_column = __LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__; + + return _URC_NO_REASON; +diff --git a/libgcc/config/loongarch/sfp-machine.h b/libgcc/config/loongarch/sfp-machine.h +index f7800a003..420f94274 100644 +--- a/libgcc/config/loongarch/sfp-machine.h ++++ b/libgcc/config/loongarch/sfp-machine.h +@@ -1,5 +1,5 @@ +-/* softfp machine description for LARCH. +- Copyright (C) 2009-2018 Free Software Foundation, Inc. ++/* softfp machine description for LoongArch. ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. + + This file is part of GCC. + +@@ -23,49 +23,49 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . 
*/ + + #ifdef __loongarch64 +-#define _FP_W_TYPE_SIZE 64 +-#define _FP_W_TYPE unsigned long long +-#define _FP_WS_TYPE signed long long +-#define _FP_I_TYPE long long ++#define _FP_W_TYPE_SIZE 64 ++#define _FP_W_TYPE unsigned long long ++#define _FP_WS_TYPE signed long long ++#define _FP_I_TYPE long long + + typedef int TItype __attribute__ ((mode (TI))); + typedef unsigned int UTItype __attribute__ ((mode (TI))); + #define TI_BITS (__CHAR_BIT__ * (int) sizeof (TItype)) + +-#define _FP_MUL_MEAT_S(R,X,Y) \ +- _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm) +-#define _FP_MUL_MEAT_D(R,X,Y) \ +- _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) +-#define _FP_MUL_MEAT_Q(R,X,Y) \ +- _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) ++#define _FP_MUL_MEAT_S(R, X, Y) \ ++ _FP_MUL_MEAT_1_wide (_FP_WFRACBITS_S, R, X, Y, umul_ppmm) ++#define _FP_MUL_MEAT_D(R, X, Y) \ ++ _FP_MUL_MEAT_1_wide (_FP_WFRACBITS_D, R, X, Y, umul_ppmm) ++#define _FP_MUL_MEAT_Q(R, X, Y) \ ++ _FP_MUL_MEAT_2_wide (_FP_WFRACBITS_Q, R, X, Y, umul_ppmm) + +-#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_udiv_norm(S,R,X,Y) +-#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_1_udiv_norm(D,R,X,Y) +-#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_2_udiv(Q,R,X,Y) ++#define _FP_DIV_MEAT_S(R, X, Y) _FP_DIV_MEAT_1_udiv_norm (S, R, X, Y) ++#define _FP_DIV_MEAT_D(R, X, Y) _FP_DIV_MEAT_1_udiv_norm (D, R, X, Y) ++#define _FP_DIV_MEAT_Q(R, X, Y) _FP_DIV_MEAT_2_udiv (Q, R, X, Y) + +-# define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) +-# define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1) +-# define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1 ++#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) ++#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1) ++#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1 + #else +-#define _FP_W_TYPE_SIZE 32 +-#define _FP_W_TYPE unsigned int +-#define _FP_WS_TYPE signed int +-#define _FP_I_TYPE int +- +-#define _FP_MUL_MEAT_S(R,X,Y) \ +- _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm) +-#define _FP_MUL_MEAT_D(R,X,Y) \ +- _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) +-#define _FP_MUL_MEAT_Q(R,X,Y) \ +- _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) +- +-#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_udiv_norm(S,R,X,Y) +-#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y) +-#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y) +- +-# define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) +-# define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1 +-# define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1 ++#define _FP_W_TYPE_SIZE 32 ++#define _FP_W_TYPE unsigned int ++#define _FP_WS_TYPE signed int ++#define _FP_I_TYPE int ++ ++#define _FP_MUL_MEAT_S(R, X, Y) \ ++ _FP_MUL_MEAT_1_wide (_FP_WFRACBITS_S, R, X, Y, umul_ppmm) ++#define _FP_MUL_MEAT_D(R, X, Y) \ ++ _FP_MUL_MEAT_2_wide (_FP_WFRACBITS_D, R, X, Y, umul_ppmm) ++#define _FP_MUL_MEAT_Q(R, X, Y) \ ++ _FP_MUL_MEAT_4_wide (_FP_WFRACBITS_Q, R, X, Y, umul_ppmm) ++ ++#define _FP_DIV_MEAT_S(R, X, Y) _FP_DIV_MEAT_1_udiv_norm (S, R, X, Y) ++#define _FP_DIV_MEAT_D(R, X, Y) _FP_DIV_MEAT_2_udiv (D, R, X, Y) ++#define _FP_DIV_MEAT_Q(R, X, Y) _FP_DIV_MEAT_4_udiv (Q, R, X, Y) ++ ++#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) ++#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1 ++#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1 + #endif + + /* The type of the result of a floating point comparison. 
This must +@@ -73,76 +73,80 @@ typedef unsigned int UTItype __attribute__ ((mode (TI))); + typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__))); + #define CMPtype __gcc_CMPtype + +-#define _FP_NANSIGN_S 0 +-#define _FP_NANSIGN_D 0 +-#define _FP_NANSIGN_Q 0 ++#define _FP_NANSIGN_S 0 ++#define _FP_NANSIGN_D 0 ++#define _FP_NANSIGN_Q 0 + + #define _FP_KEEPNANFRACP 1 +-# define _FP_QNANNEGATEDP 0 ++#define _FP_QNANNEGATEDP 0 + + /* NaN payloads should be preserved for NAN2008. */ +-# define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ +- do \ +- { \ +- R##_s = X##_s; \ +- _FP_FRAC_COPY_##wc (R, X); \ +- R##_c = FP_CLS_NAN; \ +- } \ ++#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ ++ do \ ++ { \ ++ R##_s = X##_s; \ ++ _FP_FRAC_COPY_##wc (R, X); \ ++ R##_c = FP_CLS_NAN; \ ++ } \ + while (0) + + #ifdef __loongarch_hard_float +-#define FP_EX_INVALID 0x100000 +-#define FP_EX_DIVZERO 0x080000 +-#define FP_EX_OVERFLOW 0x040000 +-#define FP_EX_UNDERFLOW 0x020000 +-#define FP_EX_INEXACT 0x010000 ++#define FP_EX_INVALID 0x100000 ++#define FP_EX_DIVZERO 0x080000 ++#define FP_EX_OVERFLOW 0x040000 ++#define FP_EX_UNDERFLOW 0x020000 ++#define FP_EX_INEXACT 0x010000 + #define FP_EX_ALL \ +- (FP_EX_INVALID | FP_EX_DIVZERO | FP_EX_OVERFLOW | FP_EX_UNDERFLOW \ +- | FP_EX_INEXACT) ++ (FP_EX_INVALID | FP_EX_DIVZERO | FP_EX_OVERFLOW | FP_EX_UNDERFLOW \ ++ | FP_EX_INEXACT) + +-#define FP_EX_ENABLE_SHIFT 16 +-#define FP_EX_CAUSE_SHIFT 8 ++#define FP_EX_ENABLE_SHIFT 16 ++#define FP_EX_CAUSE_SHIFT 8 + +-#define FP_RND_NEAREST 0x000 +-#define FP_RND_ZERO 0x100 +-#define FP_RND_PINF 0x200 +-#define FP_RND_MINF 0x300 +-#define FP_RND_MASK 0x300 ++#define FP_RND_NEAREST 0x000 ++#define FP_RND_ZERO 0x100 ++#define FP_RND_PINF 0x200 ++#define FP_RND_MINF 0x300 ++#define FP_RND_MASK 0x300 + + #define _FP_DECL_EX \ + unsigned long int _fcsr __attribute__ ((unused)) = FP_RND_NEAREST + +-#define FP_INIT_ROUNDMODE \ +- do { \ +- _fcsr = __builtin_loongarch_movfcsr2gr (0); \ +- } while (0) ++#define FP_INIT_ROUNDMODE \ ++ do \ ++ { \ ++ _fcsr = __builtin_loongarch_movfcsr2gr (0); \ ++ } \ ++ while (0) + + #define FP_ROUNDMODE (_fcsr & FP_RND_MASK) + + #define FP_TRAPPING_EXCEPTIONS ((_fcsr << FP_EX_ENABLE_SHIFT) & FP_EX_ALL) + +-#define FP_HANDLE_EXCEPTIONS \ +- do { \ +- _fcsr &= ~(FP_EX_ALL << FP_EX_CAUSE_SHIFT); \ +- _fcsr |= _fex | (_fex << FP_EX_CAUSE_SHIFT); \ +- __builtin_loongarch_movgr2fcsr (0, _fcsr); \ +- } while (0) ++#define FP_HANDLE_EXCEPTIONS \ ++ do \ ++ { \ ++ _fcsr &= ~(FP_EX_ALL << FP_EX_CAUSE_SHIFT); \ ++ _fcsr |= _fex | (_fex << FP_EX_CAUSE_SHIFT); \ ++ __builtin_loongarch_movgr2fcsr (0, _fcsr); \ ++ } \ ++ while (0) + + #else +-#define FP_EX_INVALID (1 << 4) +-#define FP_EX_DIVZERO (1 << 3) +-#define FP_EX_OVERFLOW (1 << 2) +-#define FP_EX_UNDERFLOW (1 << 1) +-#define FP_EX_INEXACT (1 << 0) ++#define FP_EX_INVALID (1 << 4) ++#define FP_EX_DIVZERO (1 << 3) ++#define FP_EX_OVERFLOW (1 << 2) ++#define FP_EX_UNDERFLOW (1 << 1) ++#define FP_EX_INEXACT (1 << 0) + #endif + + #define _FP_TININESS_AFTER_ROUNDING 1 + +-#define __LITTLE_ENDIAN 1234 ++#define __LITTLE_ENDIAN 1234 + +-# define __BYTE_ORDER __LITTLE_ENDIAN ++#define __BYTE_ORDER __LITTLE_ENDIAN + + /* Define ALIASNAME as a strong alias for NAME. 
*/ +-# define strong_alias(name, aliasname) _strong_alias(name, aliasname) +-# define _strong_alias(name, aliasname) \ ++#define strong_alias(name, aliasname) _strong_alias (name, aliasname) ++#define _strong_alias(name, aliasname) \ + extern __typeof (name) aliasname __attribute__ ((alias (#name))); +diff --git a/libgcc/config/loongarch/t-elf b/libgcc/config/loongarch/t-elf +deleted file mode 100644 +index 651f10a53..000000000 +--- a/libgcc/config/loongarch/t-elf ++++ /dev/null +@@ -1,3 +0,0 @@ +-# We must build libgcc2.a with -G 0, in case the user wants to link +-# without the $gp register. +-HOST_LIBGCC2_CFLAGS += -G 0 +diff --git a/libgcc/config/loongarch/t-loongarch b/libgcc/config/loongarch/t-loongarch +index 9508cb2fc..2a7dbf6ca 100644 +--- a/libgcc/config/loongarch/t-loongarch ++++ b/libgcc/config/loongarch/t-loongarch +@@ -5,5 +5,3 @@ softfp_int_modes := si di + softfp_extensions := + softfp_truncations := + softfp_exclude_libgcc2 := n +- +-LIB2ADD_ST += $(srcdir)/config/loongarch/lib2funcs.c +diff --git a/libgcc/config/loongarch/t-sdemtk b/libgcc/config/loongarch/t-sdemtk +deleted file mode 100644 +index a06e828b5..000000000 +--- a/libgcc/config/loongarch/t-sdemtk ++++ /dev/null +@@ -1,3 +0,0 @@ +-# Don't build FPBIT and DPBIT; we'll be using the SDE soft-float library. +-FPBIT = +-DPBIT = +diff --git a/libgcc/config/loongarch/t-vr b/libgcc/config/loongarch/t-vr +deleted file mode 100644 +index e69de29bb..000000000 +-- +2.39.3 + diff --git a/Improve-specs-processing-to-allow-in-function-argume.patch b/Improve-specs-processing-to-allow-in-function-argume.patch new file mode 100644 index 0000000..b7504b4 --- /dev/null +++ b/Improve-specs-processing-to-allow-in-function-argume.patch @@ -0,0 +1,220 @@ +From 6e6de5b31ac9b5a523fbf60099d00124d99aa0d0 Mon Sep 17 00:00:00 2001 +From: Lixing +Date: Mon, 31 Jul 2023 10:08:23 +0800 +Subject: [PATCH 2/2] Improve specs processing to allow %* in function + arguments + +2018-07-31 Olivier Hainque + + * gcc.c (handle_spec_function): Accept a soft_matched_part + argument, as do_spec_1. Pass it down to ... + (eval_spec_function): Accept a soft_matched_part argument, + and pass it down to ... + (do_spec_2): Accept a soft_matched_part argument, and pass + it down to do_spec_1. + (do_spec_1): Pass soft_matched_part to handle_spec_function. + (handle_braces): Update call to handle_spec_function. + (driver::set_up_specs): Update calls to do_spec_2. + (compare_debug_dump_opt_spec_function): Likewise. + (compare_debug_self_opt_spec_function): Likewise. 
+ +[Upstream] 1102fd64dbb767 (Deleted ChangeLog) +Link: https://gcc.gnu.org/git/?p=gcc.git;a=patch;f=gcc/gcc.cc;h=1102fd64dbb76784ed46ff81bf905f6c52d296fc +--- + gcc/gcc.c | 51 +++++++++++++++++++++++++++++---------------------- + 1 file changed, 29 insertions(+), 22 deletions(-) + +diff --git a/gcc/gcc.c b/gcc/gcc.c +index 3b87e91b6..3c46e0769 100644 +--- a/gcc/gcc.c ++++ b/gcc/gcc.c +@@ -354,12 +354,12 @@ static inline void mark_matching_switches (const char *, const char *, int); + static inline void process_marked_switches (void); + static const char *process_brace_body (const char *, const char *, const char *, int, int); + static const struct spec_function *lookup_spec_function (const char *); +-static const char *eval_spec_function (const char *, const char *); +-static const char *handle_spec_function (const char *, bool *); ++static const char *eval_spec_function (const char *, const char *, const char *); ++static const char *handle_spec_function (const char *, bool *, const char *); + static char *save_string (const char *, int); + static void set_collect_gcc_options (void); + static int do_spec_1 (const char *, int, const char *); +-static int do_spec_2 (const char *); ++static int do_spec_2 (const char *, const char *); + static void do_option_spec (const char *, const char *); + static void do_self_spec (const char *); + static const char *find_file (const char *); +@@ -4865,7 +4865,7 @@ do_spec (const char *spec) + { + int value; + +- value = do_spec_2 (spec); ++ value = do_spec_2 (spec, NULL); + + /* Force out any unfinished command. + If -pipe, this forces out the last command if it ended in `|'. */ +@@ -4884,8 +4884,11 @@ do_spec (const char *spec) + return value; + } + ++/* Process the spec SPEC, with SOFT_MATCHED_PART designating the current value ++ of a matched * pattern which may be re-injected by way of %*. */ ++ + static int +-do_spec_2 (const char *spec) ++do_spec_2 (const char *spec, const char *soft_matched_part) + { + int result; + +@@ -4898,14 +4901,13 @@ do_spec_2 (const char *spec) + input_from_pipe = 0; + suffix_subst = NULL; + +- result = do_spec_1 (spec, 0, NULL); ++ result = do_spec_1 (spec, 0, soft_matched_part); + + end_going_arg (); + + return result; + } + +- + /* Process the given spec string and add any new options to the end + of the switches/n_switches array. */ + +@@ -4963,7 +4965,7 @@ do_self_spec (const char *spec) + { + int i; + +- do_spec_2 (spec); ++ do_spec_2 (spec, NULL); + do_spec_1 (" ", 0, NULL); + + /* Mark % 1) + error ("spec failure: more than one arg to SYSROOT_SUFFIX_SPEC"); +@@ -7577,7 +7584,7 @@ driver::set_up_specs () const + /* Process sysroot_hdrs_suffix_spec. */ + if (*sysroot_hdrs_suffix_spec != 0 + && !no_sysroot_suffix +- && do_spec_2 (sysroot_hdrs_suffix_spec) == 0) ++ && do_spec_2 (sysroot_hdrs_suffix_spec, NULL) == 0) + { + if (argbuf.length () > 1) + error ("spec failure: more than one arg to SYSROOT_HEADERS_SUFFIX_SPEC"); +@@ -7587,7 +7594,7 @@ driver::set_up_specs () const + + /* Look for startfiles in the standard places. 
*/ + if (*startfile_prefix_spec != 0 +- && do_spec_2 (startfile_prefix_spec) == 0 ++ && do_spec_2 (startfile_prefix_spec, NULL) == 0 + && do_spec_1 (" ", 0, NULL) == 0) + { + const char *arg; +@@ -9717,7 +9724,7 @@ compare_debug_dump_opt_spec_function (int arg, + fatal_error (input_location, + "too many arguments to %%:compare-debug-dump-opt"); + +- do_spec_2 ("%{fdump-final-insns=*:%*}"); ++ do_spec_2 ("%{fdump-final-insns=*:%*}", NULL); + do_spec_1 (" ", 0, NULL); + + if (argbuf.length () > 0 +@@ -9735,13 +9742,13 @@ compare_debug_dump_opt_spec_function (int arg, + + if (argbuf.length () > 0) + { +- do_spec_2 ("%{o*:%*}%{!o:%{!S:%b%O}%{S:%b.s}}"); ++ do_spec_2 ("%{o*:%*}%{!o:%{!S:%b%O}%{S:%b.s}}", NULL); + ext = ".gkd"; + } + else if (!compare_debug) + return NULL; + else +- do_spec_2 ("%g.gkd"); ++ do_spec_2 ("%g.gkd", NULL); + + do_spec_1 (" ", 0, NULL); + +@@ -9793,7 +9800,7 @@ compare_debug_self_opt_spec_function (int arg, + if (compare_debug >= 0) + return NULL; + +- do_spec_2 ("%{c|S:%{o*:%*}}"); ++ do_spec_2 ("%{c|S:%{o*:%*}}", NULL); + do_spec_1 (" ", 0, NULL); + + if (argbuf.length () > 0) +-- +2.39.3 + diff --git a/LoongArch-Remove-NOOP_TRUNCATION-and-fix-extendsidi2.patch b/LoongArch-Remove-NOOP_TRUNCATION-and-fix-extendsidi2.patch new file mode 100644 index 0000000..f8de504 --- /dev/null +++ b/LoongArch-Remove-NOOP_TRUNCATION-and-fix-extendsidi2.patch @@ -0,0 +1,101 @@ +From 08d337cc5186e47949b60e4b3eeebd1f763337e0 Mon Sep 17 00:00:00 2001 +From: Lixing +Date: Mon, 31 Jul 2023 09:46:12 +0800 +Subject: [PATCH 1/2] LoongArch: Remove NOOP_TRUNCATION and fix extendsidi2 + +We can safely convert value from inprec to outprec because we hold on +extention if needed. +--- + gcc/config/loongarch/loongarch.c | 11 -------- + gcc/config/loongarch/loongarch.md | 44 +++++++------------------------ + 2 files changed, 9 insertions(+), 46 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.c b/gcc/config/loongarch/loongarch.c +index a1dde5a0f..f8f96329c 100644 +--- a/gcc/config/loongarch/loongarch.c ++++ b/gcc/config/loongarch/loongarch.c +@@ -10313,14 +10313,6 @@ loongarch_promote_function_mode (const_tree type ATTRIBUTE_UNUSED, + return mode; + } + +-/* Implement TARGET_TRULY_NOOP_TRUNCATION. */ +- +-static bool +-loongarch_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec) +-{ +- return !TARGET_64BIT || inprec <= 32 || outprec > 32; +-} +- + /* Implement TARGET_STARTING_FRAME_OFFSET. See loongarch_compute_frame_info + for details about the frame layout. */ + +@@ -10940,9 +10932,6 @@ loongarch_prefetch_cookie (rtx write, rtx locality) + #undef TARGET_CAN_CHANGE_MODE_CLASS + #define TARGET_CAN_CHANGE_MODE_CLASS loongarch_can_change_mode_class + +-#undef TARGET_TRULY_NOOP_TRUNCATION +-#define TARGET_TRULY_NOOP_TRUNCATION loongarch_truly_noop_truncation +- + #undef TARGET_CONSTANT_ALIGNMENT + #define TARGET_CONSTANT_ALIGNMENT loongarch_constant_alignment + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 097c9f4db..a08c4a62c 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1433,43 +1433,17 @@ + ;; + ;; .................... 
+ +-(define_insn_and_split "extendsidi2" ++(define_insn "extendsidi2" + [(set (match_operand:DI 0 "register_operand" "=r,r,r,r") +- (sign_extend:DI +- (match_operand:SI 1 "nonimmediate_operand" "0,ZC,m,k")))] ++ (sign_extend:DI ++ (match_operand:SI 1 "nonimmediate_operand" "r,ZC,m,k")))] + "TARGET_64BIT" +-{ +- switch (which_alternative) +- { +- case 0: +- return "#"; +- case 1: +- { +- rtx offset = XEXP (operands[1], 0); +- if (GET_CODE (offset) == PLUS) +- offset = XEXP (offset, 1); +- else +- offset = const0_rtx; +- if (const_arith_operand (offset, Pmode) || (offset == const0_rtx)) +- return "ld.w\t%0,%1"; +- else +- return "ldptr.w\t%0,%1"; +- } +- case 2: +- return "ld.w\t%0,%1"; +- case 3: +- return "ldx.w\t%0,%1"; +- default: +- gcc_unreachable (); +- } +-} +- "&& reload_completed && register_operand (operands[1], VOIDmode)" +- [(const_int 0)] +-{ +- emit_note (NOTE_INSN_DELETED); +- DONE; +-} +- [(set_attr "move_type" "move,load,load,load") ++ "@ ++ slli.w\t%0,%1,0 ++ ldptr.w\t%0,%1 ++ ld.w\t%0,%1 ++ ldx.w\t%0,%1" ++ [(set_attr "move_type" "sll0,load,load,load") + (set_attr "mode" "DI")]) + + (define_insn "extend2" +-- +2.39.3 + diff --git a/gcc.spec b/gcc.spec index b4e229f..1ec052b 100644 --- a/gcc.spec +++ b/gcc.spec @@ -1,4 +1,4 @@ -%define anolis_release .0.3 +%define anolis_release .0.4 %global DATE 20210514 %global gitrev a3253c88425835d5b339d6998a1110a66ccd8b44 %global gcc_version 8.5.0 @@ -291,6 +291,7 @@ Patch27: gcc8-libgfortran-default-values.patch Patch30: gcc8-rh1668903-1.patch Patch31: gcc8-rh1668903-2.patch Patch32: gcc8-rh1668903-3.patch +Patch33: Improve-specs-processing-to-allow-in-function-argume.patch Patch1000: nvptx-tools-no-ptxas.patch Patch1001: nvptx-tools-build.patch @@ -304,6 +305,8 @@ Patch1004: 0002-loongarch-fix-multilib-osdirnames-to-lib64.patch Patch1005: 0001-LoongArch-Fixup-configure-file-error.patch Patch1006: 0002-LoongArch-Rename-config-file-for-loongarch.patch Patch1007: LoongArch-Fix-atomic_exchange-expanding-PR107713.patch +Patch1008: 0001-Sync-to-gcc-8-vec-36.patch +Patch1009: LoongArch-Remove-NOOP_TRUNCATION-and-fix-extendsidi2.patch # On ARM EABI systems, we do want -gnueabi to be part of the @@ -886,6 +889,7 @@ to NVidia PTX capable devices if available. 
%patch30 -p0 -b .rh1668903-1~ %patch31 -p0 -b .rh1668903-2~ %patch32 -p0 -b .rh1668903-3~ +%patch33 -p0 -b .fixspec~ cd nvptx-tools-%{nvptx_tools_gitrev} %patch1000 -p1 -b .nvptx-tools-no-ptxas~ @@ -934,6 +938,8 @@ rm -f gcc/testsuite/go.test/test/chan/goroutines.go %patch1005 -p1 %patch1006 -p1 %patch1007 -p1 +%patch1008 -p1 +%patch1009 -p1 %endif %build @@ -1019,7 +1025,7 @@ CONFIGURE_OPTS="\ %ifarch ppc64le --enable-targets=powerpcle-linux \ %endif -%ifarch ppc64le %{mips} riscv64 s390x loongarch64 +%ifarch ppc64le %{mips} riscv64 s390x --disable-multilib \ %else %if 0%{?anolis} @@ -1030,7 +1036,7 @@ CONFIGURE_OPTS="\ %endif --with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions \ --enable-gnu-unique-object --enable-linker-build-id --with-gcc-major-version-only \ -%ifnarch %{mips} loongarch64 +%ifnarch %{mips} --with-linker-hash-style=gnu \ %endif --enable-plugin --enable-initfini-array \ @@ -1052,8 +1058,12 @@ CONFIGURE_OPTS="\ --with-arch=loongarch64 \ --with-abi=lp64 \ --enable-tls \ + --with-long-double-128 \ + --disable-multilib \ + --enable-initfini-array \ --enable-gnu-indirect-function \ --disable-emultls \ + --with-linker-hash-style=gnu \ %endif %if 0%{?fedora} >= 21 || 0%{?rhel} >= 7 @@ -1067,7 +1077,7 @@ CONFIGURE_OPTS="\ %ifarch ppc ppc64 ppc64le ppc64p7 --enable-secureplt \ %endif -%ifarch sparc sparcv9 sparc64 ppc ppc64 ppc64le ppc64p7 s390 s390x alpha loongarch64 +%ifarch sparc sparcv9 sparc64 ppc ppc64 ppc64le ppc64p7 s390 s390x alpha --with-long-double-128 \ %endif %ifarch sparc @@ -3240,6 +3250,9 @@ fi %endif %changelog +* Wed Jul 19 2023 Xing Li 8.5.0-10.1.0.4 +- Sync code to vec.36. (lixing@loongson.cn) + * Fri Dec 2 2022 Xing Li 8.5.0-10.1.0.3 - rename mt file for LoongArch. (lixing@loongson.cn) - Fixup LoongArch atomic_exchange error. (lixing@loongson.cn) -- Gitee