From 6df3f8c8471d444018ff3e92091592520707b523 Mon Sep 17 00:00:00 2001 From: Li Xing Date: Thu, 24 Mar 2022 17:09:25 +0800 Subject: [PATCH] [LOONGARCH] Add Loongarch backend support Signed-off-by: Li Xing --- 0033-Add-Loongarch-backend-support.patch | 23019 +++++++++++++++++++++ gcc.spec | 11 +- 2 files changed, 23028 insertions(+), 2 deletions(-) create mode 100644 0033-Add-Loongarch-backend-support.patch diff --git a/0033-Add-Loongarch-backend-support.patch b/0033-Add-Loongarch-backend-support.patch new file mode 100644 index 0000000..748a7af --- /dev/null +++ b/0033-Add-Loongarch-backend-support.patch @@ -0,0 +1,23019 @@ +diff -uNr gcc-10.3.0.org/config.guess gcc-10.3.0/config.guess +--- gcc-10.3.0.org/config.guess 2021-04-08 19:56:27.573734691 +0800 ++++ gcc-10.3.0/config.guess 2022-03-23 17:40:29.339280184 +0800 +@@ -980,6 +980,9 @@ + k1om:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; ++ loongarch*:Linux:*:*) ++ echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" ++ exit ;; + m32r*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; +diff -uNr gcc-10.3.0.org/config.sub gcc-10.3.0/config.sub +--- gcc-10.3.0.org/config.sub 2021-04-08 19:56:27.573734691 +0800 ++++ gcc-10.3.0/config.sub 2022-03-23 17:40:29.339280184 +0800 +@@ -1183,6 +1183,7 @@ + | k1om \ + | le32 | le64 \ + | lm32 \ ++ | loongarch32 | loongarchx32 | loongarch64 \ + | m32c | m32r | m32rle \ + | m5200 | m68000 | m680[012346]0 | m68360 | m683?2 | m68k \ + | m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x \ +diff -uNr gcc-10.3.0.org/configure gcc-10.3.0/configure +--- gcc-10.3.0.org/configure 2022-03-23 17:25:14.304350355 +0800 ++++ gcc-10.3.0/configure 2022-03-28 11:15:09.028369464 +0800 +@@ -3029,7 +3029,7 @@ + # Check for target supported by gold. 
+ case "${target}" in + i?86-*-* | x86_64-*-* | sparc*-*-* | powerpc*-*-* | arm*-*-* \ +- | aarch64*-*-* | tilegx*-*-* | mips*-*-* | s390*-*-*) ++ | aarch64*-*-* | tilegx*-*-* | mips*-*-* | s390*-*-* | loongarch*-*-*) + configdirs="$configdirs gold" + if test x${ENABLE_GOLD} = xdefault; then + default_ld=gold +@@ -3641,6 +3641,9 @@ + i[3456789]86-*-*) + libgloss_dir=i386 + ;; ++ loongarch*-*-*) ++ libgloss_dir=loongarch ++ ;; + m68hc11-*-*|m6811-*-*|m68hc12-*-*|m6812-*-*) + libgloss_dir=m68hc11 + ;; +@@ -4025,6 +4028,11 @@ + wasm32-*-*) + noconfigdirs="$noconfigdirs ld" + ;; ++ loongarch*-*-linux*) ++ ;; ++ loongarch*-*-*) ++ noconfigdirs="$noconfigdirs gprof" ++ ;; + esac + + # If we aren't building newlib, then don't build libgloss, since libgloss +@@ -7134,6 +7142,9 @@ + mips*-*-*linux* | mips*-*-gnu*) + target_makefile_frag="config/mt-mips-gnu" + ;; ++ loongarch*-*-*linux* | loongarch*-*-gnu*) ++ target_makefile_frag="config/mt-loongarch-gnu" ++ ;; + nios2-*-elf*) + target_makefile_frag="config/mt-nios2-elf" + ;; +diff -uNr gcc-10.3.0.org/configure.ac gcc-10.3.0/configure.ac +--- gcc-10.3.0.org/configure.ac 2022-03-23 17:25:14.304350355 +0800 ++++ gcc-10.3.0/configure.ac 2022-03-23 17:40:29.342280160 +0800 +@@ -345,7 +345,7 @@ + # Check for target supported by gold. 
+ case "${target}" in + i?86-*-* | x86_64-*-* | sparc*-*-* | powerpc*-*-* | arm*-*-* \ +- | aarch64*-*-* | tilegx*-*-* | mips*-*-* | s390*-*-*) ++ | aarch64*-*-* | tilegx*-*-* | mips*-*-* | s390*-*-* | loongarch*-*-*) + configdirs="$configdirs gold" + if test x${ENABLE_GOLD} = xdefault; then + default_ld=gold +@@ -914,6 +914,9 @@ + i[[3456789]]86-*-*) + libgloss_dir=i386 + ;; ++ loongarch*-*-*) ++ libgloss_dir=loongarch ++ ;; + m68hc11-*-*|m6811-*-*|m68hc12-*-*|m6812-*-*) + libgloss_dir=m68hc11 + ;; +@@ -1298,6 +1301,11 @@ + wasm32-*-*) + noconfigdirs="$noconfigdirs ld" + ;; ++ loongarch*-*-linux*) ++ ;; ++ loongarch*-*-*) ++ noconfigdirs="$noconfigdirs gprof" ++ ;; + esac + + # If we aren't building newlib, then don't build libgloss, since libgloss +@@ -2639,6 +2647,9 @@ + mips*-*-*linux* | mips*-*-gnu*) + target_makefile_frag="config/mt-mips-gnu" + ;; ++ loongarch*-*-*linux* | loongarch*-*-gnu*) ++ target_makefile_frag="config/mt-loongarch-gnu" ++ ;; + nios2-*-elf*) + target_makefile_frag="config/mt-nios2-elf" + ;; +diff -uNr gcc-10.3.0.org/contrib/config-list.mk gcc-10.3.0/contrib/config-list.mk +--- gcc-10.3.0.org/contrib/config-list.mk 2021-04-08 19:56:27.581734786 +0800 ++++ gcc-10.3.0/contrib/config-list.mk 2022-03-23 17:40:29.342280160 +0800 +@@ -57,7 +57,10 @@ + i686-wrs-vxworksae \ + i686-cygwinOPT-enable-threads=yes i686-mingw32crt ia64-elf \ + ia64-freebsd6 ia64-linux ia64-hpux ia64-hp-vms iq2000-elf lm32-elf \ +- lm32-rtems lm32-uclinux m32c-rtems m32c-elf m32r-elf m32rle-elf \ ++ lm32-rtems lm32-uclinux \ ++ loongarch64-linux-gnu loongarch64-linux-gnuf64 \ ++ loongarch64-linux-gnuf32 loongarch64-linux-gnusf \ ++ m32c-rtems m32c-elf m32r-elf m32rle-elf \ + m32r-linux m32rle-linux m68k-elf m68k-netbsdelf \ + m68k-openbsd m68k-uclinux m68k-linux m68k-rtems \ + mcore-elf microblaze-linux microblaze-elf \ +diff -uNr gcc-10.3.0.org/gcc/cfg.h gcc-10.3.0/gcc/cfg.h +--- gcc-10.3.0.org/gcc/cfg.h 2021-04-08 19:56:28.021740099 +0800 ++++ gcc-10.3.0/gcc/cfg.h 
2022-03-23 17:40:29.342280160 +0800 +@@ -21,6 +21,7 @@ + #define GCC_CFG_H + + #include "dominance.h" ++#include "function.h" + + /* What sort of profiling information we have. */ + enum profile_status_d +diff -uNr gcc-10.3.0.org/gcc/common/config/loongarch/loongarch-common.c gcc-10.3.0/gcc/common/config/loongarch/loongarch-common.c +--- gcc-10.3.0.org/gcc/common/config/loongarch/loongarch-common.c 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/common/config/loongarch/loongarch-common.c 2022-03-23 17:40:29.342280160 +0800 +@@ -0,0 +1,63 @@ ++/* Common hooks for LoongArch. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++#include "common/common-target.h" ++#include "common/common-target-def.h" ++#include "opts.h" ++#include "flags.h" ++#include "diagnostic-core.h" ++ ++/* Implement TARGET_HANDLE_OPTION. 
*/ ++ ++static bool ++loongarch_handle_option (struct gcc_options *opts, ++ struct gcc_options *opts_set ATTRIBUTE_UNUSED, ++ const struct cl_decoded_option *decoded, ++ location_t loc ATTRIBUTE_UNUSED) ++{ ++ size_t code = decoded->opt_index; ++ int value = decoded->value; ++ ++ switch (code) ++ { ++ case OPT_mmemcpy: ++ if (value) ++ { ++ if (opts->x_optimize_size) ++ opts->x_target_flags |= MASK_MEMCPY; ++ } ++ else ++ opts->x_target_flags &= ~MASK_MEMCPY; ++ return true; ++ ++ default: ++ return true; ++ } ++} ++ ++#undef TARGET_DEFAULT_TARGET_FLAGS ++#define TARGET_DEFAULT_TARGET_FLAGS MASK_CHECK_ZERO_DIV ++#undef TARGET_HANDLE_OPTION ++#define TARGET_HANDLE_OPTION loongarch_handle_option ++ ++struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER; +diff -uNr gcc-10.3.0.org/gcc/config/host-linux.c gcc-10.3.0/gcc/config/host-linux.c +--- gcc-10.3.0.org/gcc/config/host-linux.c 2021-04-08 19:56:28.093740970 +0800 ++++ gcc-10.3.0/gcc/config/host-linux.c 2022-03-23 17:40:29.342280160 +0800 +@@ -98,6 +98,8 @@ + # define TRY_EMPTY_VM_SPACE 0x60000000 + #elif defined(__riscv) && defined (__LP64__) + # define TRY_EMPTY_VM_SPACE 0x1000000000 ++#elif defined(__loongarch__) && defined(__LP64__) ++# define TRY_EMPTY_VM_SPACE 0x8000000000 + #else + # define TRY_EMPTY_VM_SPACE 0 + #endif +diff -uNr gcc-10.3.0.org/gcc/config/loongarch/constraints.md gcc-10.3.0/gcc/config/loongarch/constraints.md +--- gcc-10.3.0.org/gcc/config/loongarch/constraints.md 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/config/loongarch/constraints.md 2022-03-23 17:40:29.343280152 +0800 +@@ -0,0 +1,287 @@ ++;; Constraint definitions for LARCH. ++;; Copyright (C) 2006-2018 Free Software Foundation, Inc. ++;; ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify ++;; it under the terms of the GNU General Public License as published by ++;; the Free Software Foundation; either version 3, or (at your option) ++;; any later version. 
++;; ++;; GCC is distributed in the hope that it will be useful, ++;; but WITHOUT ANY WARRANTY; without even the implied warranty of ++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++;; GNU General Public License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; . ++ ++;; Register constraints ++ ++;; "a" A constant call global and noplt address. ++;; "b" ALL_REGS ++;; "c" A constant call local address. ++;; "d" GR_REGS ++;; "e" JALR_REGS ++;; "f" FP_REGS ++;; "g" * ++;; "h" A constant call plt address. ++;; "i" "Matches a general integer constant." ++;; "j" SIBCALL_REGS ++;; "k" - ++;; "l" "A signed 16-bit constant ." ++;; "m" "A memory operand whose address is formed by a base register and offset ++;; that is suitable for use in instructions with the same addressing mode ++;; as @code{st.w} and @code{ld.w}." ++;; "n" "Matches a non-symbolic integer constant." ++;; "o" "Matches an offsettable memory reference." ++;; "p" "Matches a general address." ++;; "q" LVZ_REGS ++;; "r" GENERAL_REGS ++;; "s" "Matches a symbolic integer constant." ++;; "t" A constant call weak address ++;; "u" - ++;; "v" - ++;; "w" "Matches any valid memory." ++;; "x" - ++;; "y" GR_REGS ++;; "z" ST_REGS ++;; "A" - ++;; "B" - ++;; "C" - ++;; "D" - ++;; "E" "Matches a floating-point constant." ++;; "F" "Matches a floating-point constant." ++;; "G" "Floating-point zero." ++;; "H" - ++;; "I" "A signed 12-bit constant (for arithmetic instructions)." ++;; "J" "Integer zero." ++;; "K" "An unsigned 12-bit constant (for logic instructions)." ++;; "L" "A signed 32-bit constant in which the lower 12 bits are zero. ++;; "M" "A constant that cannot be loaded using @code{lui}, @code{addiu} or @code{ori}." ++;; "N" "A constant in the range -65535 to -1 (inclusive)." ++;; "O" "A signed 15-bit constant." ++;; "P" "A constant in the range 1 to 65535 (inclusive)." 
++;; "Q" "A signed 12-bit constant" ++;; "R" "An address that can be used in a non-macro load or store." ++;; "S" "A constant call address." ++;; "T" - ++;; "U" - ++;; "V" "Matches a non-offsettable memory reference." ++;; "W" "A memory address based on a member of @code{BASE_REG_CLASS}. This is ++;; true for all references (although it can sometimes be implicit ++;; if @samp{!TARGET_EXPLICIT_RELOCS})." ++;; "X" "Matches anything." ++;; "Y" - ++;; "Yb" ++;; "Yd" ++;; "A constant @code{move_operand} that can be safely loaded into @code{$25} ++;; using @code{la}." ++;; "Yh" ++;; "Yw" ++;; "Yx" ++;; "Z" - ++;; "ZC" ++;; "A memory operand whose address is formed by a base register and offset ++;; that is suitable for use in instructions with the same addressing mode ++;; as @code{ll.w} and @code{sc.w}." ++;; "ZD" ++;; "An address suitable for a @code{prefetch} instruction, or for any other ++;; instruction with the same addressing mode as @code{prefetch}." ++;; "ZB" ++;; "An address that is held in a general-purpose register. ++;; The offset is zero" ++ ++ ++(define_constraint "c" ++ "@internal ++ A constant call local address." ++ (match_operand 0 "is_const_call_local_symbol")) ++ ++(define_constraint "a" ++ "@internal ++ A constant call global and noplt address." ++ (match_operand 0 "is_const_call_global_noplt_symbol")) ++ ++(define_constraint "h" ++ "@internal ++ A constant call plt address." ++ (match_operand 0 "is_const_call_plt_symbol")) ++ ++(define_constraint "t" ++ "@internal ++ A constant call weak address." ++ (match_operand 0 "is_const_call_weak_symbol")) ++ ++(define_register_constraint "d" "GR_REGS" ++ "A general-purpose register. This is equivalent to @code{r}.") ++ ++(define_register_constraint "e" "JALR_REGS" ++ "@internal") ++ ++(define_register_constraint "q" "LVZ_REGS" ++ "A general-purpose register except for $r0 and $r1 for lvz.") ++ ++(define_register_constraint "f" "TARGET_HARD_FLOAT ? 
FP_REGS : NO_REGS" ++ "A floating-point register (if available).") ++ ++(define_register_constraint "b" "ALL_REGS" ++ "@internal") ++ ++(define_register_constraint "j" "SIBCALL_REGS" ++ "@internal") ++ ++(define_constraint "l" ++ "A signed 16-bit constant ." ++ (and (match_code "const_int") ++ (match_test "IMM16_OPERAND (ival)"))) ++ ++(define_register_constraint "y" "GR_REGS" ++ "Equivalent to @code{r}; retained for backwards compatibility.") ++ ++(define_register_constraint "z" "ST_REGS" ++ "A floating-point condition code register.") ++ ++;; Integer constraints ++ ++(define_constraint "I" ++ "A signed 12-bit constant (for arithmetic instructions)." ++ (and (match_code "const_int") ++ (match_test "SMALL_OPERAND (ival)"))) ++ ++(define_constraint "J" ++ "Integer zero." ++ (and (match_code "const_int") ++ (match_test "ival == 0"))) ++ ++(define_constraint "K" ++ "An unsigned 12-bit constant (for logic instructions)." ++ (and (match_code "const_int") ++ (match_test "SMALL_OPERAND_UNSIGNED (ival)"))) ++ ++(define_constraint "u" ++ "An unsigned 12-bit constant (for logic instructions)." ++ (and (match_code "const_int") ++ (match_test "LU32I_OPERAND (ival)"))) ++ ++(define_constraint "v" ++ "An unsigned 12-bit constant (for logic instructions)." ++ (and (match_code "const_int") ++ (match_test "LU52I_OPERAND (ival)"))) ++ ++(define_constraint "L" ++ "A signed 32-bit constant in which the lower 12 bits are zero. ++ Such constants can be loaded using @code{lui}." ++ (and (match_code "const_int") ++ (match_test "LUI_OPERAND (ival)"))) ++ ++(define_constraint "M" ++ "A constant that cannot be loaded using @code{lui}, @code{addiu} ++ or @code{ori}." ++ (and (match_code "const_int") ++ (not (match_test "SMALL_OPERAND (ival)")) ++ (not (match_test "SMALL_OPERAND_UNSIGNED (ival)")) ++ (not (match_test "LUI_OPERAND (ival)")))) ++ ++(define_constraint "N" ++ "A constant in the range -65535 to -1 (inclusive)." 
++ (and (match_code "const_int") ++ (match_test "ival >= -0xffff && ival < 0"))) ++ ++(define_constraint "O" ++ "A signed 15-bit constant." ++ (and (match_code "const_int") ++ (match_test "ival >= -0x4000 && ival < 0x4000"))) ++ ++(define_constraint "P" ++ "A constant in the range 1 to 65535 (inclusive)." ++ (and (match_code "const_int") ++ (match_test "ival > 0 && ival < 0x10000"))) ++ ++;; Floating-point constraints ++ ++(define_constraint "G" ++ "Floating-point zero." ++ (and (match_code "const_double") ++ (match_test "op == CONST0_RTX (mode)"))) ++ ++;; General constraints ++ ++(define_constraint "Q" ++ "@internal" ++ (match_operand 0 "const_arith_operand")) ++ ++(define_memory_constraint "R" ++ "An address that can be used in a non-macro load or store." ++ (and (match_code "mem") ++ (match_test "loongarch_address_insns (XEXP (op, 0), mode, false) == 1"))) ++ ++(define_memory_constraint "m" ++ "A memory operand whose address is formed by a base register and offset ++ that is suitable for use in instructions with the same addressing mode ++ as @code{st.w} and @code{ld.w}." ++ (and (match_code "mem") ++ (match_test "loongarch_12bit_offset_address_p (XEXP (op, 0), mode)"))) ++ ++(define_constraint "S" ++ "@internal ++ A constant call address." ++ (and (match_operand 0 "call_insn_operand") ++ (match_test "CONSTANT_P (op)"))) ++ ++(define_memory_constraint "W" ++ "@internal ++ A memory address based on a member of @code{BASE_REG_CLASS}. This is ++ true for allreferences (although it can sometimes be implicit ++ if @samp{!TARGET_EXPLICIT_RELOCS})." ++ (and (match_code "mem") ++ (match_operand 0 "memory_operand") ++ (and (not (match_operand 0 "stack_operand")) ++ (not (match_test "CONSTANT_P (XEXP (op, 0))"))))) ++ ++(define_constraint "Yb" ++ "@internal" ++ (match_operand 0 "qi_mask_operand")) ++ ++(define_constraint "Yd" ++ "@internal ++ A constant @code{move_operand} that can be safely loaded into @code{$25} ++ using @code{la}." 
++ (and (match_operand 0 "move_operand") ++ (match_test "CONSTANT_P (op)"))) ++ ++(define_constraint "Yh" ++ "@internal" ++ (match_operand 0 "hi_mask_operand")) ++ ++(define_constraint "Yw" ++ "@internal" ++ (match_operand 0 "si_mask_operand")) ++ ++(define_constraint "Yx" ++ "@internal" ++ (match_operand 0 "low_bitmask_operand")) ++ ++(define_memory_constraint "ZC" ++ "A memory operand whose address is formed by a base register and offset ++ that is suitable for use in instructions with the same addressing mode ++ as @code{ll.w} and @code{sc.w}." ++ (and (match_code "mem") ++ (match_test "loongarch_14bit_shifted_offset_address_p (XEXP (op, 0), mode)"))) ++ ++;;(define_address_constraint "ZD" ++;; "An address suitable for a @code{prefetch} instruction, or for any other ++;; instruction with the same addressing mode as @code{prefetch}." ++;; (if_then_else (match_test "ISA_HAS_9BIT_DISPLACEMENT") ++;; (match_test "loongarch_9bit_offset_address_p (op, mode)") ++;; (match_test "loongarch_address_insns (op, mode, false)"))) ++ ++ ++(define_memory_constraint "ZB" ++ "@internal ++ An address that is held in a general-purpose register. ++ The offset is zero" ++ (and (match_code "mem") ++ (match_test "GET_CODE(XEXP(op,0)) == REG"))) ++ +diff -uNr gcc-10.3.0.org/gcc/config/loongarch/driver-native.c gcc-10.3.0/gcc/config/loongarch/driver-native.c +--- gcc-10.3.0.org/gcc/config/loongarch/driver-native.c 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/config/loongarch/driver-native.c 2022-03-23 17:40:29.343280152 +0800 +@@ -0,0 +1,82 @@ ++/* Subroutines for the gcc driver. ++ Copyright (C) 2008-2018 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. 
++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#define IN_TARGET_CODE 1 ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++ ++ ++/* This function must set to noinline. Otherwise the arg can not be passed. */ ++int loongson_cpucfg (int arg) ++{ ++ int ret; ++ __asm__ __volatile__ ("cpucfg %0,%1\n\t" /* cpucfg $2,$4. */ ++ :"=r"(ret) ++ :"r"(arg) ++ :); ++ return ret; ++} ++ ++/* This will be called by the spec parser in gcc.c when it sees ++ a %:local_cpu_detect(args) construct. Currently it will be called ++ with either "arch" or "tune" as argument depending on if -march=native ++ or -mtune=native is to be substituted. ++ ++ It returns a string containing new command line parameters to be ++ put at the place of the above two options, depending on what CPU ++ this is executed. E.g. "-march=loongson2f" on a Loongson 2F for ++ -march=native. If the routine can't detect a known processor, ++ the -march or -mtune option is discarded. ++ ++ ARGC and ARGV are set depending on the actual arguments given ++ in the spec. 
*/ ++const char * ++host_detect_local_cpu (int argc, const char **argv) ++{ ++ const char *cpu = NULL; ++ bool arch; ++ int cpucfg_arg; ++ int cpucfg_ret; ++ ++ if (argc < 1) ++ return NULL; ++ ++ arch = strcmp (argv[0], "arch") == 0; ++ if (!arch && strcmp (argv[0], "tune")) ++ return NULL; ++ ++ cpucfg_arg = 0; ++ cpucfg_ret = loongson_cpucfg (cpucfg_arg); ++ if (((cpucfg_ret >> 16) & 0xff) == 0x14) ++ { ++ if (((cpucfg_ret >> 8) & 0xff) == 0xc0) ++ cpu = "gs464v"; ++ else ++ cpu = NULL; ++ } ++ ++ ++ if (cpu == NULL) ++ return NULL; ++ ++ return concat ("-m", argv[0], "=", cpu, NULL); ++} +diff -uNr gcc-10.3.0.org/gcc/config/loongarch/elf.h gcc-10.3.0/gcc/config/loongarch/elf.h +--- gcc-10.3.0.org/gcc/config/loongarch/elf.h 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/config/loongarch/elf.h 2022-03-23 17:40:29.343280152 +0800 +@@ -0,0 +1,50 @@ ++/* Target macros for loongarch*-elf targets. ++ Copyright (C) 1994-2018 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++/* LARCH assemblers don't have the usual .set foo,bar construct; ++ .set is used for assembler options instead. 
*/ ++#undef SET_ASM_OP ++#define ASM_OUTPUT_DEF(FILE, LABEL1, LABEL2) \ ++ do \ ++ { \ ++ fputc ('\t', FILE); \ ++ assemble_name (FILE, LABEL1); \ ++ fputs (" = ", FILE); \ ++ assemble_name (FILE, LABEL2); \ ++ fputc ('\n', FILE); \ ++ } \ ++ while (0) ++ ++#undef ASM_DECLARE_OBJECT_NAME ++#define ASM_DECLARE_OBJECT_NAME loongarch_declare_object_name ++ ++#undef ASM_FINISH_DECLARE_OBJECT ++#define ASM_FINISH_DECLARE_OBJECT loongarch_finish_declare_object ++ ++/* Leave the linker script to choose the appropriate libraries. */ ++#undef LIB_SPEC ++#define LIB_SPEC "" ++ ++#undef STARTFILE_SPEC ++#define STARTFILE_SPEC "crti%O%s crtbegin%O%s" ++ ++#undef ENDFILE_SPEC ++#define ENDFILE_SPEC "crtend%O%s crtn%O%s" ++ ++#define NO_IMPLICIT_EXTERN_C 1 +diff -uNr gcc-10.3.0.org/gcc/config/loongarch/frame-header-opt.c gcc-10.3.0/gcc/config/loongarch/frame-header-opt.c +--- gcc-10.3.0.org/gcc/config/loongarch/frame-header-opt.c 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/config/loongarch/frame-header-opt.c 2022-03-23 17:40:29.343280152 +0800 +@@ -0,0 +1,292 @@ ++/* Analyze functions to determine if callers need to allocate a frame header ++ on the stack. The frame header is used by callees to save their arguments. ++ This optimization is specific to TARGET_OLDABI targets. For TARGET_NEWABI ++ targets, if a frame header is required, it is allocated by the callee. ++ ++ ++ Copyright (C) 2015-2018 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it ++under the terms of the GNU General Public License as published by the ++Free Software Foundation; either version 3, or (at your option) any ++later version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. 
++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++ ++#define IN_TARGET_CODE 1 ++ ++#include "config.h" ++#include "system.h" ++#include "context.h" ++#include "coretypes.h" ++#include "tree.h" ++#include "tree-core.h" ++#include "tree-pass.h" ++#include "target.h" ++#include "target-globals.h" ++#include "profile-count.h" ++#include "function.h" ++#include "cfg.h" ++#include "cgraph.h" ++#include "basic-block.h" ++#include "gimple.h" ++#include "gimple-iterator.h" ++#include "gimple-walk.h" ++ ++static unsigned int frame_header_opt (void); ++ ++namespace { ++ ++const pass_data pass_data_ipa_frame_header_opt = ++{ ++ IPA_PASS, /* type */ ++ "frame-header-opt", /* name */ ++ OPTGROUP_NONE, /* optinfo_flags */ ++ TV_CGRAPHOPT, /* tv_id */ ++ 0, /* properties_required */ ++ 0, /* properties_provided */ ++ 0, /* properties_destroyed */ ++ 0, /* todo_flags_start */ ++ 0, /* todo_flags_finish */ ++}; ++ ++class pass_ipa_frame_header_opt : public ipa_opt_pass_d ++{ ++public: ++ pass_ipa_frame_header_opt (gcc::context *ctxt) ++ : ipa_opt_pass_d (pass_data_ipa_frame_header_opt, ctxt, ++ NULL, /* generate_summary */ ++ NULL, /* write_summary */ ++ NULL, /* read_summary */ ++ NULL, /* write_optimization_summary */ ++ NULL, /* read_optimization_summary */ ++ NULL, /* stmt_fixup */ ++ 0, /* function_transform_todo_flags_start */ ++ NULL, /* function_transform */ ++ NULL) /* variable_transform */ ++ {} ++ ++ /* opt_pass methods: */ ++ virtual bool gate (function *) ++ { ++ /* This optimization has no affect if TARGET_NEWABI. If optimize ++ is not at least 1 then the data needed for the optimization is ++ not available and nothing will be done anyway. 
*/ ++ return TARGET_OLDABI && flag_frame_header_optimization && optimize > 0; ++ } ++ ++ virtual unsigned int execute (function *) { return frame_header_opt (); } ++ ++}; // class pass_ipa_frame_header_opt ++ ++} // anon namespace ++ ++static ipa_opt_pass_d * ++make_pass_ipa_frame_header_opt (gcc::context *ctxt) ++{ ++ return new pass_ipa_frame_header_opt (ctxt); ++} ++ ++void ++loongarch_register_frame_header_opt (void) ++{ ++ opt_pass *p = make_pass_ipa_frame_header_opt (g); ++ struct register_pass_info f = { p, "comdats", 1, PASS_POS_INSERT_AFTER }; ++ register_pass (&f); ++} ++ ++ ++/* Return true if it is certain that this is a leaf function. False if it is ++ not a leaf function or if it is impossible to tell. */ ++ ++static bool ++is_leaf_function (function *fn) ++{ ++ basic_block bb; ++ gimple_stmt_iterator gsi; ++ ++ /* If we do not have a cfg for this function be conservative and assume ++ it is not a leaf function. */ ++ if (fn->cfg == NULL) ++ return false; ++ ++ FOR_EACH_BB_FN (bb, fn) ++ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) ++ if (is_gimple_call (gsi_stmt (gsi))) ++ return false; ++ return true; ++} ++ ++/* Return true if this function has inline assembly code or if we cannot ++ be certain that it does not. False if we know that there is no inline ++ assembly. */ ++ ++static bool ++has_inlined_assembly (function *fn) ++{ ++ basic_block bb; ++ gimple_stmt_iterator gsi; ++ ++ /* If we do not have a cfg for this function be conservative and assume ++ it is may have inline assembly. */ ++ if (fn->cfg == NULL) ++ return true; ++ ++ FOR_EACH_BB_FN (bb, fn) ++ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) ++ if (gimple_code (gsi_stmt (gsi)) == GIMPLE_ASM) ++ return true; ++ ++ return false; ++} ++ ++/* Return true if this function will use the stack space allocated by its ++ caller or if we cannot determine for certain that it does not. 
*/ ++ ++static bool ++needs_frame_header_p (function *fn) ++{ ++ tree t; ++ ++ if (fn->decl == NULL) ++ return true; ++ ++ if (fn->stdarg) ++ return true; ++ ++ for (t = DECL_ARGUMENTS (fn->decl); t; t = TREE_CHAIN (t)) ++ { ++ if (!use_register_for_decl (t)) ++ return true; ++ ++ /* Some 64-bit types may get copied to general registers using the frame ++ header, see loongarch_output_64bit_xfer. Checking for SImode only may be ++ overly restrictive but it is guaranteed to be safe. */ ++ if (DECL_MODE (t) != SImode) ++ return true; ++ } ++ ++ return false; ++} ++ ++/* Return true if the argument stack space allocated by function FN is used. ++ Return false if the space is needed or if the need for the space cannot ++ be determined. */ ++ ++static bool ++callees_functions_use_frame_header (function *fn) ++{ ++ basic_block bb; ++ gimple_stmt_iterator gsi; ++ gimple *stmt; ++ tree called_fn_tree; ++ function *called_fn; ++ ++ if (fn->cfg == NULL) ++ return true; ++ ++ FOR_EACH_BB_FN (bb, fn) ++ { ++ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ stmt = gsi_stmt (gsi); ++ if (is_gimple_call (stmt)) ++ { ++ called_fn_tree = gimple_call_fndecl (stmt); ++ if (called_fn_tree != NULL) ++ { ++ called_fn = DECL_STRUCT_FUNCTION (called_fn_tree); ++ if (called_fn == NULL ++ || DECL_WEAK (called_fn_tree) ++ || has_inlined_assembly (called_fn) ++ || !is_leaf_function (called_fn) ++ || !called_fn->machine->does_not_use_frame_header) ++ return true; ++ } ++ else ++ return true; ++ } ++ } ++ } ++ return false; ++} ++ ++/* Set the callers_may_not_allocate_frame flag for any function which ++ function FN calls because FN may not allocate a frame header. 
*/ ++ ++static void ++set_callers_may_not_allocate_frame (function *fn) ++{ ++ basic_block bb; ++ gimple_stmt_iterator gsi; ++ gimple *stmt; ++ tree called_fn_tree; ++ function *called_fn; ++ ++ if (fn->cfg == NULL) ++ return; ++ ++ FOR_EACH_BB_FN (bb, fn) ++ { ++ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ stmt = gsi_stmt (gsi); ++ if (is_gimple_call (stmt)) ++ { ++ called_fn_tree = gimple_call_fndecl (stmt); ++ if (called_fn_tree != NULL) ++ { ++ called_fn = DECL_STRUCT_FUNCTION (called_fn_tree); ++ if (called_fn != NULL) ++ called_fn->machine->callers_may_not_allocate_frame = true; ++ } ++ } ++ } ++ } ++ return; ++} ++ ++/* Scan each function to determine those that need its frame headers. Perform ++ a second scan to determine if the allocation can be skipped because none of ++ their callees require the frame header. */ ++ ++static unsigned int ++frame_header_opt () ++{ ++ struct cgraph_node *node; ++ function *fn; ++ ++ FOR_EACH_DEFINED_FUNCTION (node) ++ { ++ fn = node->get_fun (); ++ if (fn != NULL) ++ fn->machine->does_not_use_frame_header = !needs_frame_header_p (fn); ++ } ++ ++ FOR_EACH_DEFINED_FUNCTION (node) ++ { ++ fn = node->get_fun (); ++ if (fn != NULL) ++ fn->machine->optimize_call_stack ++ = !callees_functions_use_frame_header (fn) && !is_leaf_function (fn); ++ } ++ ++ FOR_EACH_DEFINED_FUNCTION (node) ++ { ++ fn = node->get_fun (); ++ if (fn != NULL && fn->machine->optimize_call_stack) ++ set_callers_may_not_allocate_frame (fn); ++ } ++ ++ return 0; ++} +diff -uNr gcc-10.3.0.org/gcc/config/loongarch/generic.md gcc-10.3.0/gcc/config/loongarch/generic.md +--- gcc-10.3.0.org/gcc/config/loongarch/generic.md 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/config/loongarch/generic.md 2022-03-23 17:40:29.343280152 +0800 +@@ -0,0 +1,109 @@ ++;; Generic DFA-based pipeline description for LARCH targets ++;; Copyright (C) 2004-2018 Free Software Foundation, Inc. ++;; ++;; This file is part of GCC. 
++ ++;; GCC is free software; you can redistribute it and/or modify it ++;; under the terms of the GNU General Public License as published ++;; by the Free Software Foundation; either version 3, or (at your ++;; option) any later version. ++ ++;; GCC is distributed in the hope that it will be useful, but WITHOUT ++;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++;; License for more details. ++ ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; . ++ ++ ++;; This file is derived from the old define_function_unit description. ++;; Each reservation can be overridden on a processor-by-processor basis. ++ ++(define_insn_reservation "generic_alu" 1 ++ (eq_attr "type" "unknown,prefetch,prefetchx,condmove,const,arith, ++ shift,slt,clz,trap,multi,nop,logical,signext,move") ++ "alu") ++ ++(define_insn_reservation "generic_load" 3 ++ (eq_attr "type" "load,fpload,fpidxload") ++ "alu") ++ ++(define_insn_reservation "generic_store" 1 ++ (eq_attr "type" "store,fpstore,fpidxstore") ++ "alu") ++ ++(define_insn_reservation "generic_xfer" 2 ++ (eq_attr "type" "mftg,mgtf") ++ "alu") ++ ++(define_insn_reservation "generic_branch" 1 ++ (eq_attr "type" "branch,jump,call") ++ "alu") ++ ++(define_insn_reservation "generic_imul" 17 ++ (eq_attr "type" "imul,imul3") ++ "imuldiv*17") ++ ++(define_insn_reservation "generic_fcvt" 1 ++ (eq_attr "type" "fcvt") ++ "alu") ++ ++(define_insn_reservation "generic_fmove" 2 ++ (eq_attr "type" "fabs,fneg,fmove") ++ "alu") ++ ++(define_insn_reservation "generic_fcmp" 3 ++ (eq_attr "type" "fcmp") ++ "alu") ++ ++(define_insn_reservation "generic_fadd" 4 ++ (eq_attr "type" "fadd") ++ "alu") ++ ++(define_insn_reservation "generic_fmul_single" 7 ++ (and (eq_attr "type" "fmul,fmadd") ++ (eq_attr "mode" "SF")) ++ "alu") ++ ++(define_insn_reservation "generic_fmul_double" 8 ++ (and (eq_attr 
"type" "fmul,fmadd") ++ (eq_attr "mode" "DF")) ++ "alu") ++ ++(define_insn_reservation "generic_fdiv_single" 23 ++ (and (eq_attr "type" "fdiv,frdiv") ++ (eq_attr "mode" "SF")) ++ "alu") ++ ++(define_insn_reservation "generic_fdiv_double" 36 ++ (and (eq_attr "type" "fdiv,frdiv") ++ (eq_attr "mode" "DF")) ++ "alu") ++ ++(define_insn_reservation "generic_fsqrt_single" 54 ++ (and (eq_attr "type" "fsqrt,frsqrt") ++ (eq_attr "mode" "SF")) ++ "alu") ++ ++(define_insn_reservation "generic_fsqrt_double" 112 ++ (and (eq_attr "type" "fsqrt,frsqrt") ++ (eq_attr "mode" "DF")) ++ "alu") ++ ++(define_insn_reservation "generic_atomic" 10 ++ (eq_attr "type" "atomic") ++ "alu") ++ ++;; Sync loop consists of (in order) ++;; (1) optional sync, ++;; (2) LL instruction, ++;; (3) branch and 1-2 ALU instructions, ++;; (4) SC instruction, ++;; (5) branch and ALU instruction. ++;; The net result of this reservation is a big delay with a flush of ++;; ALU pipeline. ++(define_insn_reservation "generic_sync_loop" 40 ++ (eq_attr "type" "syncloop") ++ "alu*39") +diff -uNr gcc-10.3.0.org/gcc/config/loongarch/genopt.sh gcc-10.3.0/gcc/config/loongarch/genopt.sh +--- gcc-10.3.0.org/gcc/config/loongarch/genopt.sh 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/config/loongarch/genopt.sh 2022-03-23 17:40:29.343280152 +0800 +@@ -0,0 +1,123 @@ ++#!/bin/sh ++# Generate loongarch-tables.opt from the list of CPUs in loongarch-cpus.def. ++# Copyright (C) 2011-2018 Free Software Foundation, Inc. ++# ++# This file is part of GCC. ++# ++# GCC is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3, or (at your option) ++# any later version. ++# ++# GCC is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. 
++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# . ++ ++cat <. ++ ++Enum ++Name(loongarch_arch_opt_value) Type(int) ++Known LARCH CPUs (for use with the -march= and -mtune= options): ++ ++Enum ++Name(loongarch_loongarch_opt_value) Type(int) ++Known LARCH ISA levels (for use with the -loongarch option): ++ ++EnumValue ++Enum(loongarch_arch_opt_value) String(from-abi) Value(LARCH_ARCH_OPTION_FROM_ABI) ++ ++EnumValue ++Enum(loongarch_arch_opt_value) String(native) Value(LARCH_ARCH_OPTION_NATIVE) DriverOnly ++ ++EOF ++ ++awk -F'[(, ]+' ' ++BEGIN { ++ value = 0 ++} ++ ++# Write an entry for a single string accepted as a -march= argument. ++ ++function write_one_arch_value(name, value, flags) ++{ ++ print "EnumValue" ++ print "Enum(loongarch_arch_opt_value) String(" name ") Value(" value ")" flags ++ print "" ++ if (name ~ "^loongarch") { ++ sub("^loongarch", "", name) ++ print "EnumValue" ++ print "Enum(loongarch_loongarch_opt_value) String(" name ") Value(" value ")" ++ print "" ++ } ++} ++ ++# The logic for matching CPU name variants should be the same as in GAS. ++ ++# Write an entry for a single string accepted as a -march= argument, ++# plus any variant with a final "000" replaced by "k". ++ ++function write_arch_value_maybe_k(name, value, flags) ++{ ++ write_one_arch_value(name, value, flags) ++ if (name ~ "000$") { ++ sub("000$", "k", name) ++ write_one_arch_value(name, value, "") ++ } ++} ++ ++# Write all the entries for a -march= argument. In addition to ++# replacement of a final "000" with "k", an argument starting with ++# "vr", "rm" or "r" followed by a number, or just a plain number, ++# matches a plain number or "r" followed by a plain number. 
++ ++function write_all_arch_values(name, value) ++{ ++ write_arch_value_maybe_k(name, value, " Canonical") ++ cname = name ++ if (cname ~ "^vr") { ++ sub("^vr", "", cname) ++ } else if (cname ~ "^rm") { ++ sub("^rm", "", cname) ++ } else if (cname ~ "^r") { ++ sub("^r", "", cname) ++ } ++ if (cname ~ "^[0-9]") { ++ if (cname != name) ++ write_arch_value_maybe_k(cname, value, "") ++ rname = "r" cname ++ if (rname != name) ++ write_arch_value_maybe_k(rname, value, "") ++ } ++} ++ ++/^LARCH_CPU/ { ++ name = $2 ++ gsub("\"", "", name) ++ write_all_arch_values(name, value) ++ value++ ++}' "$1/loongarch-cpus.def" +diff -uNr gcc-10.3.0.org/gcc/config/loongarch/gnu-user.h gcc-10.3.0/gcc/config/loongarch/gnu-user.h +--- gcc-10.3.0.org/gcc/config/loongarch/gnu-user.h 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/config/loongarch/gnu-user.h 2022-03-23 17:40:29.343280152 +0800 +@@ -0,0 +1,132 @@ ++/* Definitions for LARCH systems using GNU userspace. ++ Copyright (C) 1998-2018 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. */ ++ ++#undef WCHAR_TYPE ++#define WCHAR_TYPE "int" ++ ++#undef WCHAR_TYPE_SIZE ++#define WCHAR_TYPE_SIZE 32 ++ ++#undef ASM_DECLARE_OBJECT_NAME ++#define ASM_DECLARE_OBJECT_NAME loongarch_declare_object_name ++ ++/* If we don't set MASK_ABICALLS, we can't default to PIC.
*/ ++/* #undef TARGET_DEFAULT */ ++/* #define TARGET_DEFAULT MASK_ABICALLS */ ++ ++#define TARGET_OS_CPP_BUILTINS() \ ++ do { \ ++ GNU_USER_TARGET_OS_CPP_BUILTINS(); \ ++ /* The GNU C++ standard library requires this. */ \ ++ if (c_dialect_cxx ()) \ ++ builtin_define ("_GNU_SOURCE"); \ ++ } while (0) ++ ++#undef SUBTARGET_CPP_SPEC ++#define SUBTARGET_CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}" ++ ++/* A standard GNU/Linux mapping. On most targets, it is included in ++ CC1_SPEC itself by config/linux.h, but loongarch.h overrides CC1_SPEC ++ and provides this hook instead. */ ++#undef SUBTARGET_CC1_SPEC ++#define SUBTARGET_CC1_SPEC GNU_USER_TARGET_CC1_SPEC ++ ++/* -G is incompatible with -KPIC which is the default, so only allow objects ++ in the small data section if the user explicitly asks for it. */ ++#undef LARCH_DEFAULT_GVALUE ++#define LARCH_DEFAULT_GVALUE 0 ++ ++#undef GNU_USER_TARGET_LINK_SPEC ++#define GNU_USER_TARGET_LINK_SPEC "\ ++ %{G*} %{EB} %{EL} %{shared} \ ++ %{!shared: \ ++ %{!static: \ ++ %{rdynamic:-export-dynamic} \ ++ %{mabi=lp32: -dynamic-linker " GNU_USER_DYNAMIC_LINKERLP32 "} \ ++ %{mabi=lp64: -dynamic-linker " GNU_USER_DYNAMIC_LINKERLP64 "}} \ ++ %{static}} \ ++ %{mabi=lp32:-m" GNU_USER_LINK_EMULATION32 "} \ ++ %{mabi=lp64:-m" GNU_USER_LINK_EMULATION64 "}" ++ ++#undef LINK_SPEC ++#define LINK_SPEC GNU_USER_TARGET_LINK_SPEC ++ ++/* The LARCH assembler has different syntax for .set. We set it to ++ .dummy to trap any errors. */ ++#undef SET_ASM_OP ++#define SET_ASM_OP "\t.dummy\t" ++ ++#undef ASM_OUTPUT_DEF ++#define ASM_OUTPUT_DEF(FILE,LABEL1,LABEL2) \ ++ do { \ ++ fputc ( '\t', FILE); \ ++ assemble_name (FILE, LABEL1); \ ++ fputs ( " = ", FILE); \ ++ assemble_name (FILE, LABEL2); \ ++ fputc ( '\n', FILE); \ ++ } while (0) ++ ++/* The glibc _mcount stub will save $v0 for us. Don't mess with saving ++ it, since ASM_OUTPUT_REG_PUSH/ASM_OUTPUT_REG_POP do not work in the ++ presence of $gp-relative calls. 
*/ ++#undef ASM_OUTPUT_REG_PUSH ++#undef ASM_OUTPUT_REG_POP ++ ++#undef LIB_SPEC ++#define LIB_SPEC GNU_USER_TARGET_LIB_SPEC ++ ++#define NO_SHARED_SPECS "" ++ ++/* -march=native handling only makes sense with compiler running on ++ a LARCH chip. */ ++#if defined(__loongarch__) ++extern const char *host_detect_local_cpu (int argc, const char **argv); ++# define EXTRA_SPEC_FUNCTIONS \ ++ { "local_cpu_detect", host_detect_local_cpu }, ++ ++# define MARCH_MTUNE_NATIVE_SPECS \ ++ " %{march=native:%. */ ++ ++#ifndef _GCC_LOONGARCH_BASE_INTRIN_H ++#define _GCC_LOONGARCH_BASE_INTRIN_H ++ ++#ifdef __cplusplus ++extern "C"{ ++#endif ++ ++typedef struct drdtime{ ++ unsigned long dvalue; ++ unsigned long dtimeid; ++} __drdtime_t; ++ ++typedef struct rdtime{ ++ unsigned int value; ++ unsigned int timeid; ++} __rdtime_t; ++ ++#ifdef __loongarch64 ++extern __inline __drdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__builtin_loongarch_rdtime_d (void) ++{ ++ __drdtime_t drdtime; ++ __asm__ volatile ( ++ "rdtime.d\t%[val],%[tid]\n\t" ++ : [val]"=&r"(drdtime.dvalue),[tid]"=&r"(drdtime.dtimeid) ++ : ++ ); ++ return drdtime; ++} ++#define __rdtime_d __builtin_loongarch_rdtime_d ++#endif ++ ++extern __inline __rdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__builtin_loongarch_rdtimeh_w (void) ++{ ++ __rdtime_t rdtime; ++ __asm__ volatile ( ++ "rdtimeh.w\t%[val],%[tid]\n\t" ++ : [val]"=&r"(rdtime.value),[tid]"=&r"(rdtime.timeid) ++ : ++ ); ++ return rdtime; ++} ++#define __rdtimel_w __builtin_loongarch_rdtimel_w ++ ++extern __inline __rdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__builtin_loongarch_rdtimel_w (void) ++{ ++ __rdtime_t rdtime; ++ __asm__ volatile ( ++ "rdtimel.w\t%[val],%[tid]\n\t" ++ : [val]"=&r"(rdtime.value),[tid]"=&r"(rdtime.timeid) ++ : ++ ); ++ return rdtime; ++} ++#define __rdtimeh_w __builtin_loongarch_rdtimeh_w ++ ++/* Assembly instruction format: rj, fcsr */ ++/* Data 
types in instruction templates: USI, UQI */ ++#define __movfcsr2gr(/*ui5*/ _1) ((unsigned int)__builtin_loongarch_movfcsr2gr((_1))) ++ ++/* Assembly instruction format: 0, fcsr, rj */ ++/* Data types in instruction templates: VOID, UQI, USI. Expands to an expression; the caller supplies the terminating ';'. */ ++#define __movgr2fcsr(/*ui5*/ _1, _2) ((void)__builtin_loongarch_movgr2fcsr((unsigned short)(_1), (unsigned int)(_2))) ++ ++#ifdef __loongarch32 ++/* Assembly instruction format: ui5, rj, si12 */ ++/* Data types in instruction templates: VOID, USI, USI, SI */ ++#define __cacop(/*ui5*/ _1, /*unsigned int*/ _2, /*si12*/ _3) ((void)__builtin_loongarch_cacop((_1), (unsigned int)(_2), (_3))) ++#elif defined __loongarch64 ++/* Assembly instruction format: ui5, rj, si12 */ ++/* Data types in instruction templates: VOID, USI, UDI, SI */ ++#define __dcacop(/*ui5*/ _1, /*unsigned long int*/ _2, /*si12*/ _3) ((void)__builtin_loongarch_dcacop((_1), (unsigned long int)(_2), (_3))) ++#else ++# error "Don't support this ABI." ++#endif ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: USI, USI */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++unsigned int __cpucfg(unsigned int _1) ++{ ++ return (unsigned int)__builtin_loongarch_cpucfg((unsigned int)_1); ++} ++ ++#ifdef __loongarch64 ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: DI, DI */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __asrtle_d(long int _1, long int _2) ++{ ++ __builtin_loongarch_asrtle_d((long int)_1, (long int)_2); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: DI, DI */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __asrtgt_d(long int _1, long int _2) ++{ ++ __builtin_loongarch_asrtgt_d((long int)_1, (long int)_2); ++} ++#endif ++ ++#ifdef __loongarch32 ++/* Assembly instruction format: rd, rj, ui5 */ ++/* Data types in instruction templates: SI,
SI, UQI */ ++#define __lddir(/*int*/ _1, /*ui5*/ _2) ((int)__builtin_loongarch_lddir((int)(_1), (_2))) ++#elif defined __loongarch64 ++/* Assembly instruction format: rd, rj, ui5 */ ++/* Data types in instruction templates: DI, DI, UQI */ ++#define __dlddir(/*long int*/ _1, /*ui5*/ _2) ((long int)__builtin_loongarch_dlddir((long int)(_1), (_2))) ++#else ++# error "Don't support this ABI." ++#endif ++ ++#ifdef __loongarch32 ++/* Assembly instruction format: rj, ui5 */ ++/* Data types in instruction templates: VOID, SI, UQI */ ++#define __ldpte(/*int*/ _1, /*ui5*/ _2) ((void)__builtin_loongarch_ldpte((int)(_1), (_2))) ++#elif defined __loongarch64 ++/* Assembly instruction format: rj, ui5 */ ++/* Data types in instruction templates: VOID, DI, UQI */ ++#define __dldpte(/*long int*/ _1, /*ui5*/ _2) ((void)__builtin_loongarch_dldpte((long int)(_1), (_2))) ++#else ++# error "Don't support this ABI." ++#endif ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: SI, QI, SI */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crc_w_b_w(char _1, int _2) ++{ ++ return (int)__builtin_loongarch_crc_w_b_w((char)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: SI, HI, SI */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crc_w_h_w(short _1, int _2) ++{ ++ return (int)__builtin_loongarch_crc_w_h_w((short)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: SI, SI, SI */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crc_w_w_w(int _1, int _2) ++{ ++ return (int)__builtin_loongarch_crc_w_w_w((int)_1, (int)_2); ++} ++ ++#ifdef __loongarch64 ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: SI, DI, SI */ ++extern __inline __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) ++int __crc_w_d_w(long int _1, int _2) ++{ ++ return (int)__builtin_loongarch_crc_w_d_w((long int)_1, (int)_2); ++} ++#endif ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: SI, QI, SI */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crcc_w_b_w(char _1, int _2) ++{ ++ return (int)__builtin_loongarch_crcc_w_b_w((char)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: SI, HI, SI */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crcc_w_h_w(short _1, int _2) ++{ ++ return (int)__builtin_loongarch_crcc_w_h_w((short)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: SI, SI, SI */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crcc_w_w_w(int _1, int _2) ++{ ++ return (int)__builtin_loongarch_crcc_w_w_w((int)_1, (int)_2); ++} ++ ++#ifdef __loongarch64 ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: SI, DI, SI */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crcc_w_d_w(long int _1, int _2) ++{ ++ return (int)__builtin_loongarch_crcc_w_d_w((long int)_1, (int)_2); ++} ++#endif ++ ++/* Assembly instruction format: rd, ui14 */ ++/* Data types in instruction templates: USI, USI */ ++#define __csrrd(/*ui14*/ _1) ((unsigned int)__builtin_loongarch_csrrd((_1))) ++ ++/* Assembly instruction format: rd, ui14 */ ++/* Data types in instruction templates: USI, USI, USI */ ++#define __csrwr(/*unsigned int*/ _1, /*ui14*/ _2) ((unsigned int)__builtin_loongarch_csrwr((unsigned int)(_1), (_2))) ++ ++/* Assembly instruction format: rd, rj, ui14 */ ++/* Data types in instruction templates: USI, USI, USI, USI */ ++#define __csrxchg(/*unsigned int*/ _1, /*unsigned int*/ _2, 
/*ui14*/ _3) ((unsigned int)__builtin_loongarch_csrxchg((unsigned int)(_1), (unsigned int)(_2), (_3))) ++ ++#ifdef __loongarch64 ++/* Assembly instruction format: rd, ui14 */ ++/* Data types in instruction templates: UDI, USI */ ++#define __dcsrrd(/*ui14*/ _1) ((unsigned long int)__builtin_loongarch_dcsrrd((_1))) ++ ++/* Assembly instruction format: rd, ui14 */ ++/* Data types in instruction templates: UDI, UDI, USI */ ++#define __dcsrwr(/*unsigned long int*/ _1, /*ui14*/ _2) ((unsigned long int)__builtin_loongarch_dcsrwr((unsigned long int)(_1), (_2))) ++ ++/* Assembly instruction format: rd, rj, ui14 */ ++/* Data types in instruction templates: UDI, UDI, UDI, USI */ ++#define __dcsrxchg(/*unsigned long int*/ _1, /*unsigned long int*/ _2, /*ui14*/ _3) ((unsigned long int)__builtin_loongarch_dcsrxchg((unsigned long int)(_1), (unsigned long int)(_2), (_3))) ++#endif ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: UQI, USI */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++unsigned char __iocsrrd_b(unsigned int _1) ++{ ++ return (unsigned char)__builtin_loongarch_iocsrrd_b((unsigned int)_1); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: UHI, USI */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++unsigned short __iocsrrd_h(unsigned int _1) ++{ ++ return (unsigned short)__builtin_loongarch_iocsrrd_h((unsigned int)_1); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: USI, USI */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++unsigned int __iocsrrd_w(unsigned int _1) ++{ ++ return (unsigned int)__builtin_loongarch_iocsrrd_w((unsigned int)_1); ++} ++ ++#ifdef __loongarch64 ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: UDI, USI */ ++extern __inline __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) ++unsigned long int __iocsrrd_d(unsigned int _1) ++{ ++ return (unsigned long int)__builtin_loongarch_iocsrrd_d((unsigned int)_1); ++} ++#endif ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: VOID, UQI, USI */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __iocsrwr_b(unsigned char _1, unsigned int _2) ++{ ++ __builtin_loongarch_iocsrwr_b((unsigned char)_1, (unsigned int)_2); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: VOID, UHI, USI */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __iocsrwr_h(unsigned short _1, unsigned int _2) ++{ ++ __builtin_loongarch_iocsrwr_h((unsigned short)_1, (unsigned int)_2); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: VOID, USI, USI */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __iocsrwr_w(unsigned int _1, unsigned int _2) ++{ ++ __builtin_loongarch_iocsrwr_w((unsigned int)_1, (unsigned int)_2); ++} ++ ++#ifdef __loongarch64 ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: VOID, UDI, USI */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __iocsrwr_d(unsigned long int _1, unsigned int _2) ++{ ++ __builtin_loongarch_iocsrwr_d((unsigned long int)_1, (unsigned int)_2); ++} ++#endif ++ ++/* Assembly instruction format: ui15 */ ++/* Data types in instruction templates: UQI */ ++#define __dbar(/*ui15*/ _1) __builtin_loongarch_dbar((_1)) ++ ++/* Assembly instruction format: ui15 */ ++/* Data types in instruction templates: UQI */ ++#define __ibar(/*ui15*/ _1) __builtin_loongarch_ibar((_1)) ++ ++#define __builtin_loongarch_syscall(a) \ ++do { /* Statement macro; 'a' must be a constant accepted by the "I" constraint. */ \ ++ __asm__ volatile ("syscall %0\n\t" \ ++ ::"I"(a)); \ ++} while (0)
++#define __syscall __builtin_loongarch_syscall ++ ++#define __builtin_loongarch_break(a) \ ++do { /* Statement macro; 'a' must be a constant accepted by the "I" constraint. */ \ ++ __asm__ volatile ("break %0\n\t" \ ++ ::"I"(a)); \ ++} while (0) ++#define __break __builtin_loongarch_break ++ ++ ++extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__builtin_loongarch_tlbsrch (void) ++{ ++ __asm__ volatile ("tlbsrch\n\t"); ++} ++#define __tlbsrch __builtin_loongarch_tlbsrch ++ ++extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__builtin_loongarch_tlbrd (void) ++{ ++ __asm__ volatile ("tlbrd\n\t"); ++} ++#define __tlbrd __builtin_loongarch_tlbrd ++ ++extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__builtin_loongarch_tlbwr (void) ++{ ++ __asm__ volatile ("tlbwr\n\t"); ++} ++#define __tlbwr __builtin_loongarch_tlbwr ++ ++extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__builtin_loongarch_tlbfill (void) ++{ ++ __asm__ volatile ("tlbfill\n\t"); ++} ++#define __tlbfill __builtin_loongarch_tlbfill ++ ++extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__builtin_loongarch_tlbclr (void) ++{ ++ __asm__ volatile ("tlbclr\n\t"); ++} ++#define __tlbclr __builtin_loongarch_tlbclr ++ ++extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__builtin_loongarch_tlbflush (void) ++{ ++ __asm__ volatile ("tlbflush\n\t"); ++} ++#define __tlbflush __builtin_loongarch_tlbflush ++ ++ ++#ifdef __cplusplus ++} ++#endif ++#endif /* _GCC_LOONGARCH_BASE_INTRIN_H */ +diff -uNr gcc-10.3.0.org/gcc/config/loongarch/linux-common.h gcc-10.3.0/gcc/config/loongarch/linux-common.h +--- gcc-10.3.0.org/gcc/config/loongarch/linux-common.h 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/config/loongarch/linux-common.h 2022-03-23 17:40:29.343280152 +0800 +@@ -0,0 +1,68 @@ ++/* Definitions for LARCH running Linux-based GNU systems with ELF format.
++ Copyright (C) 2012-2018 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. */ ++ ++#undef TARGET_OS_CPP_BUILTINS ++#define TARGET_OS_CPP_BUILTINS() \ ++ do { \ ++ GNU_USER_TARGET_OS_CPP_BUILTINS(); \ ++ /* The GNU C++ standard library requires this. */ \ ++ if (c_dialect_cxx ()) \ ++ builtin_define ("_GNU_SOURCE"); \ ++ ANDROID_TARGET_OS_CPP_BUILTINS(); \ ++ } while (0) ++ ++#define EXTRA_TARGET_D_OS_VERSIONS() \ ++ ANDROID_TARGET_D_OS_VERSIONS(); ++ ++#undef LINK_SPEC ++#define LINK_SPEC \ ++ LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LINK_SPEC, \ ++ GNU_USER_TARGET_LINK_SPEC " " ANDROID_LINK_SPEC) ++ ++#undef SUBTARGET_CC1_SPEC ++#define SUBTARGET_CC1_SPEC \ ++ LINUX_OR_ANDROID_CC (GNU_USER_TARGET_CC1_SPEC, \ ++ GNU_USER_TARGET_CC1_SPEC " " ANDROID_CC1_SPEC) ++ ++#undef CC1PLUS_SPEC ++#define CC1PLUS_SPEC \ ++ LINUX_OR_ANDROID_CC ("", ANDROID_CC1PLUS_SPEC) ++ ++#undef LIB_SPEC ++#define LIB_SPEC \ ++ LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LIB_SPEC, \ ++ GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC " " ANDROID_LIB_SPEC) ++ ++#undef STARTFILE_SPEC ++#define STARTFILE_SPEC \ ++ LINUX_OR_ANDROID_LD (GNU_USER_TARGET_STARTFILE_SPEC, ANDROID_STARTFILE_SPEC) ++ ++#undef ENDFILE_SPEC ++#define ENDFILE_SPEC \ ++ LINUX_OR_ANDROID_LD (GNU_USER_TARGET_MATHFILE_SPEC " " \ ++ GNU_USER_TARGET_ENDFILE_SPEC, \ ++ GNU_USER_TARGET_MATHFILE_SPEC " " \ ++ ANDROID_ENDFILE_SPEC) ++ ++/* Define this to be
nonzero if static stack checking is supported. */ ++#define STACK_CHECK_STATIC_BUILTIN 1 ++ ++/* FIXME*/ ++/* The default value isn't sufficient in 64-bit mode. */ ++#define STACK_CHECK_PROTECT (TARGET_64BIT ? 16 * 1024 : 12 * 1024) +diff -uNr gcc-10.3.0.org/gcc/config/loongarch/linux.h gcc-10.3.0/gcc/config/loongarch/linux.h +--- gcc-10.3.0.org/gcc/config/loongarch/linux.h 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/config/loongarch/linux.h 2022-03-23 17:40:29.344280144 +0800 +@@ -0,0 +1,29 @@ ++/* Definitions for LARCH running Linux-based GNU systems with ELF format. ++ Copyright (C) 1998-2018 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. 
*/ ++ ++#define GNU_USER_LINK_EMULATION32 "elf32loongarch" ++#define GNU_USER_LINK_EMULATION64 "elf64loongarch" ++ ++#define GLIBC_DYNAMIC_LINKERLP32 \ ++ "/lib32/ld.so.1" ++#define GLIBC_DYNAMIC_LINKERLP64 \ ++ "/lib64/ld.so.1" ++ ++#define GNU_USER_DYNAMIC_LINKERLP32 GLIBC_DYNAMIC_LINKERLP32 ++#define GNU_USER_DYNAMIC_LINKERLP64 GLIBC_DYNAMIC_LINKERLP64 +diff -uNr gcc-10.3.0.org/gcc/config/loongarch/loongarch-builtins.c gcc-10.3.0/gcc/config/loongarch/loongarch-builtins.c +--- gcc-10.3.0.org/gcc/config/loongarch/loongarch-builtins.c 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/config/loongarch/loongarch-builtins.c 2022-03-23 17:40:29.344280144 +0800 +@@ -0,0 +1,593 @@ ++/* Subroutines used for expanding LOONGARCH builtins. ++ Copyright (C) 2011-2018 Free Software Foundation, Inc. ++ Contributed by Andrew Waterman (andrew@sifive.com). ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#define IN_TARGET_CODE 1 ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "backend.h" ++#include "target.h" ++#include "rtl.h" ++#include "tree.h" ++#include "memmodel.h" ++#include "gimple.h" ++#include "tm_p.h" ++#include "optabs.h" ++#include "recog.h" ++#include "diagnostic.h" ++#include "fold-const.h" ++#include "expr.h" ++#include "langhooks.h" ++ ++/* Macros to create an enumeration identifier for a function prototype. 
*/ ++#define LARCH_FTYPE_NAME1(A, B) LARCH_##A##_FTYPE_##B ++#define LARCH_FTYPE_NAME2(A, B, C) LARCH_##A##_FTYPE_##B##_##C ++#define LARCH_FTYPE_NAME3(A, B, C, D) LARCH_##A##_FTYPE_##B##_##C##_##D ++#define LARCH_FTYPE_NAME4(A, B, C, D, E) LARCH_##A##_FTYPE_##B##_##C##_##D##_##E ++ ++/* Classifies the prototype of a built-in function. */ ++enum loongarch_function_type { ++#define DEF_LARCH_FTYPE(NARGS, LIST) LARCH_FTYPE_NAME##NARGS LIST, ++#include "config/loongarch/loongarch-ftypes.def" ++#undef DEF_LARCH_FTYPE ++ LARCH_MAX_FTYPE_MAX ++}; ++ ++/* Specifies how a built-in function should be converted into rtl. */ ++enum loongarch_builtin_type { ++ /* The function corresponds directly to an .md pattern. The return ++ value is mapped to operand 0 and the arguments are mapped to ++ operands 1 and above. */ ++ LARCH_BUILTIN_DIRECT, ++ ++ /* The function corresponds directly to an .md pattern. There is no return ++ value and the arguments are mapped to operands 0 and above. */ ++ LARCH_BUILTIN_DIRECT_NO_TARGET, ++ ++}; ++ ++/* Invoke MACRO (COND) for each C.cond.fmt condition. */ ++#define LARCH_FP_CONDITIONS(MACRO) \ ++ MACRO (f), \ ++ MACRO (un), \ ++ MACRO (eq), \ ++ MACRO (ueq), \ ++ MACRO (olt), \ ++ MACRO (ult), \ ++ MACRO (ole), \ ++ MACRO (ule), \ ++ MACRO (sf), \ ++ MACRO (ngle), \ ++ MACRO (seq), \ ++ MACRO (ngl), \ ++ MACRO (lt), \ ++ MACRO (nge), \ ++ MACRO (le), \ ++ MACRO (ngt) ++ ++/* Enumerates the codes above as LARCH_FP_COND_. */ ++#define DECLARE_LARCH_COND(X) LARCH_FP_COND_ ## X ++enum loongarch_fp_condition { ++ LARCH_FP_CONDITIONS (DECLARE_LARCH_COND) ++}; ++#undef DECLARE_LARCH_COND ++ ++/* Index X provides the string representation of LARCH_FP_COND_. */ ++#define STRINGIFY(X) #X ++const char *const loongarch_fp_conditions[16] = { ++ LARCH_FP_CONDITIONS (STRINGIFY) ++}; ++#undef STRINGIFY ++ ++/* Declare an availability predicate for built-in functions that require ++ * COND to be true. NAME is the main part of the predicate's name. 
*/ ++#define AVAIL_ALL(NAME, COND) \ ++ static unsigned int \ ++ loongarch_builtin_avail_##NAME (void) \ ++ { \ ++ return (COND) ? 1 : 0; \ ++ } ++ ++static unsigned int ++loongarch_builtin_avail_default (void) ++{ ++ return 1; ++} ++/* This structure describes a single built-in function. */ ++struct loongarch_builtin_description { ++ /* The code of the main .md file instruction. See loongarch_builtin_type ++ for more information. */ ++ enum insn_code icode; ++ ++ /* The floating-point comparison code to use with ICODE, if any. */ ++ enum loongarch_fp_condition cond; ++ ++ /* The name of the built-in function. */ ++ const char *name; ++ ++ /* Specifies how the function should be expanded. */ ++ enum loongarch_builtin_type builtin_type; ++ ++ /* The function's prototype. */ ++ enum loongarch_function_type function_type; ++ ++ /* Whether the function is available. */ ++ unsigned int (*avail) (void); ++}; ++ ++AVAIL_ALL (hard_float, TARGET_HARD_FLOAT_ABI) ++AVAIL_ALL (lvz, TARGET_LVZ) ++ ++/* Construct a loongarch_builtin_description from the given arguments. ++ ++ INSN is the name of the associated instruction pattern, without the ++ leading CODE_FOR_loongarch_. ++ ++ CODE is the floating-point condition code associated with the ++ function. It can be 'f' if the field is not applicable. ++ ++ NAME is the name of the function itself, without the leading ++ "__builtin_loongarch_". ++ ++ BUILTIN_TYPE and FUNCTION_TYPE are loongarch_builtin_description fields. ++ ++ AVAIL is the name of the availability predicate, without the leading ++ loongarch_builtin_avail_. 
*/ ++#define LARCH_BUILTIN(INSN, COND, NAME, BUILTIN_TYPE, \ ++ FUNCTION_TYPE, AVAIL) \ ++ { CODE_FOR_loongarch_ ## INSN, LARCH_FP_COND_ ## COND, \ ++ "__builtin_loongarch_" NAME, BUILTIN_TYPE, FUNCTION_TYPE, \ ++ loongarch_builtin_avail_ ## AVAIL } ++ ++/* Define __builtin_loongarch_, which is a LARCH_BUILTIN_DIRECT function ++ mapped to instruction CODE_FOR_loongarch_, FUNCTION_TYPE and AVAIL ++ are as for LARCH_BUILTIN. */ ++#define DIRECT_BUILTIN(INSN, FUNCTION_TYPE, AVAIL) \ ++ LARCH_BUILTIN (INSN, f, #INSN, LARCH_BUILTIN_DIRECT, FUNCTION_TYPE, AVAIL) ++ ++/* Define __builtin_loongarch_, which is a LARCH_BUILTIN_DIRECT_NO_TARGET ++ function mapped to instruction CODE_FOR_loongarch_, FUNCTION_TYPE ++ and AVAIL are as for LARCH_BUILTIN. */ ++#define DIRECT_NO_TARGET_BUILTIN(INSN, FUNCTION_TYPE, AVAIL) \ ++ LARCH_BUILTIN (INSN, f, #INSN, LARCH_BUILTIN_DIRECT_NO_TARGET, \ ++ FUNCTION_TYPE, AVAIL) ++ ++/* Define an LVZ LARCH_BUILTIN_DIRECT function __builtin_lvz_ ++ for instruction CODE_FOR_lvz_. FUNCTION_TYPE is a builtin_description ++ field. 
*/ ++#define LVZ_BUILTIN(INSN, FUNCTION_TYPE) \ ++ { CODE_FOR_lvz_ ## INSN, LARCH_FP_COND_f, \ ++ "__builtin_lvz_" #INSN, LARCH_BUILTIN_DIRECT, \ ++ FUNCTION_TYPE, loongarch_builtin_avail_lvz } ++ ++ /* Loongson support loongarch64r6 */ ++#define CODE_FOR_loongarch_fmax_sf CODE_FOR_smaxsf3 ++#define CODE_FOR_loongarch_fmax_df CODE_FOR_smaxdf3 ++#define CODE_FOR_loongarch_fmin_sf CODE_FOR_sminsf3 ++#define CODE_FOR_loongarch_fmin_df CODE_FOR_smindf3 ++#define CODE_FOR_loongarch_fmaxa_sf CODE_FOR_smaxasf3 ++#define CODE_FOR_loongarch_fmaxa_df CODE_FOR_smaxadf3 ++#define CODE_FOR_loongarch_fmina_sf CODE_FOR_sminasf3 ++#define CODE_FOR_loongarch_fmina_df CODE_FOR_sminadf3 ++#define CODE_FOR_loongarch_fclass_s CODE_FOR_fclass_s ++#define CODE_FOR_loongarch_fclass_d CODE_FOR_fclass_d ++#define CODE_FOR_loongarch_frint_s CODE_FOR_frint_s ++#define CODE_FOR_loongarch_frint_d CODE_FOR_frint_d ++#define CODE_FOR_loongarch_bytepick_w CODE_FOR_bytepick_w ++#define CODE_FOR_loongarch_bytepick_d CODE_FOR_bytepick_d ++#define CODE_FOR_loongarch_bitrev_4b CODE_FOR_bitrev_4b ++#define CODE_FOR_loongarch_bitrev_8b CODE_FOR_bitrev_8b ++ ++/* Loongson support crc */ ++#define CODE_FOR_loongarch_crc_w_b_w CODE_FOR_crc_w_b_w ++#define CODE_FOR_loongarch_crc_w_h_w CODE_FOR_crc_w_h_w ++#define CODE_FOR_loongarch_crc_w_w_w CODE_FOR_crc_w_w_w ++#define CODE_FOR_loongarch_crc_w_d_w CODE_FOR_crc_w_d_w ++#define CODE_FOR_loongarch_crcc_w_b_w CODE_FOR_crcc_w_b_w ++#define CODE_FOR_loongarch_crcc_w_h_w CODE_FOR_crcc_w_h_w ++#define CODE_FOR_loongarch_crcc_w_w_w CODE_FOR_crcc_w_w_w ++#define CODE_FOR_loongarch_crcc_w_d_w CODE_FOR_crcc_w_d_w ++ ++/* Privileged state instruction */ ++#define CODE_FOR_loongarch_cpucfg CODE_FOR_cpucfg ++#define CODE_FOR_loongarch_asrtle_d CODE_FOR_asrtle_d ++#define CODE_FOR_loongarch_asrtgt_d CODE_FOR_asrtgt_d ++#define CODE_FOR_loongarch_csrrd CODE_FOR_csrrd ++#define CODE_FOR_loongarch_dcsrrd CODE_FOR_dcsrrd ++#define CODE_FOR_loongarch_csrwr CODE_FOR_csrwr 
++#define CODE_FOR_loongarch_dcsrwr CODE_FOR_dcsrwr ++#define CODE_FOR_loongarch_csrxchg CODE_FOR_csrxchg ++#define CODE_FOR_loongarch_dcsrxchg CODE_FOR_dcsrxchg ++#define CODE_FOR_loongarch_iocsrrd_b CODE_FOR_iocsrrd_b ++#define CODE_FOR_loongarch_iocsrrd_h CODE_FOR_iocsrrd_h ++#define CODE_FOR_loongarch_iocsrrd_w CODE_FOR_iocsrrd_w ++#define CODE_FOR_loongarch_iocsrrd_d CODE_FOR_iocsrrd_d ++#define CODE_FOR_loongarch_iocsrwr_b CODE_FOR_iocsrwr_b ++#define CODE_FOR_loongarch_iocsrwr_h CODE_FOR_iocsrwr_h ++#define CODE_FOR_loongarch_iocsrwr_w CODE_FOR_iocsrwr_w ++#define CODE_FOR_loongarch_iocsrwr_d CODE_FOR_iocsrwr_d ++#define CODE_FOR_loongarch_lddir CODE_FOR_lddir ++#define CODE_FOR_loongarch_dlddir CODE_FOR_dlddir ++#define CODE_FOR_loongarch_ldpte CODE_FOR_ldpte ++#define CODE_FOR_loongarch_dldpte CODE_FOR_dldpte ++#define CODE_FOR_loongarch_cacop CODE_FOR_cacop ++#define CODE_FOR_loongarch_dcacop CODE_FOR_dcacop ++#define CODE_FOR_loongarch_dbar CODE_FOR_dbar ++#define CODE_FOR_loongarch_ibar CODE_FOR_ibar ++ ++static const struct loongarch_builtin_description loongarch_builtins[] = { ++#define LARCH_MOVFCSR2GR 0 ++ DIRECT_BUILTIN (movfcsr2gr, LARCH_USI_FTYPE_UQI, hard_float), ++#define LARCH_MOVGR2FCSR 1 ++ DIRECT_NO_TARGET_BUILTIN (movgr2fcsr, LARCH_VOID_FTYPE_UQI_USI, hard_float), ++ ++ DIRECT_NO_TARGET_BUILTIN (cacop, LARCH_VOID_FTYPE_USI_USI_SI, default), ++ DIRECT_NO_TARGET_BUILTIN (dcacop, LARCH_VOID_FTYPE_USI_UDI_SI, default), ++ DIRECT_NO_TARGET_BUILTIN (dbar, LARCH_VOID_FTYPE_USI, default), ++ DIRECT_NO_TARGET_BUILTIN (ibar, LARCH_VOID_FTYPE_USI, default), ++ ++ DIRECT_BUILTIN (fmax_sf, LARCH_SF_FTYPE_SF_SF, hard_float), ++ DIRECT_BUILTIN (fmax_df, LARCH_DF_FTYPE_DF_DF, hard_float), ++ DIRECT_BUILTIN (fmin_sf, LARCH_SF_FTYPE_SF_SF, hard_float), ++ DIRECT_BUILTIN (fmin_df, LARCH_DF_FTYPE_DF_DF, hard_float), ++ DIRECT_BUILTIN (fmaxa_sf, LARCH_SF_FTYPE_SF_SF, hard_float), ++ DIRECT_BUILTIN (fmaxa_df, LARCH_DF_FTYPE_DF_DF, hard_float), ++ DIRECT_BUILTIN 
(fmina_sf, LARCH_SF_FTYPE_SF_SF, hard_float), ++ DIRECT_BUILTIN (fmina_df, LARCH_DF_FTYPE_DF_DF, hard_float), ++ DIRECT_BUILTIN (fclass_s, LARCH_SF_FTYPE_SF, hard_float), ++ DIRECT_BUILTIN (fclass_d, LARCH_DF_FTYPE_DF, hard_float), ++ DIRECT_BUILTIN (frint_s, LARCH_SF_FTYPE_SF, hard_float), ++ DIRECT_BUILTIN (frint_d, LARCH_DF_FTYPE_DF, hard_float), ++ DIRECT_BUILTIN (bytepick_w, LARCH_SI_FTYPE_SI_SI_QI, default), ++ DIRECT_BUILTIN (bytepick_d, LARCH_DI_FTYPE_DI_DI_QI, default), ++ DIRECT_BUILTIN (bitrev_4b, LARCH_SI_FTYPE_SI, default), ++ DIRECT_BUILTIN (bitrev_8b, LARCH_DI_FTYPE_DI, default), ++ DIRECT_BUILTIN (cpucfg, LARCH_USI_FTYPE_USI, default), ++ DIRECT_BUILTIN (asrtle_d, LARCH_VOID_FTYPE_DI_DI, default), ++ DIRECT_BUILTIN (asrtgt_d, LARCH_VOID_FTYPE_DI_DI, default), ++ DIRECT_BUILTIN (dlddir, LARCH_DI_FTYPE_DI_UQI, default), ++ DIRECT_BUILTIN (lddir, LARCH_SI_FTYPE_SI_UQI, default), ++ DIRECT_NO_TARGET_BUILTIN (dldpte, LARCH_VOID_FTYPE_DI_UQI, default), ++ DIRECT_NO_TARGET_BUILTIN (ldpte, LARCH_VOID_FTYPE_SI_UQI, default), ++ ++ /* CRC Instrinsic */ ++ ++ DIRECT_BUILTIN (crc_w_b_w, LARCH_SI_FTYPE_QI_SI, default), ++ DIRECT_BUILTIN (crc_w_h_w, LARCH_SI_FTYPE_HI_SI, default), ++ DIRECT_BUILTIN (crc_w_w_w, LARCH_SI_FTYPE_SI_SI, default), ++ DIRECT_BUILTIN (crc_w_d_w, LARCH_SI_FTYPE_DI_SI, default), ++ DIRECT_BUILTIN (crcc_w_b_w, LARCH_SI_FTYPE_QI_SI, default), ++ DIRECT_BUILTIN (crcc_w_h_w, LARCH_SI_FTYPE_HI_SI, default), ++ DIRECT_BUILTIN (crcc_w_w_w, LARCH_SI_FTYPE_SI_SI, default), ++ DIRECT_BUILTIN (crcc_w_d_w, LARCH_SI_FTYPE_DI_SI, default), ++ ++ /* Built-in functions for LVZ. 
*/ ++ LVZ_BUILTIN (gcsrrd, LARCH_USI_FTYPE_USI), ++ LVZ_BUILTIN (gcsrwr, LARCH_USI_FTYPE_USI_USI), ++ LVZ_BUILTIN (gcsrxchg, LARCH_USI_FTYPE_USI_USI_USI), ++ LVZ_BUILTIN (dgcsrrd, LARCH_UDI_FTYPE_USI), ++ LVZ_BUILTIN (dgcsrwr, LARCH_UDI_FTYPE_UDI_USI), ++ LVZ_BUILTIN (dgcsrxchg, LARCH_UDI_FTYPE_UDI_UDI_USI), ++ ++ DIRECT_BUILTIN (csrrd, LARCH_USI_FTYPE_USI, default), ++ DIRECT_BUILTIN (dcsrrd, LARCH_UDI_FTYPE_USI, default), ++ DIRECT_BUILTIN (csrwr, LARCH_USI_FTYPE_USI_USI, default), ++ DIRECT_BUILTIN (dcsrwr, LARCH_UDI_FTYPE_UDI_USI, default), ++ DIRECT_BUILTIN (csrxchg, LARCH_USI_FTYPE_USI_USI_USI, default), ++ DIRECT_BUILTIN (dcsrxchg, LARCH_UDI_FTYPE_UDI_UDI_USI, default), ++ DIRECT_BUILTIN (iocsrrd_b, LARCH_UQI_FTYPE_USI, default), ++ DIRECT_BUILTIN (iocsrrd_h, LARCH_UHI_FTYPE_USI, default), ++ DIRECT_BUILTIN (iocsrrd_w, LARCH_USI_FTYPE_USI, default), ++ DIRECT_BUILTIN (iocsrrd_d, LARCH_UDI_FTYPE_USI, default), ++ DIRECT_NO_TARGET_BUILTIN (iocsrwr_b, LARCH_VOID_FTYPE_UQI_USI, default), ++ DIRECT_NO_TARGET_BUILTIN (iocsrwr_h, LARCH_VOID_FTYPE_UHI_USI, default), ++ DIRECT_NO_TARGET_BUILTIN (iocsrwr_w, LARCH_VOID_FTYPE_USI_USI, default), ++ DIRECT_NO_TARGET_BUILTIN (iocsrwr_d, LARCH_VOID_FTYPE_UDI_USI, default), ++}; ++ ++/* Index I is the function declaration for loongarch_builtins[I], or null if the ++ function isn't defined on this target. */ ++static GTY(()) tree loongarch_builtin_decls[ARRAY_SIZE (loongarch_builtins)]; ++/* Get the index I of the function declaration for loongarch_builtin_decls[I] ++ using the instruction code or return null if not defined for the target. */ ++static GTY(()) int loongarch_get_builtin_decl_index[NUM_INSN_CODES]; ++ ++/* Return a type for 'const volatile void *'. 
*/ ++ ++static tree ++loongarch_build_cvpointer_type (void) ++{ ++ static tree cache; ++ ++ if (cache == NULL_TREE) ++ cache = build_pointer_type (build_qualified_type ++ (void_type_node, ++ TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE)); ++ return cache; ++} ++ ++/* Source-level argument types. */ ++#define LARCH_ATYPE_VOID void_type_node ++#define LARCH_ATYPE_INT integer_type_node ++#define LARCH_ATYPE_POINTER ptr_type_node ++#define LARCH_ATYPE_CVPOINTER loongarch_build_cvpointer_type () ++ ++/* Standard mode-based argument types. */ ++#define LARCH_ATYPE_QI intQI_type_node ++#define LARCH_ATYPE_UQI unsigned_intQI_type_node ++#define LARCH_ATYPE_HI intHI_type_node ++#define LARCH_ATYPE_UHI unsigned_intHI_type_node ++#define LARCH_ATYPE_SI intSI_type_node ++#define LARCH_ATYPE_USI unsigned_intSI_type_node ++#define LARCH_ATYPE_DI intDI_type_node ++#define LARCH_ATYPE_UDI unsigned_intDI_type_node ++#define LARCH_ATYPE_SF float_type_node ++#define LARCH_ATYPE_DF double_type_node ++ ++/* LARCH_FTYPE_ATYPESN takes N LARCH_FTYPES-like type codes and lists ++ their associated LARCH_ATYPEs. */ ++#define LARCH_FTYPE_ATYPES1(A, B) \ ++ LARCH_ATYPE_##A, LARCH_ATYPE_##B ++ ++#define LARCH_FTYPE_ATYPES2(A, B, C) \ ++ LARCH_ATYPE_##A, LARCH_ATYPE_##B, LARCH_ATYPE_##C ++ ++#define LARCH_FTYPE_ATYPES3(A, B, C, D) \ ++ LARCH_ATYPE_##A, LARCH_ATYPE_##B, LARCH_ATYPE_##C, LARCH_ATYPE_##D ++ ++#define LARCH_FTYPE_ATYPES4(A, B, C, D, E) \ ++ LARCH_ATYPE_##A, LARCH_ATYPE_##B, LARCH_ATYPE_##C, LARCH_ATYPE_##D, \ ++ LARCH_ATYPE_##E ++ ++/* Return the function type associated with function prototype TYPE. 
*/ ++ ++static tree ++loongarch_build_function_type (enum loongarch_function_type type) ++{ ++ static tree types[(int) LARCH_MAX_FTYPE_MAX]; ++ ++ if (types[(int) type] == NULL_TREE) ++ switch (type) ++ { ++#define DEF_LARCH_FTYPE(NUM, ARGS) \ ++ case LARCH_FTYPE_NAME##NUM ARGS: \ ++ types[(int) type] \ ++ = build_function_type_list (LARCH_FTYPE_ATYPES##NUM ARGS, \ ++ NULL_TREE); \ ++ break; ++#include "config/loongarch/loongarch-ftypes.def" ++#undef DEF_LARCH_FTYPE ++ default: ++ gcc_unreachable (); ++ } ++ ++ return types[(int) type]; ++} ++ ++/* Implement TARGET_INIT_BUILTINS. */ ++ ++void ++loongarch_init_builtins (void) ++{ ++ const struct loongarch_builtin_description *d; ++ unsigned int i; ++ ++ /* Iterate through all of the bdesc arrays, initializing all of the ++ builtin functions. */ ++ for (i = 0; i < ARRAY_SIZE (loongarch_builtins); i++) ++ { ++ d = &loongarch_builtins[i]; ++ if (d->avail ()) ++ { ++ loongarch_builtin_decls[i] ++ = add_builtin_function (d->name, ++ loongarch_build_function_type (d->function_type), ++ i, BUILT_IN_MD, NULL, NULL); ++ loongarch_get_builtin_decl_index[d->icode] = i; ++ } ++ } ++} ++ ++/* Implement TARGET_BUILTIN_DECL. */ ++ ++tree ++loongarch_builtin_decl (unsigned int code, bool initialize_p ATTRIBUTE_UNUSED) ++{ ++ if (code >= ARRAY_SIZE (loongarch_builtins)) ++ return error_mark_node; ++ return loongarch_builtin_decls[code]; ++} ++ ++/* Take argument ARGNO from EXP's argument list and convert it into ++ an expand operand. Store the operand in *OP. */ ++ ++static void ++loongarch_prepare_builtin_arg (struct expand_operand *op, tree exp, ++ unsigned int argno) ++{ ++ tree arg; ++ rtx value; ++ ++ arg = CALL_EXPR_ARG (exp, argno); ++ value = expand_normal (arg); ++ create_input_operand (op, value, TYPE_MODE (TREE_TYPE (arg))); ++} ++ ++/* Expand instruction ICODE as part of a built-in function sequence. ++ Use the first NOPS elements of OPS as the instruction's operands. 
++ HAS_TARGET_P is true if operand 0 is a target; it is false if the ++ instruction has no target. ++ ++ Return the target rtx if HAS_TARGET_P, otherwise return const0_rtx. */ ++ ++static rtx ++loongarch_expand_builtin_insn (enum insn_code icode, unsigned int nops, ++ struct expand_operand *ops, bool has_target_p) ++{ ++ int error_opno = 0, rangelo = 0, rangehi =0 ; ++ ++ switch(icode){ ++ case CODE_FOR_csrrd: ++ case CODE_FOR_dcsrrd: ++ case CODE_FOR_csrwr: ++ case CODE_FOR_dcsrwr: ++ case CODE_FOR_csrxchg: ++ case CODE_FOR_dcsrxchg: ++ case CODE_FOR_iocsrrd_b: ++ case CODE_FOR_iocsrrd_h: ++ case CODE_FOR_iocsrrd_w: ++ case CODE_FOR_iocsrrd_d: ++ case CODE_FOR_iocsrwr_b: ++ case CODE_FOR_iocsrwr_h: ++ case CODE_FOR_iocsrwr_w: ++ case CODE_FOR_iocsrwr_d: ++ if (!maybe_expand_insn (icode, nops, ops)) ++ { ++ error ("invalid argument to built-in function"); ++ return has_target_p ? gen_reg_rtx (ops[0].mode) : const0_rtx; ++ } ++ emit_barrier(); ++ break; ++ default: ++ break; ++ } ++ ++ if (error_opno != 0) ++ { ++ error ("argument %d to the built-in must be a constant" ++ " in range %d to %d", error_opno, rangelo, rangehi); ++ return has_target_p ? gen_reg_rtx (ops[0].mode) : const0_rtx; ++ } ++ else if (!maybe_expand_insn (icode, nops, ops)) ++ { ++ error ("invalid argument to built-in function"); ++ return has_target_p ? gen_reg_rtx (ops[0].mode) : const0_rtx; ++ } ++ ++ return has_target_p ? ops[0].value : const0_rtx; ++} ++ ++/* Expand a LARCH_BUILTIN_DIRECT or LARCH_BUILTIN_DIRECT_NO_TARGET function; ++ HAS_TARGET_P says which. EXP is the CALL_EXPR that calls the function ++ and ICODE is the code of the associated .md pattern. TARGET, if nonnull, ++ suggests a good place to put the result. */ ++ ++static rtx ++loongarch_expand_builtin_direct (enum insn_code icode, rtx target, tree exp, ++ bool has_target_p) ++{ ++ struct expand_operand ops[MAX_RECOG_OPERANDS]; ++ int opno, argno; ++ ++ /* Map any target to operand 0. 
*/ ++ opno = 0; ++ if (has_target_p) ++ create_output_operand (&ops[opno++], target, TYPE_MODE (TREE_TYPE (exp))); ++ ++ /* Map the arguments to the other operands. */ ++ gcc_assert (opno + call_expr_nargs (exp) ++ == insn_data[icode].n_generator_args); ++ for (argno = 0; argno < call_expr_nargs (exp); argno++) ++ loongarch_prepare_builtin_arg (&ops[opno++], exp, argno); ++ ++ return loongarch_expand_builtin_insn (icode, opno, ops, has_target_p); ++} ++ ++/* Implement TARGET_EXPAND_BUILTIN. */ ++ ++rtx ++loongarch_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, ++ machine_mode mode, int ignore) ++{ ++ tree fndecl; ++ unsigned int fcode, avail; ++ const struct loongarch_builtin_description *d; ++ ++ fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); ++ fcode = DECL_FUNCTION_CODE (fndecl); ++ gcc_assert (fcode < ARRAY_SIZE (loongarch_builtins)); ++ d = &loongarch_builtins[fcode]; ++ avail = d->avail (); ++ gcc_assert (avail != 0); ++ switch (d->builtin_type) ++ { ++ case LARCH_BUILTIN_DIRECT: ++ return loongarch_expand_builtin_direct (d->icode, target, exp, true); ++ ++ case LARCH_BUILTIN_DIRECT_NO_TARGET: ++ return loongarch_expand_builtin_direct (d->icode, target, exp, false); ++ ++ } ++ gcc_unreachable (); ++} ++ ++/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. 
*/ ++ ++void ++loongarch_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) ++{ ++ if (!TARGET_HARD_FLOAT_ABI) ++ return; ++ tree exceptions_var = create_tmp_var_raw (LARCH_ATYPE_USI); ++ tree fcsr_orig_var = create_tmp_var_raw (LARCH_ATYPE_USI); ++ tree fcsr_mod_var = create_tmp_var_raw (LARCH_ATYPE_USI); ++ tree const0 = build_int_cst (LARCH_ATYPE_UQI, 0); ++ tree get_fcsr = loongarch_builtin_decls[LARCH_MOVFCSR2GR]; ++ tree set_fcsr = loongarch_builtin_decls[LARCH_MOVGR2FCSR]; ++ tree get_fcsr_hold_call = build_call_expr (get_fcsr, 1, const0); ++ tree hold_assign_orig = build2 (MODIFY_EXPR, LARCH_ATYPE_USI, ++ fcsr_orig_var, get_fcsr_hold_call); ++ tree hold_mod_val = build2 (BIT_AND_EXPR, LARCH_ATYPE_USI, fcsr_orig_var, ++ build_int_cst (LARCH_ATYPE_USI, 0xffe0ffe0)); ++ tree hold_assign_mod = build2 (MODIFY_EXPR, LARCH_ATYPE_USI, ++ fcsr_mod_var, hold_mod_val); ++ tree set_fcsr_hold_call = build_call_expr (set_fcsr, 2, const0, fcsr_mod_var); ++ tree hold_all = build2 (COMPOUND_EXPR, LARCH_ATYPE_USI, ++ hold_assign_orig, hold_assign_mod); ++ *hold = build2 (COMPOUND_EXPR, void_type_node, hold_all, ++ set_fcsr_hold_call); ++ ++ *clear = build_call_expr (set_fcsr, 2, const0, fcsr_mod_var); ++ ++ tree get_fcsr_update_call = build_call_expr (get_fcsr, 1, const0); ++ *update = build2 (MODIFY_EXPR, LARCH_ATYPE_USI, ++ exceptions_var, get_fcsr_update_call); ++ tree set_fcsr_update_call = build_call_expr (set_fcsr, 2, const0, fcsr_orig_var); ++ *update = build2 (COMPOUND_EXPR, void_type_node, *update, ++ set_fcsr_update_call); ++ tree atomic_feraiseexcept ++ = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT); ++ tree int_exceptions_var = fold_convert (integer_type_node, ++ exceptions_var); ++ tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept, ++ 1, int_exceptions_var); ++ *update = build2 (COMPOUND_EXPR, void_type_node, *update, ++ atomic_feraiseexcept_call); ++} ++ ++/* Implement TARGET_BUILTIN_VA_LIST. 
*/ ++ ++tree ++loongarch_build_builtin_va_list (void) ++{ ++ return ptr_type_node; ++} ++ +diff -uNr gcc-10.3.0.org/gcc/config/loongarch/loongarch.c gcc-10.3.0/gcc/config/loongarch/loongarch.c +--- gcc-10.3.0.org/gcc/config/loongarch/loongarch.c 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/config/loongarch/loongarch.c 2022-03-23 17:40:29.347280120 +0800 +@@ -0,0 +1,9636 @@ ++/* Subroutines used for LARCH code generation. ++ Copyright (C) 1989-2018 Free Software Foundation, Inc. ++ Contributed by A. Lichnewsky, lich@inria.inria.fr. ++ Changes by Michael Meissner, meissner@osf.org. ++ 64-bit r4000 support by Ian Lance Taylor, ian@cygnus.com, and ++ Brendan Eich, brendan@microunity.com. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. 
*/ ++ ++#define IN_TARGET_CODE 1 ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "backend.h" ++#include "target.h" ++#include "rtl.h" ++#include "tree.h" ++#include "memmodel.h" ++#include "gimple.h" ++#include "cfghooks.h" ++#include "df.h" ++#include "tm_p.h" ++#include "stringpool.h" ++#include "attribs.h" ++#include "optabs.h" ++#include "regs.h" ++#include "emit-rtl.h" ++#include "recog.h" ++#include "cgraph.h" ++#include "diagnostic.h" ++#include "insn-attr.h" ++#include "output.h" ++#include "alias.h" ++#include "fold-const.h" ++#include "varasm.h" ++#include "stor-layout.h" ++#include "calls.h" ++#include "explow.h" ++#include "expr.h" ++#include "libfuncs.h" ++#include "reload.h" ++#include "common/common-target.h" ++#include "langhooks.h" ++#include "cfgrtl.h" ++#include "cfganal.h" ++#include "sched-int.h" ++#include "gimplify.h" ++#include "target-globals.h" ++#include "tree-pass.h" ++#include "context.h" ++#include "builtins.h" ++#include "rtl-iter.h" ++ ++/* This file should be included last. */ ++#include "target-def.h" ++ ++/* True if X is an UNSPEC wrapper around a SYMBOL_REF or LABEL_REF. */ ++#define UNSPEC_ADDRESS_P(X) \ ++ (GET_CODE (X) == UNSPEC \ ++ && XINT (X, 1) >= UNSPEC_ADDRESS_FIRST \ ++ && XINT (X, 1) < UNSPEC_ADDRESS_FIRST + NUM_SYMBOL_TYPES) ++ ++/* Extract the symbol or label from UNSPEC wrapper X. */ ++#define UNSPEC_ADDRESS(X) \ ++ XVECEXP (X, 0, 0) ++ ++/* Extract the symbol type from UNSPEC wrapper X. */ ++#define UNSPEC_ADDRESS_TYPE(X) \ ++ ((enum loongarch_symbol_type) (XINT (X, 1) - UNSPEC_ADDRESS_FIRST)) ++ ++/* The maximum distance between the top of the stack frame and the ++ value $sp has when we save and restore registers. ++*/ ++#define LARCH_MAX_FIRST_STACK_STEP 0x7f0 ++ ++/* True if INSN is a loongarch.md pattern or asm statement. */ ++/* ??? This test exists through the compiler, perhaps it should be ++ moved to rtl.h. 
*/ ++#define USEFUL_INSN_P(INSN) \ ++ (NONDEBUG_INSN_P (INSN) \ ++ && GET_CODE (PATTERN (INSN)) != USE \ ++ && GET_CODE (PATTERN (INSN)) != CLOBBER) ++ ++/* If INSN is a delayed branch sequence, return the first instruction ++ in the sequence, otherwise return INSN itself. */ ++#define SEQ_BEGIN(INSN) \ ++ (INSN_P (INSN) && GET_CODE (PATTERN (INSN)) == SEQUENCE \ ++ ? as_a (XVECEXP (PATTERN (INSN), 0, 0)) \ ++ : (INSN)) ++ ++/* Likewise for the last instruction in a delayed branch sequence. */ ++#define SEQ_END(INSN) \ ++ (INSN_P (INSN) && GET_CODE (PATTERN (INSN)) == SEQUENCE \ ++ ? as_a (XVECEXP (PATTERN (INSN), \ ++ 0, \ ++ XVECLEN (PATTERN (INSN), 0) - 1)) \ ++ : (INSN)) ++ ++/* Execute the following loop body with SUBINSN set to each instruction ++ between SEQ_BEGIN (INSN) and SEQ_END (INSN) inclusive. */ ++#define FOR_EACH_SUBINSN(SUBINSN, INSN) \ ++ for ((SUBINSN) = SEQ_BEGIN (INSN); \ ++ (SUBINSN) != NEXT_INSN (SEQ_END (INSN)); \ ++ (SUBINSN) = NEXT_INSN (SUBINSN)) ++ ++/* True if bit BIT is set in VALUE. */ ++#define BITSET_P(VALUE, BIT) (((VALUE) & (1 << (BIT))) != 0) ++ ++/* Classifies an address. ++ ++ ADDRESS_REG ++ A natural register + offset address. The register satisfies ++ loongarch_valid_base_register_p and the offset is a const_arith_operand. ++ ++ ADDRESS_CONST_INT ++ A signed 16-bit constant address. ++ ++ ADDRESS_SYMBOLIC: ++ A constant symbolic address. */ ++enum loongarch_address_type { ++ ADDRESS_REG, ++ ADDRESS_CONST_INT, ++ ADDRESS_SYMBOLIC ++}; ++ ++/* A class used to control a comdat-style stub that we output in each ++ translation unit that needs it. */ ++class loongarch_one_only_stub { ++public: ++ virtual ~loongarch_one_only_stub () {} ++ ++ /* Return the name of the stub. */ ++ virtual const char *get_name () = 0; ++ ++ /* Output the body of the function to asm_out_file. */ ++ virtual void output_body () = 0; ++}; ++ ++/* Tuning information that is automatically derived from other sources ++ (such as the scheduler). 
*/ ++static struct { ++ /* The architecture and tuning settings that this structure describes. */ ++ enum processor arch; ++ enum processor tune; ++ ++ /* True if the structure has been initialized. */ ++ bool initialized_p; ++ ++} loongarch_tuning_info; ++ ++/* Information about an address described by loongarch_address_type. ++ ++ ADDRESS_CONST_INT ++ No fields are used. ++ ++ ADDRESS_REG ++ REG is the base register and OFFSET is the constant offset. ++ ++ ADDRESS_SYMBOLIC ++ SYMBOL_TYPE is the type of symbol that the address references. */ ++struct loongarch_address_info { ++ enum loongarch_address_type type; ++ rtx reg; ++ rtx offset; ++ enum loongarch_symbol_type symbol_type; ++}; ++ ++/* Method to load immediate number fields. ++ ++ METHOD_NORMAL: ++ load immediate number 0-31 bit ++ ++ METHOD_LU32I: ++ load imm 32-51 bit ++ ++ METHOD_LU52I: ++ load imm 52-63 bit ++ ++ METHOD_INSV: ++ imm 0xfff00000fffffxxx ++ */ ++enum loongarch_load_imm_method { ++ METHOD_NORMAL, ++ METHOD_LU32I, ++ METHOD_LU52I, ++ METHOD_INSV ++}; ++ ++/* One stage in a constant building sequence. These sequences have ++ the form: ++ ++ A = VALUE[0] ++ A = A CODE[1] VALUE[1] ++ A = A CODE[2] VALUE[2] ++ ... ++ ++ where A is an accumulator, each CODE[i] is a binary rtl operation ++ and each VALUE[i] is a constant integer. CODE[0] is undefined. */ ++struct loongarch_integer_op { ++ enum rtx_code code; ++ unsigned HOST_WIDE_INT value; ++ enum loongarch_load_imm_method method; ++}; ++ ++/* The largest number of operations needed to load an integer constant. ++ The worst accepted case for 64-bit constants is LUI,ORI,SLL,ORI,SLL,ORI. ++ When the lowest bit is clear, we can try, but reject a sequence with ++ an extra SLL at the end. */ ++#define LARCH_MAX_INTEGER_OPS 9 ++ ++/* Costs of various operations on the different architectures. 
*/ ++ ++struct loongarch_rtx_cost_data ++{ ++ unsigned short fp_add; ++ unsigned short fp_mult_sf; ++ unsigned short fp_mult_df; ++ unsigned short fp_div_sf; ++ unsigned short fp_div_df; ++ unsigned short int_mult_si; ++ unsigned short int_mult_di; ++ unsigned short int_div_si; ++ unsigned short int_div_di; ++ unsigned short branch_cost; ++ unsigned short memory_latency; ++}; ++ ++/* Global variables for machine-dependent things. */ ++ ++/* The -G setting, or the configuration's default small-data limit if ++ no -G option is given. */ ++static unsigned int loongarch_small_data_threshold; ++ ++/* The number of file directives written by loongarch_output_filename. */ ++int num_source_filenames; ++ ++/* The name that appeared in the last .file directive written by ++ loongarch_output_filename, or "" if loongarch_output_filename hasn't ++ written anything yet. */ ++const char *current_function_file = ""; ++ ++/* Arrays that map GCC register numbers to debugger register numbers. */ ++int loongarch_dbx_regno[FIRST_PSEUDO_REGISTER]; ++int loongarch_dwarf_regno[FIRST_PSEUDO_REGISTER]; ++ ++/* Information about the current function's epilogue, used only while ++ expanding it. */ ++static struct { ++ /* A list of queued REG_CFA_RESTORE notes. */ ++ rtx cfa_restores; ++ ++ /* The CFA is currently defined as CFA_REG + CFA_OFFSET. */ ++ rtx cfa_reg; ++ HOST_WIDE_INT cfa_offset; ++ ++ /* The offset of the CFA from the stack pointer while restoring ++ registers. */ ++ HOST_WIDE_INT cfa_restore_sp_offset; ++} loongarch_epilogue; ++ ++ ++/* The current instruction-set architecture. */ ++enum processor loongarch_arch; ++const struct loongarch_cpu_info *loongarch_arch_info; ++ ++/* The processor that we should tune the code for. */ ++enum processor loongarch_tune; ++const struct loongarch_cpu_info *loongarch_tune_info; ++ ++/* The ISA level associated with loongarch_arch. */ ++int loongarch_isa; ++ ++/* The ISA revision level. 
*/ ++int loongarch_isa_rev; ++ ++/* The architecture selected by -loongarchN, or null if -loongarchN wasn't used. */ ++static const struct loongarch_cpu_info *loongarch_isa_option_info; ++ ++/* Which cost information to use. */ ++static const struct loongarch_rtx_cost_data *loongarch_cost; ++ ++/* The ambient target flags. */ ++static int loongarch_base_target_flags; ++ ++/* The default compression mode. */ ++unsigned int loongarch_base_compression_flags; ++ ++/* The ambient values of other global variables. */ ++static int loongarch_base_schedule_insns; /* flag_schedule_insns */ ++static int loongarch_base_reorder_blocks_and_partition; /* flag_reorder... */ ++static int loongarch_base_move_loop_invariants; /* flag_move_loop_invariants */ ++static const char *loongarch_base_align_loops; /* flag_align_loops */ ++static const char *loongarch_base_align_jumps; /* flag_align_jumps */ ++static const char *loongarch_base_align_functions; /* str_align_functions */ ++ ++/* Index [M][R] is true if register R is allowed to hold a value of mode M. */ ++static bool loongarch_hard_regno_mode_ok_p[MAX_MACHINE_MODE][FIRST_PSEUDO_REGISTER]; ++ ++/* Index C is true if character C is a valid PRINT_OPERAND punctation ++ character. */ ++static bool loongarch_print_operand_punct[256]; ++ ++static GTY (()) int loongarch_output_filename_first_time = 1; ++ ++/* loongarch_use_pcrel_pool_p[X] is true if symbols of type X should be ++ forced into a PC-relative constant pool. */ ++bool loongarch_use_pcrel_pool_p[NUM_SYMBOL_TYPES]; ++ ++/* Cached value of can_issue_more. This is cached in loongarch_variable_issue hook ++ and returned from loongarch_sched_reorder2. */ ++static int cached_can_issue_more; ++ ++/* Index R is the smallest register class that contains register R. 
*/ ++const enum reg_class loongarch_regno_to_class[FIRST_PSEUDO_REGISTER] = { ++ GR_REGS, GR_REGS, GR_REGS, GR_REGS, ++ JALR_REGS, JALR_REGS, JALR_REGS, JALR_REGS, ++ JALR_REGS, JALR_REGS, JALR_REGS, JALR_REGS, ++ SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, ++ SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, ++ SIBCALL_REGS, GR_REGS, GR_REGS, JALR_REGS, ++ JALR_REGS, JALR_REGS, JALR_REGS, JALR_REGS, ++ JALR_REGS, JALR_REGS, JALR_REGS, JALR_REGS, ++ ++ FP_REGS, FP_REGS, FP_REGS, FP_REGS, ++ FP_REGS, FP_REGS, FP_REGS, FP_REGS, ++ FP_REGS, FP_REGS, FP_REGS, FP_REGS, ++ FP_REGS, FP_REGS, FP_REGS, FP_REGS, ++ FP_REGS, FP_REGS, FP_REGS, FP_REGS, ++ FP_REGS, FP_REGS, FP_REGS, FP_REGS, ++ FP_REGS, FP_REGS, FP_REGS, FP_REGS, ++ FP_REGS, FP_REGS, FP_REGS, FP_REGS, ++ ST_REGS, ST_REGS, ST_REGS, ST_REGS, ++ ST_REGS, ST_REGS, ST_REGS, ST_REGS, ++ FRAME_REGS, FRAME_REGS ++}; ++ ++static tree loongarch_handle_interrupt_attr (tree *, tree, tree, int, bool *); ++static tree loongarch_handle_use_shadow_register_set_attr (tree *, tree, tree, int, ++ bool *); ++ ++/* The value of TARGET_ATTRIBUTE_TABLE. 
*/ ++static const struct attribute_spec loongarch_attribute_table[] = { ++ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, ++ affects_type_identity, handler, exclude } */ ++ { "long_call", 0, 0, false, true, true, false, NULL, NULL }, ++ { "short_call", 0, 0, false, true, true, false, NULL, NULL }, ++ { "far", 0, 0, false, true, true, false, NULL, NULL }, ++ { "near", 0, 0, false, true, true, false, NULL, NULL }, ++ { "nocompression", 0, 0, true, false, false, false, NULL, NULL }, ++ /* Allow functions to be specified as interrupt handlers */ ++ { "interrupt", 0, 1, false, true, true, false, loongarch_handle_interrupt_attr, ++ NULL }, ++ { "use_shadow_register_set", 0, 1, false, true, true, false, ++ loongarch_handle_use_shadow_register_set_attr, NULL }, ++ { "keep_interrupts_masked", 0, 0, false, true, true, false, NULL, NULL }, ++ { "use_debug_exception_return", 0, 0, false, true, true, false, NULL, NULL }, ++ { NULL, 0, 0, false, false, false, false, NULL, NULL } ++}; ++ ++/* A table describing all the processors GCC knows about; see ++ loongarch-cpus.def for details. */ ++static const struct loongarch_cpu_info loongarch_cpu_info_table[] = { ++#define LARCH_CPU(NAME, CPU, ISA, FLAGS) \ ++ { NAME, CPU, ISA, FLAGS }, ++#include "loongarch-cpus.def" ++#undef LARCH_CPU ++}; ++ ++/* Default costs. If these are used for a processor we should look ++ up the actual costs. */ ++#define DEFAULT_COSTS COSTS_N_INSNS (6), /* fp_add */ \ ++ COSTS_N_INSNS (7), /* fp_mult_sf */ \ ++ COSTS_N_INSNS (8), /* fp_mult_df */ \ ++ COSTS_N_INSNS (23), /* fp_div_sf */ \ ++ COSTS_N_INSNS (36), /* fp_div_df */ \ ++ COSTS_N_INSNS (10), /* int_mult_si */ \ ++ COSTS_N_INSNS (10), /* int_mult_di */ \ ++ COSTS_N_INSNS (69), /* int_div_si */ \ ++ COSTS_N_INSNS (69), /* int_div_di */ \ ++ 2, /* branch_cost */ \ ++ 4 /* memory_latency */ ++ ++/* Floating-point costs for processors without an FPU. Just assume that ++ all floating-point libcalls are very expensive. 
*/ ++#define SOFT_FP_COSTS COSTS_N_INSNS (256), /* fp_add */ \ ++ COSTS_N_INSNS (256), /* fp_mult_sf */ \ ++ COSTS_N_INSNS (256), /* fp_mult_df */ \ ++ COSTS_N_INSNS (256), /* fp_div_sf */ \ ++ COSTS_N_INSNS (256) /* fp_div_df */ ++ ++/* Costs to use when optimizing for size. */ ++static const struct loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size = { ++ COSTS_N_INSNS (1), /* fp_add */ ++ COSTS_N_INSNS (1), /* fp_mult_sf */ ++ COSTS_N_INSNS (1), /* fp_mult_df */ ++ COSTS_N_INSNS (1), /* fp_div_sf */ ++ COSTS_N_INSNS (1), /* fp_div_df */ ++ COSTS_N_INSNS (1), /* int_mult_si */ ++ COSTS_N_INSNS (1), /* int_mult_di */ ++ COSTS_N_INSNS (1), /* int_div_si */ ++ COSTS_N_INSNS (1), /* int_div_di */ ++ 2, /* branch_cost */ ++ 4 /* memory_latency */ ++}; ++ ++/* Costs to use when optimizing for speed, indexed by processor. */ ++static const struct loongarch_rtx_cost_data ++ loongarch_rtx_cost_data[NUM_PROCESSOR_VALUES] = { ++ { /* loongarch */ ++ DEFAULT_COSTS ++ }, ++ { /* loongarch64 */ ++ DEFAULT_COSTS ++ }, ++ { /* gs464v */ ++ DEFAULT_COSTS ++ } ++}; ++ ++/* Information about a single argument. */ ++struct n_loongarch_arg_info { ++ /* True if the argument is at least partially passed on the stack. */ ++ bool stack_p; ++ ++ /* The number of integer registers allocated to this argument. */ ++ unsigned int num_gprs; ++ ++ /* The offset of the first register used, provided num_gprs is nonzero. ++ If passed entirely on the stack, the value is MAX_ARGS_IN_REGISTERS. */ ++ unsigned int gpr_offset; ++ ++ /* The number of floating-point registers allocated to this argument. */ ++ unsigned int num_fprs; ++ ++ /* The offset of the first register used, provided num_fprs is nonzero. */ ++ unsigned int fpr_offset; ++}; ++ ++ ++/* Emit a move from SRC to DEST. Assume that the move expanders can ++ handle all moves if !can_create_pseudo_p (). 
The distinction is
++   important because, unlike emit_move_insn, the move expanders know
++   how to force Pmode objects into the constant pool even when the
++   constant pool address is not itself legitimate.  */
++
++rtx
++n_loongarch_emit_move (rtx dest, rtx src)
++{
++  return (can_create_pseudo_p ()
++          ? emit_move_insn (dest, src)
++          : emit_move_insn_1 (dest, src));
++}
++
++/* Implement TARGET_FUNCTION_ARG_BOUNDARY.  Return the alignment in bits:
++   at least PARM_BOUNDARY, raised to at most PREFERRED_STACK_BOUNDARY
++   when TYPE (or MODE, if TYPE is null) requires more.  */
++
++static unsigned int
++n_loongarch_function_arg_boundary (machine_mode mode, const_tree type)
++{
++  unsigned int alignment;
++
++  /* Use natural alignment if the type is not aggregate data.  */
++  if (type && !AGGREGATE_TYPE_P (type))
++    alignment = TYPE_ALIGN (TYPE_MAIN_VARIANT (type));
++  else
++    alignment = type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode);
++
++  return MIN (PREFERRED_STACK_BOUNDARY, MAX (PARM_BOUNDARY, alignment));
++}
++
++/* If MODE represents an argument that can be passed or returned in
++   floating-point registers, return the number of registers, else 0.  */
++
++static unsigned
++n_loongarch_pass_mode_in_fpr_p (machine_mode mode)
++{
++  if (GET_MODE_UNIT_SIZE (mode) <= UNITS_PER_FP_ARG)
++    {
++      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
++        return 1; /* A scalar float needs one FPR.  */
++
++      if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
++        return 2; /* A complex float needs one FPR per part.  */
++    }
++
++  return 0;
++}
++
++typedef struct {
++  const_tree type; /* Type of the flattened field.  */
++  HOST_WIDE_INT offset; /* Byte offset of the field in the aggregate.  */
++} n_loongarch_aggregate_field;
++
++/* Flatten TYPE, located at byte OFFSET, into FIELDS starting at index N.
++   Return the new field count, or -1 if TYPE is not an FPR candidate.  */
++
++static int
++n_loongarch_flatten_aggregate_field (const_tree type,
++                                     n_loongarch_aggregate_field fields[2],
++                                     int n, HOST_WIDE_INT offset)
++{
++  switch (TREE_CODE (type))
++    {
++    case RECORD_TYPE:
++      /* Can't handle incomplete types nor sizes that are not fixed.  */
++      if (!COMPLETE_TYPE_P (type)
++          || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
++          || !tree_fits_uhwi_p (TYPE_SIZE (type)))
++        return -1;
++
++      for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f))
++        if (TREE_CODE (f) == FIELD_DECL)
++          {
++            if (!TYPE_P (TREE_TYPE (f)))
++              return -1;
++
++            HOST_WIDE_INT pos = offset + int_byte_position (f);
++            n = n_loongarch_flatten_aggregate_field (TREE_TYPE (f), fields, n, pos);
++            if (n < 0)
++              return -1;
++          }
++      return n;
++
++    case ARRAY_TYPE:
++      {
++        HOST_WIDE_INT n_elts;
++        n_loongarch_aggregate_field subfields[2];
++        tree index = TYPE_DOMAIN (type);
++        tree elt_size = TYPE_SIZE_UNIT (TREE_TYPE (type));
++        int n_subfields = n_loongarch_flatten_aggregate_field (TREE_TYPE (type),
++                                                               subfields, 0, offset);
++
++        /* Reject incomplete arrays, variable bounds and unflattenable elements.  */
++        if (n_subfields <= 0
++            || !COMPLETE_TYPE_P (type)
++            || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
++            || !index
++            || !TYPE_MAX_VALUE (index)
++            || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
++            || !TYPE_MIN_VALUE (index)
++            || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
++            || !tree_fits_uhwi_p (elt_size))
++          return -1;
++
++        n_elts = 1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
++                   - tree_to_uhwi (TYPE_MIN_VALUE (index));
++        gcc_assert (n_elts >= 0);
++
++        for (HOST_WIDE_INT i = 0; i < n_elts; i++)
++          for (int j = 0; j < n_subfields; j++)
++            {
++              if (n >= 2)
++                return -1; /* More than two fields in total.  */
++
++              fields[n] = subfields[j];
++              fields[n++].offset += i * tree_to_uhwi (elt_size);
++            }
++
++        return n;
++      }
++
++    case COMPLEX_TYPE:
++      {
++        /* A complex value consumes both FIELDS slots, so N must be 0.  */
++        if (n != 0)
++          return -1;
++
++        HOST_WIDE_INT elt_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (type)));
++
++        if (elt_size <= UNITS_PER_FP_ARG)
++          {
++            fields[0].type = TREE_TYPE (type);
++            fields[0].offset = offset;
++            fields[1].type = TREE_TYPE (type);
++            fields[1].offset = offset + elt_size;
++
++            return 2;
++          }
++
++        return -1;
++      }
++
++    default:
++      if (n < 2
++          && ((SCALAR_FLOAT_TYPE_P (type)
++               && GET_MODE_SIZE (TYPE_MODE (type)) <= UNITS_PER_FP_ARG)
++              || (INTEGRAL_TYPE_P (type)
++                  && GET_MODE_SIZE (TYPE_MODE (type)) <= UNITS_PER_WORD)))
++        {
++          fields[n].type = type;
++          fields[n].offset = offset;
++          return n + 1;
++        }
++      else
++        return -1;
++    }
++}
++
++/* Flatten record TYPE into FIELDS as a candidate for FPR passing.  Return
++   the number of fields (at most two), or -1 if TYPE is not suitable.  */
++
++static int
++n_loongarch_flatten_aggregate_argument (const_tree type,
++                                        n_loongarch_aggregate_field fields[2])
++{
++  if (!type || TREE_CODE (type) != RECORD_TYPE)
++    return -1;
++
++  return n_loongarch_flatten_aggregate_field (type, fields, 0, 0);
++}
++
++/* Return the number of FPRs (1 or 2) if record TYPE flattens to only
++   floating-point fields, populating FIELDS; otherwise return 0.  */
++
++static unsigned
++n_loongarch_pass_aggregate_in_fpr_pair_p (const_tree type,
++                                          n_loongarch_aggregate_field fields[2])
++{
++  int n = n_loongarch_flatten_aggregate_argument (type, fields);
++
++  for (int i = 0; i < n; i++)
++    if (!SCALAR_FLOAT_TYPE_P (fields[i].type))
++      return 0;
++
++  return n > 0 ? n : 0;
++}
++
++/* See whether TYPE is a record whose fields should be returned in one or
++   floating-point register and one integer register.  If so, populate
++   FIELDS accordingly.
*/
++
++static bool
++n_loongarch_pass_aggregate_in_fpr_and_gpr_p (const_tree type,
++                                             n_loongarch_aggregate_field fields[2])
++{
++  unsigned num_int = 0, num_float = 0;
++  int n = n_loongarch_flatten_aggregate_argument (type, fields);
++
++  for (int i = 0; i < n; i++) /* N may be -1; the loop is then skipped.  */
++    {
++      num_float += SCALAR_FLOAT_TYPE_P (fields[i].type);
++      num_int += INTEGRAL_TYPE_P (fields[i].type);
++    }
++
++  return num_int == 1 && num_float == 1;
++}
++
++/* Return the representation of an argument passed or returned in an FPR
++   when the value has mode VALUE_MODE and the type has TYPE_MODE.  The
++   two modes may be different for structures like:
++
++       struct __attribute__((packed)) foo { float f; }
++
++   where the SFmode value "f" is passed in REGNO but the struct itself
++   has mode BLKmode.  */
++
++static rtx
++n_loongarch_pass_fpr_single (machine_mode type_mode, unsigned regno,
++                             machine_mode value_mode)
++{
++  rtx x = gen_rtx_REG (value_mode, regno);
++
++  if (type_mode != value_mode)
++    {
++      x = gen_rtx_EXPR_LIST (VOIDmode, x, const0_rtx); /* Value at offset 0.  */
++      x = gen_rtx_PARALLEL (type_mode, gen_rtvec (1, x));
++    }
++  return x;
++}
++
++/* Pass or return a composite value in the registers REGNO1 and REGNO2.
++   MODE is the mode of the composite.  MODE1 and OFFSET1 are the mode and
++   byte offset for the first value, likewise MODE2 and OFFSET2 for the
++   second value.  The result is a two-element PARALLEL.  */
++
++static rtx
++n_loongarch_pass_fpr_pair (machine_mode mode, unsigned regno1,
++                           machine_mode mode1, HOST_WIDE_INT offset1,
++                           unsigned regno2, machine_mode mode2,
++                           HOST_WIDE_INT offset2)
++{
++  return gen_rtx_PARALLEL
++    (mode,
++     gen_rtvec (2,
++                gen_rtx_EXPR_LIST (VOIDmode,
++                                   gen_rtx_REG (mode1, regno1),
++                                   GEN_INT (offset1)),
++                gen_rtx_EXPR_LIST (VOIDmode,
++                                   gen_rtx_REG (mode2, regno2),
++                                   GEN_INT (offset2))));
++}
++
++/* Fill INFO with information about a single argument, and return an
++   RTL pattern to pass or return the argument.  CUM is the cumulative
++   state for earlier arguments.
MODE is the mode of this argument and
++   TYPE is its type (if known).  NAMED is true if this is a named
++   (fixed) argument rather than a variable one.  RETURN_P is true if
++   returning the argument, or false if passing the argument.  */
++
++static rtx
++n_loongarch_get_arg_info (struct n_loongarch_arg_info *info, const CUMULATIVE_ARGS *cum,
++                          machine_mode mode, const_tree type, bool named,
++                          bool return_p)
++{
++  unsigned num_bytes, num_words;
++  unsigned fpr_base = return_p ? FP_RETURN : FP_ARG_FIRST;
++  unsigned gpr_base = return_p ? GP_RETURN : GP_ARG_FIRST;
++  unsigned alignment = n_loongarch_function_arg_boundary (mode, type);
++
++  memset (info, 0, sizeof (*info));
++  info->gpr_offset = cum->num_gprs; /* Start after the registers already used.  */
++  info->fpr_offset = cum->num_fprs;
++
++  if (named)
++    {
++      n_loongarch_aggregate_field fields[2];
++      unsigned fregno = fpr_base + info->fpr_offset;
++      unsigned gregno = gpr_base + info->gpr_offset;
++
++      /* Pass one- or two-element floating-point aggregates in FPRs.  */
++      if ((info->num_fprs = n_loongarch_pass_aggregate_in_fpr_pair_p (type, fields))
++          && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS)
++        switch (info->num_fprs)
++          {
++          case 1:
++            return n_loongarch_pass_fpr_single (mode, fregno,
++                                                TYPE_MODE (fields[0].type));
++
++          case 2:
++            return n_loongarch_pass_fpr_pair (mode, fregno,
++                                              TYPE_MODE (fields[0].type),
++                                              fields[0].offset,
++                                              fregno + 1,
++                                              TYPE_MODE (fields[1].type),
++                                              fields[1].offset);
++
++          default:
++            gcc_unreachable ();
++          }
++
++      /* Pass real and complex floating-point numbers in FPRs.  */
++      if ((info->num_fprs = n_loongarch_pass_mode_in_fpr_p (mode))
++          && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS)
++        switch (GET_MODE_CLASS (mode))
++          {
++          case MODE_FLOAT:
++            return gen_rtx_REG (mode, fregno);
++
++          case MODE_COMPLEX_FLOAT:
++            return n_loongarch_pass_fpr_pair (mode, fregno, GET_MODE_INNER (mode), 0,
++                                              fregno + 1, GET_MODE_INNER (mode),
++                                              GET_MODE_UNIT_SIZE (mode));
++
++          default:
++            gcc_unreachable ();
++          }
++
++      /* Pass structs with one float and one integer in an FPR and a GPR.  */
++      if (n_loongarch_pass_aggregate_in_fpr_and_gpr_p (type, fields)
++          && info->gpr_offset < MAX_ARGS_IN_REGISTERS
++          && info->fpr_offset < MAX_ARGS_IN_REGISTERS)
++        {
++          info->num_gprs = 1;
++          info->num_fprs = 1;
++
++          if (!SCALAR_FLOAT_TYPE_P (fields[0].type))
++            std::swap (fregno, gregno); /* The integer field comes first.  */
++
++          return n_loongarch_pass_fpr_pair (mode, fregno, TYPE_MODE (fields[0].type),
++                                            fields[0].offset,
++                                            gregno, TYPE_MODE (fields[1].type),
++                                            fields[1].offset);
++        }
++    }
++
++  /* Work out the size of the argument, in bytes and in whole words.  */
++  num_bytes = type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
++  num_words = (num_bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
++
++  /* Doubleword-aligned varargs start on an even register boundary.  */
++  if (!named && num_bytes != 0 && alignment > BITS_PER_WORD)
++    info->gpr_offset += info->gpr_offset & 1; /* Round up to an even GPR.  */
++
++  /* Partition the argument between registers and stack.  */
++  info->num_fprs = 0;
++  info->num_gprs = MIN (num_words, MAX_ARGS_IN_REGISTERS - info->gpr_offset);
++  info->stack_p = (num_words - info->num_gprs) != 0;
++
++  if (info->num_gprs || return_p) /* Return values always name a register.  */
++    return gen_rtx_REG (mode, gpr_base + info->gpr_offset);
++
++  return NULL_RTX;
++}
++
++/* Implement TARGET_FUNCTION_ARG.
*/
++
++static rtx
++n_loongarch_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
++{
++  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
++  struct n_loongarch_arg_info info;
++
++  if (arg.end_marker_p ())
++    return NULL;
++
++  return n_loongarch_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false);
++}
++
++/* Implement TARGET_FUNCTION_ARG_ADVANCE.  */
++
++static void
++n_loongarch_function_arg_advance (cumulative_args_t cum_v,
++                                  const function_arg_info &arg)
++{
++  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
++  struct n_loongarch_arg_info info;
++
++  n_loongarch_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false);
++
++  /* Advance the register count.  This has the effect of setting
++     num_gprs to MAX_ARGS_IN_REGISTERS if a doubleword-aligned
++     argument required us to skip the final GPR and pass the whole
++     argument on the stack.  */
++  cum->num_fprs = info.fpr_offset + info.num_fprs;
++  cum->num_gprs = info.gpr_offset + info.num_gprs;
++}
++
++/* Implement TARGET_ARG_PARTIAL_BYTES.  */
++
++static int
++n_loongarch_arg_partial_bytes (cumulative_args_t cum,
++                               const function_arg_info &generic_arg)
++{
++  struct n_loongarch_arg_info arg;
++
++  n_loongarch_get_arg_info (&arg, get_cumulative_args (cum), generic_arg.mode,
++                            generic_arg.type, generic_arg.named, false);
++  return arg.stack_p ? arg.num_gprs * UNITS_PER_WORD : 0;
++}
++
++/* Implement FUNCTION_VALUE and LIBCALL_VALUE.  For normal calls,
++   TYPE is the return type and MODE is VOIDmode.  For libcalls,
++   TYPE is null and MODE is the mode of the return value.  */
++
++rtx
++n_loongarch_function_value (const_tree type, const_tree func, machine_mode mode)
++{
++  struct n_loongarch_arg_info info;
++  CUMULATIVE_ARGS args;
++
++  if (type)
++    {
++      int unsigned_p = TYPE_UNSIGNED (type);
++
++      mode = TYPE_MODE (type);
++
++      /* Since TARGET_PROMOTE_FUNCTION_MODE unconditionally promotes
++         return values, promote the mode here too.  */
++      mode = promote_function_mode (type, mode, &unsigned_p, func, 1);
++    }
++
++  memset (&args, 0, sizeof args);
++  return n_loongarch_get_arg_info (&info, &args, mode, type, true, true);
++}
++
++/* Implement TARGET_PASS_BY_REFERENCE.  */
++
++static bool
++n_loongarch_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
++{
++  /* Size in bytes, taken from the type if known, otherwise from the mode.  */
++  HOST_WIDE_INT size = arg.type_size_in_bytes ();
++  struct n_loongarch_arg_info info;
++  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
++
++  /* ??? std_gimplify_va_arg_expr passes NULL for cum.  Fortunately, we
++     never pass variadic arguments in floating-point registers, so we can
++     avoid the call to n_loongarch_get_arg_info in this case.  */
++  if (cum != NULL)
++    {
++      /* Don't pass by reference if we can use a floating-point register.  */
++      n_loongarch_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false);
++      if (info.num_fprs)
++        return false;
++    }
++
++  /* Pass by reference if the data do not fit in two integer registers.  */
++  return !IN_RANGE (size, 0, 2 * UNITS_PER_WORD);
++}
++
++/* Implement TARGET_RETURN_IN_MEMORY.  */
++
++static bool
++n_loongarch_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
++{
++  CUMULATIVE_ARGS args;
++  cumulative_args_t cum = pack_cumulative_args (&args);
++
++  /* The rules for returning in memory are the same as for passing the
++     first named argument by reference.  */
++  memset (&args, 0, sizeof args);
++  function_arg_info arg (const_cast<tree> (type), /*named=*/true);
++  return n_loongarch_pass_by_reference (cum, arg);
++}
++
++/* Implement TARGET_SETUP_INCOMING_VARARGS.  */
++
++static void
++n_loongarch_setup_incoming_varargs (cumulative_args_t cum,
++                                    const function_arg_info &arg,
++                                    int *pretend_size ATTRIBUTE_UNUSED,
++                                    int no_rtl)
++{
++  CUMULATIVE_ARGS local_cum;
++  int gp_saved;
++
++  /* The caller has advanced CUM up to, but not beyond, the last named
++     argument.  Advance a local copy of CUM past the last "real" named
++     argument, to find out how many registers are left over.  */
++  local_cum = *get_cumulative_args (cum);
++  n_loongarch_function_arg_advance (pack_cumulative_args (&local_cum), arg);
++
++  /* Find out how many registers we need to save.  */
++  gp_saved = MAX_ARGS_IN_REGISTERS - local_cum.num_gprs;
++
++  if (!no_rtl && gp_saved > 0)
++    {
++      rtx ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
++                               REG_PARM_STACK_SPACE (cfun->decl)
++                               - gp_saved * UNITS_PER_WORD);
++      rtx mem = gen_frame_mem (BLKmode, ptr);
++      set_mem_alias_set (mem, get_varargs_alias_set ());
++
++      move_block_from_reg (local_cum.num_gprs + GP_ARG_FIRST,
++                           mem, gp_saved);
++    }
++  if (REG_PARM_STACK_SPACE (cfun->decl) == 0)
++    cfun->machine->varargs_size = gp_saved * UNITS_PER_WORD;
++}
++
++/* Make the last instruction frame-related and note that it performs
++   the operation described by FRAME_PATTERN.  */
++
++static void
++n_loongarch_set_frame_expr (rtx frame_pattern)
++{
++  rtx insn;
++
++  insn = get_last_insn ();
++  RTX_FRAME_RELATED_P (insn) = 1;
++  REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
++                                      frame_pattern,
++                                      REG_NOTES (insn));
++}
++
++/* Return a frame-related rtx that stores REG at MEM.
++   REG must be a single register.  */
++
++static rtx
++n_loongarch_frame_set (rtx mem, rtx reg)
++{
++  rtx set = gen_rtx_SET (mem, reg);
++  RTX_FRAME_RELATED_P (set) = 1;
++  return set;
++}
++
++/* Return true if the current function must save register REGNO.
*/
++
++static bool
++n_loongarch_save_reg_p (unsigned int regno)
++{
++  bool call_saved = !global_regs[regno] && !call_used_regs[regno];
++  bool might_clobber = crtl->saves_all_registers
++                       || df_regs_ever_live_p (regno);
++
++  if (call_saved && might_clobber)
++    return true;
++
++  if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
++    return true;
++
++  if (regno == RETURN_ADDR_REGNUM && crtl->calls_eh_return)
++    return true;
++
++  return false;
++}
++
++/* Determine whether to call GPR save/restore routines.  */
++static bool
++n_loongarch_use_save_libcall (const struct loongarch_frame_info *frame ATTRIBUTE_UNUSED)
++{
++  /* FIXME: the original test (!TARGET_SAVE_RESTORE || crtl->calls_eh_return
++     || frame_pointer_needed) is disabled, so libcalls are never used.  */
++  return false;
++}
++
++/* Determine which GPR save/restore routine to call.  */
++
++static unsigned
++n_loongarch_save_libcall_count (unsigned mask)
++{
++  for (unsigned n = GP_REG_LAST; n > GP_REG_FIRST; n--)
++    if (BITSET_P (mask, n))
++      return CALLEE_SAVED_REG_NUMBER (n) + 1;
++  abort ();
++}
++
++/* Populate the current function's loongarch_frame_info structure.
++
++   LARCH stack frames grow downward.  High addresses are at the top.
++
++        +-------------------------------+
++        |                               |
++        |  incoming stack arguments     |
++        |                               |
++        +-------------------------------+ <-- incoming stack pointer
++        |                               |
++        |  callee-allocated save area   |
++        |  for arguments that are       |
++        |  split between registers and  |
++        |  the stack                    |
++        |                               |
++        +-------------------------------+ <-- arg_pointer_rtx
++        |                               |
++        |  callee-allocated save area   |
++        |  for register varargs         |
++        |                               |
++        +-------------------------------+ <-- hard_frame_pointer_rtx;
++        |                               |     stack_pointer_rtx + gp_sp_offset
++        |  GPR save area                |       + UNITS_PER_WORD
++        |                               |
++        +-------------------------------+ <-- stack_pointer_rtx + fp_sp_offset
++        |                               |       + UNITS_PER_HWVALUE
++        |  FPR save area                |
++        |                               |
++        +-------------------------------+ <-- frame_pointer_rtx (virtual)
++        |                               |
++        |  local variables              |
++        |                               |
++      P +-------------------------------+
++        |                               |
++        |  outgoing stack arguments     |
++        |                               |
++        +-------------------------------+ <-- stack_pointer_rtx
++
++   Dynamic stack allocations such as alloca insert data at point P.
++   They decrease stack_pointer_rtx but leave frame_pointer_rtx and
++   hard_frame_pointer_rtx unchanged.  */
++
++static void
++n_loongarch_compute_frame_info (void)
++{
++  struct loongarch_frame_info *frame;
++  HOST_WIDE_INT offset;
++  unsigned int regno, i, num_x_saved = 0, num_f_saved = 0;
++
++  frame = &cfun->machine->frame;
++  memset (frame, 0, sizeof (*frame));
++
++  /* Find out which GPRs we need to save.  */
++  for (regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
++    if (n_loongarch_save_reg_p (regno))
++      frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++;
++
++  /* If this function calls eh_return, we must also save and restore the
++     EH data registers.  */
++  if (crtl->calls_eh_return)
++    for (i = 0; (regno = EH_RETURN_DATA_REGNO (i)) != INVALID_REGNUM; i++)
++      frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++;
++
++  /* Find out which FPRs we need to save.  This loop must iterate over
++     the same space as its companion in n_loongarch_for_each_saved_reg.  */
++  if (TARGET_HARD_FLOAT)
++    for (regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
++      if (n_loongarch_save_reg_p (regno))
++        frame->fmask |= 1 << (regno - FP_REG_FIRST), num_f_saved++;
++
++  /* At the bottom of the frame are any outgoing stack arguments.  */
++  offset = LARCH_STACK_ALIGN (crtl->outgoing_args_size);
++  /* Next are local stack variables.  */
++  offset += LARCH_STACK_ALIGN (get_frame_size ());
++  /* The virtual frame pointer points above the local variables.  */
++  frame->frame_pointer_offset = offset;
++  /* Next are the callee-saved FPRs.  */
++  if (frame->fmask)
++    offset += LARCH_STACK_ALIGN (num_f_saved * UNITS_PER_FP_REG);
++  frame->fp_sp_offset = offset - UNITS_PER_FP_REG; /* Topmost FPR slot.  */
++  /* Next are the callee-saved GPRs.  */
++  if (frame->mask)
++    {
++      unsigned x_save_size = LARCH_STACK_ALIGN (num_x_saved * UNITS_PER_WORD);
++      unsigned num_save_restore = 1 + n_loongarch_save_libcall_count (frame->mask);
++
++      /* Only use save/restore routines if they don't alter the stack size.  */
++      if (LARCH_STACK_ALIGN (num_save_restore * UNITS_PER_WORD) == x_save_size)
++        frame->save_libcall_adjustment = x_save_size;
++
++      offset += x_save_size;
++    }
++  frame->gp_sp_offset = offset - UNITS_PER_WORD; /* Topmost GPR slot.  */
++  /* The hard frame pointer points above the callee-saved GPRs.  */
++  frame->hard_frame_pointer_offset = offset;
++  /* Above the hard frame pointer is the callee-allocated varargs save area.  */
++  offset += LARCH_STACK_ALIGN (cfun->machine->varargs_size);
++  /* Next is the callee-allocated area for pretend stack arguments.  */
++  offset += LARCH_STACK_ALIGN (crtl->args.pretend_args_size);
++  /* Arg pointer must be below pretend args, but must be above alignment
++     padding.  */
++  frame->arg_pointer_offset = offset - crtl->args.pretend_args_size;
++  frame->total_size = offset;
++  /* Above that are the incoming stack pointer and any incoming arguments.  */
++
++  /* Only use save/restore routines when the GPRs are atop the frame.  */
++  if (frame->hard_frame_pointer_offset != frame->total_size)
++    frame->save_libcall_adjustment = 0;
++}
++
++/* Implement INITIAL_ELIMINATION_OFFSET.  FROM is either the frame pointer
++   or argument pointer.  TO is either the stack pointer or hard frame
++   pointer.  Recomputes the frame layout before answering.  */
++
++HOST_WIDE_INT
++n_loongarch_initial_elimination_offset (int from, int to)
++{
++  HOST_WIDE_INT src, dest;
++
++  n_loongarch_compute_frame_info ();
++
++  if (to == HARD_FRAME_POINTER_REGNUM)
++    dest = cfun->machine->frame.hard_frame_pointer_offset;
++  else if (to == STACK_POINTER_REGNUM)
++    dest = 0; /* The stack pointer is the base of all offsets, hence 0.  */
++  else
++    gcc_unreachable ();
++
++  if (from == FRAME_POINTER_REGNUM)
++    src = cfun->machine->frame.frame_pointer_offset;
++  else if (from == ARG_POINTER_REGNUM)
++    src = cfun->machine->frame.arg_pointer_offset;
++  else
++    gcc_unreachable ();
++
++  return src - dest;
++}
++
++/* A function to save or restore a register.  The first argument is the
++   register and the second is the stack slot.  */
++typedef void (*n_loongarch_save_restore_fn) (rtx, rtx);
++
++/* Use FN to save or restore register REGNO.  MODE is the register's
++   mode and OFFSET is the offset of its save slot from the current
++   stack pointer.  */
++
++static void
++n_loongarch_save_restore_reg (machine_mode mode, int regno,
++                              HOST_WIDE_INT offset, n_loongarch_save_restore_fn fn)
++{
++  rtx mem;
++
++  mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx, offset));
++  fn (gen_rtx_REG (mode, regno), mem);
++}
++
++/* Call FN for each register that is saved by the current function.
++   SP_OFFSET is the offset of the current stack pointer from the start
++   of the frame.  */
++
++static void
++n_loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset, n_loongarch_save_restore_fn fn)
++{
++  HOST_WIDE_INT offset;
++
++  /* Walk the GPRs recorded in frame.mask, from gp_sp_offset downwards.  */
++  offset = cfun->machine->frame.gp_sp_offset - sp_offset;
++  for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
++    if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
++      {
++        n_loongarch_save_restore_reg (word_mode, regno, offset, fn);
++        offset -= UNITS_PER_WORD;
++      }
++
++  /* This loop must iterate over the same space as its companion in
++     n_loongarch_compute_frame_info.  */
++  offset = cfun->machine->frame.fp_sp_offset - sp_offset;
++  for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
++    if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
++      {
++        machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
++
++        n_loongarch_save_restore_reg (mode, regno, offset, fn);
++        offset -= GET_MODE_SIZE (mode);
++      }
++}
++
++/* Save register REG to MEM.  Make the instruction frame-related.  */
++
++static void
++n_loongarch_save_reg (rtx reg, rtx mem)
++{
++  n_loongarch_emit_move (mem, reg);
++  n_loongarch_set_frame_expr (n_loongarch_frame_set (mem, reg));
++}
++
++/* Restore register REG from MEM and attach a REG_CFA_RESTORE note.  */
++
++static void
++n_loongarch_restore_reg (rtx reg, rtx mem)
++{
++  rtx insn = n_loongarch_emit_move (reg, mem);
++  rtx dwarf = NULL_RTX;
++  dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
++  REG_NOTES (insn) = dwarf;
++
++  RTX_FRAME_RELATED_P (insn) = 1;
++}
++
++/* Return the code to invoke the GPR save routine (in a static buffer).  */
++
++const char *
++n_loongarch_output_gpr_save (unsigned mask)
++{
++  static char s[32];
++  unsigned n = n_loongarch_save_libcall_count (mask);
++
++  ssize_t bytes = snprintf (s, sizeof (s), "call\tt0,__n_loongarch_save_%u", n);
++  gcc_assert ((size_t) bytes < sizeof (s)); /* The name must not be truncated.  */
++
++  return s;
++}
++
++#define IMM_BITS 12
++
++#define IMM_REACH (1LL << IMM_BITS)
++
++/* For stack frames that can't be allocated with a single ADDI instruction,
++   compute the best value to initially allocate.  It must at a minimum
++   allocate enough space to spill the callee-saved registers.
Beyond that,
++   prefer a first step that leaves a remainder which is cheap to apply in
++   the second adjustment, without adding extra instructions.  */
++
++static HOST_WIDE_INT
++n_loongarch_first_stack_step (struct loongarch_frame_info *frame)
++{
++  if (SMALL_OPERAND (frame->total_size))
++    return frame->total_size;
++
++  HOST_WIDE_INT min_first_step =
++    LARCH_STACK_ALIGN (frame->total_size - frame->fp_sp_offset);
++  HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8;
++  HOST_WIDE_INT min_second_step = frame->total_size - max_first_step;
++  gcc_assert (min_first_step <= max_first_step);
++
++  /* As an optimization, use the least-significant bits of the total frame
++     size, so that the second adjustment step is just LUI + ADD.  */
++  if (!SMALL_OPERAND (min_second_step)
++      && frame->total_size % IMM_REACH < IMM_REACH / 2
++      && frame->total_size % IMM_REACH >= min_first_step)
++    return frame->total_size % IMM_REACH;
++
++  return max_first_step;
++}
++
++/* Return the CFI notes describing what the GPR save libcall does.  */
++static rtx
++n_loongarch_adjust_libcall_cfi_prologue ()
++{
++  rtx dwarf = NULL_RTX;
++  rtx adjust_sp_rtx, reg, mem, insn;
++  int saved_size = cfun->machine->frame.save_libcall_adjustment;
++  int offset;
++
++  for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
++    if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
++      {
++        /* The save order is ra, s0 to s8.  */
++        if (regno == RETURN_ADDR_REGNUM)
++          offset = saved_size - UNITS_PER_WORD;
++        else
++          offset = saved_size - ((regno - S0_REGNUM + 2) * UNITS_PER_WORD);
++
++        reg = gen_rtx_REG (SImode, regno); /* NOTE(review): SImode on LA64?  */
++        mem = gen_frame_mem (SImode,
++                             plus_constant (Pmode, stack_pointer_rtx, offset));
++
++        insn = gen_rtx_SET (mem, reg);
++        dwarf = alloc_reg_note (REG_CFA_OFFSET, insn, dwarf);
++      }
++
++  /* Debug info for adjust sp.  */
++  adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx,
++                                 stack_pointer_rtx, GEN_INT (-saved_size));
++  dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx, dwarf);
++  return dwarf;
++}
++
++/* Emit a stack tie between the stack pointer and hard frame pointer.  */
++static void
++n_loongarch_emit_stack_tie (void)
++{
++  if (Pmode == SImode)
++    emit_insn (gen_stack_tiesi (stack_pointer_rtx, hard_frame_pointer_rtx));
++  else
++    emit_insn (gen_stack_tiedi (stack_pointer_rtx, hard_frame_pointer_rtx));
++}
++
++/* Expand the "prologue" pattern.  */
++
++void
++n_loongarch_expand_prologue (void)
++{
++  struct loongarch_frame_info *frame = &cfun->machine->frame;
++  HOST_WIDE_INT size = frame->total_size;
++  unsigned mask = frame->mask;
++  rtx insn;
++
++  if (flag_stack_usage_info)
++    current_function_static_stack_size = size;
++
++  /* When optimizing for size, call a subroutine to save the registers.  */
++  if (n_loongarch_use_save_libcall (frame))
++    {
++      rtx dwarf = NULL_RTX;
++      dwarf = n_loongarch_adjust_libcall_cfi_prologue ();
++
++      frame->mask = 0; /* Temporarily fib that we need not save GPRs.  */
++      size -= frame->save_libcall_adjustment;
++      insn = emit_insn (gen_gpr_save (GEN_INT (mask)));
++
++      RTX_FRAME_RELATED_P (insn) = 1;
++      REG_NOTES (insn) = dwarf;
++    }
++
++  /* Save the registers.  */
++  if ((frame->mask | frame->fmask) != 0)
++    {
++      HOST_WIDE_INT step1 = MIN (size, n_loongarch_first_stack_step (frame));
++
++      insn = gen_add3_insn (stack_pointer_rtx,
++                            stack_pointer_rtx,
++                            GEN_INT (-step1));
++      RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
++      size -= step1;
++      n_loongarch_for_each_saved_reg (size, n_loongarch_save_reg);
++    }
++
++  frame->mask = mask; /* Undo the above fib.  */
++
++  /* Set up the frame pointer, if we're using one.  */
++  if (frame_pointer_needed)
++    {
++      insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
++                            GEN_INT (frame->hard_frame_pointer_offset - size));
++      RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
++
++      n_loongarch_emit_stack_tie ();
++    }
++
++  /* Allocate the rest of the frame.  */
++  if (size > 0)
++    {
++      if (SMALL_OPERAND (-size))
++        {
++          insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
++                                GEN_INT (-size));
++          RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
++        }
++      else
++        {
++          n_loongarch_emit_move (N_LARCH_PROLOGUE_TEMP (Pmode), GEN_INT (-size));
++          emit_insn (gen_add3_insn (stack_pointer_rtx,
++                                    stack_pointer_rtx,
++                                    N_LARCH_PROLOGUE_TEMP (Pmode)));
++
++          /* Describe the effect of the previous instructions.  */
++          insn = plus_constant (Pmode, stack_pointer_rtx, -size);
++          insn = gen_rtx_SET (stack_pointer_rtx, insn);
++          n_loongarch_set_frame_expr (insn);
++        }
++    }
++}
++
++/* Return nonzero if this function is known to have a null epilogue.
++   This allows the optimizer to omit jumps to jumps if no stack
++   was created.  */
++
++bool
++n_loongarch_can_use_return_insn (void)
++{
++  return reload_completed && cfun->machine->frame.total_size == 0;
++}
++
++/* Return the CFI notes describing what the GPR restore libcall does.  */
++static rtx
++n_loongarch_adjust_libcall_cfi_epilogue ()
++{
++  rtx dwarf = NULL_RTX;
++  rtx adjust_sp_rtx, reg;
++  int saved_size = cfun->machine->frame.save_libcall_adjustment;
++
++  /* Debug info for adjust sp.  */
++  adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx,
++                                 stack_pointer_rtx, GEN_INT (saved_size));
++  dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx, dwarf);
++
++  for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
++    if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
++      {
++        reg = gen_rtx_REG (SImode, regno);
++        dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
++      }
++
++  return dwarf;
++}
++
++/* Expand an "epilogue" or "sibcall_epilogue" pattern; SIBCALL_P
++   says which.  */
++
++void
++n_loongarch_expand_epilogue (bool sibcall_p)
++{
++  /* Split the frame into two.  STEP1 is the amount of stack we should
++     deallocate before restoring the registers.  STEP2 is the amount we
++     should deallocate afterwards.
++
++     Start off by assuming that no registers need to be restored.  */
++  struct loongarch_frame_info *frame = &cfun->machine->frame;
++  unsigned mask = frame->mask;
++  HOST_WIDE_INT step1 = frame->total_size;
++  HOST_WIDE_INT step2 = 0;
++  bool use_restore_libcall = !sibcall_p && n_loongarch_use_save_libcall (frame);
++  rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
++  rtx insn;
++
++  /* We need a memory barrier to prevent reads from the deallocated stack.  */
++  bool need_barrier_p = (get_frame_size ()
++                         + cfun->machine->frame.arg_pointer_offset) != 0;
++
++  if (!sibcall_p && n_loongarch_can_use_return_insn ())
++    {
++      emit_jump_insn (gen_return ());
++      return;
++    }
++
++  /* Move past any dynamic stack allocations.  */
++  if (cfun->calls_alloca)
++    {
++      /* Emit a barrier to prevent loads from a deallocated stack.  */
++      n_loongarch_emit_stack_tie ();
++      need_barrier_p = false;
++
++      rtx adjust = GEN_INT (-frame->hard_frame_pointer_offset);
++      if (!SMALL_OPERAND (INTVAL (adjust)))
++        {
++          n_loongarch_emit_move (N_LARCH_PROLOGUE_TEMP (Pmode), adjust);
++          adjust = N_LARCH_PROLOGUE_TEMP (Pmode);
++        }
++
++      insn = emit_insn (
++               gen_add3_insn (stack_pointer_rtx, hard_frame_pointer_rtx,
++                              adjust));
++
++      rtx dwarf = NULL_RTX;
++      rtx cfa_adjust_value = gen_rtx_PLUS (
++                               Pmode, hard_frame_pointer_rtx,
++                               GEN_INT (-frame->hard_frame_pointer_offset));
++      rtx cfa_adjust_rtx = gen_rtx_SET (stack_pointer_rtx, cfa_adjust_value);
++      dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, cfa_adjust_rtx, dwarf);
++      RTX_FRAME_RELATED_P (insn) = 1;
++
++      REG_NOTES (insn) = dwarf;
++    }
++
++  /* If we need to restore registers, deallocate as much stack as
++     possible in the second step without going out of range.  */
++  if ((frame->mask | frame->fmask) != 0)
++    {
++      step2 = n_loongarch_first_stack_step (frame);
++      step1 -= step2;
++    }
++
++  /* Set TARGET to BASE + STEP1.  */
++  if (step1 > 0)
++    {
++      /* Emit a barrier to prevent loads from a deallocated stack.  */
++      n_loongarch_emit_stack_tie ();
++      need_barrier_p = false;
++
++      /* Get an rtx for STEP1 that we can add to BASE.  */
++      rtx adjust = GEN_INT (step1);
++      if (!SMALL_OPERAND (step1))
++        {
++          n_loongarch_emit_move (N_LARCH_PROLOGUE_TEMP (Pmode), adjust);
++          adjust = N_LARCH_PROLOGUE_TEMP (Pmode);
++        }
++
++      insn = emit_insn (
++               gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, adjust));
++
++      rtx dwarf = NULL_RTX;
++      rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
++                                         GEN_INT (step2));
++
++      dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
++      RTX_FRAME_RELATED_P (insn) = 1;
++
++      REG_NOTES (insn) = dwarf;
++    }
++
++  if (use_restore_libcall)
++    frame->mask = 0; /* Temporarily fib that we need not save GPRs.  */
++
++  /* Restore the registers.  */
++  n_loongarch_for_each_saved_reg (frame->total_size - step2, n_loongarch_restore_reg);
++
++  if (use_restore_libcall)
++    {
++      frame->mask = mask; /* Undo the above fib.  */
++      gcc_assert (step2 >= frame->save_libcall_adjustment);
++      step2 -= frame->save_libcall_adjustment;
++    }
++
++  if (need_barrier_p)
++    n_loongarch_emit_stack_tie ();
++
++  /* Deallocate the final bit of the frame.  */
++  if (step2 > 0)
++    {
++      insn = emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
++                                       GEN_INT (step2)));
++
++      rtx dwarf = NULL_RTX;
++      rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
++                                         const0_rtx);
++      dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
++      RTX_FRAME_RELATED_P (insn) = 1;
++
++      REG_NOTES (insn) = dwarf;
++    }
++
++  if (use_restore_libcall)
++    {
++      rtx dwarf = n_loongarch_adjust_libcall_cfi_epilogue ();
++      insn = emit_insn (gen_gpr_restore (GEN_INT (n_loongarch_save_libcall_count (mask))));
++      RTX_FRAME_RELATED_P (insn) = 1;
++      REG_NOTES (insn) = dwarf;
++
++      emit_jump_insn (gen_gpr_restore_return (ra));
++      return;
++    }
++
++  /* Add in the __builtin_eh_return stack adjustment.  */
++  if (crtl->calls_eh_return)
++    emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
++                              EH_RETURN_STACKADJ_RTX));
++
++  if (!sibcall_p)
++    emit_jump_insn (gen_simple_return_internal (ra));
++}
++
++
++static rtx loongarch_find_pic_call_symbol (rtx_insn *, rtx, bool);
++static int loongarch_register_move_cost (machine_mode, reg_class_t,
++                                         reg_class_t);
++
++/* Predicates to test for presence of "near"/"short_call" and "far"/"long_call"
++   attributes on the given TYPE.  */
++
++static bool
++loongarch_near_type_p (const_tree type)
++{
++  return (lookup_attribute ("short_call", TYPE_ATTRIBUTES (type)) != NULL
++          || lookup_attribute ("near", TYPE_ATTRIBUTES (type)) != NULL);
++}
++
++static bool
++loongarch_far_type_p (const_tree type)
++{
++  return (lookup_attribute ("long_call", TYPE_ATTRIBUTES (type)) != NULL
++          || lookup_attribute ("far", TYPE_ATTRIBUTES (type)) != NULL);
++}
++
++
++/* Check if the interrupt attribute is set for a function.  */
++
++static bool
++loongarch_interrupt_type_p (tree type)
++{
++  return lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type)) != NULL;
++}
++
++/* Return the compression mode that should be used for function DECL.
++   Return the ambient setting if DECL is null.  */
++
++static unsigned int
++loongarch_get_compress_mode (tree decl)
++{
++  unsigned int flags;
++
++  flags = loongarch_base_compression_flags;
++  if (decl)
++    {
++      /* Nested functions must use the same frame pointer as their
++         parent and must therefore use the same ISA mode.  */
++      tree parent = decl_function_context (decl);
++      if (parent)
++        decl = parent; /* NOTE(review): has no effect on FLAGS.  */
++    }
++  return flags;
++}
++
++/* Implement TARGET_COMP_TYPE_ATTRIBUTES.  */
++
++static int
++loongarch_comp_type_attributes (const_tree type1, const_tree type2)
++{
++  /* Disallow mixed near/far attributes.  */
++  if (loongarch_far_type_p (type1) && loongarch_near_type_p (type2))
++    return 0;
++  if (loongarch_near_type_p (type1) && loongarch_far_type_p (type2))
++    return 0;
++  return 1;
++}
++
++/* Implement TARGET_MERGE_DECL_ATTRIBUTES.  */
++
++static tree
++loongarch_merge_decl_attributes (tree olddecl, tree newdecl)
++{
++  return merge_attributes (DECL_ATTRIBUTES (olddecl),
++                           DECL_ATTRIBUTES (newdecl));
++}
++
++/* Implement TARGET_CAN_INLINE_P.  */
++
++static bool
++loongarch_can_inline_p (tree caller, tree callee)
++{
++  if (loongarch_get_compress_mode (callee) != loongarch_get_compress_mode (caller))
++    return false;
++  return default_target_can_inline_p (caller, callee);
++}
++
++/* Handle an "interrupt" attribute with an optional argument.  */
++
++static tree
++loongarch_handle_interrupt_attr (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
++                                 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
++{
++  /* Check for an argument.  */
++  if (is_attribute_p ("interrupt", name) && args != NULL)
++    {
++      tree cst;
++
++      cst = TREE_VALUE (args);
++      if (TREE_CODE (cst) != STRING_CST)
++        {
++          warning (OPT_Wattributes,
++                   "%qE attribute requires a string argument",
++                   name);
++          *no_add_attrs = true;
++        }
++      else if (strcmp (TREE_STRING_POINTER (cst), "eic") != 0
++               && strncmp (TREE_STRING_POINTER (cst), "vector=", 7) != 0)
++        {
++          warning (OPT_Wattributes,
++                   "argument to %qE attribute is neither eic, nor "
++                   "vector=<line>", name);
++          *no_add_attrs = true;
++        }
++      else if (strncmp (TREE_STRING_POINTER (cst), "vector=", 7) == 0)
++        {
++          const char *arg = TREE_STRING_POINTER (cst) + 7;
++
++          /* Acceptable names are: sw0,sw1,hw0,hw1,hw2,hw3,hw4,hw5.  */
++          if (strlen (arg) != 3
++              || (arg[0] != 's' && arg[0] != 'h')
++              || arg[1] != 'w'
++              || (arg[0] == 's' && arg[2] != '0' && arg[2] != '1')
++              || (arg[0] == 'h' && (arg[2] < '0' || arg[2] > '5')))
++            {
++              warning (OPT_Wattributes,
++                       "interrupt vector to %qE attribute is not "
++                       "vector=(sw0|sw1|hw0|hw1|hw2|hw3|hw4|hw5)",
++                       name);
++              *no_add_attrs = true;
++            }
++        }
++
++      return NULL_TREE;
++    }
++
++  return NULL_TREE;
++}
++
++/* Handle a "use_shadow_register_set" attribute with an optional argument.  */
++
++static tree
++loongarch_handle_use_shadow_register_set_attr (tree *node ATTRIBUTE_UNUSED,
++                                               tree name, tree args,
++                                               int flags ATTRIBUTE_UNUSED,
++                                               bool *no_add_attrs)
++{
++  /* Check for an argument.  */
++  if (is_attribute_p ("use_shadow_register_set", name) && args != NULL)
++    {
++      tree cst;
++
++      cst = TREE_VALUE (args);
++      if (TREE_CODE (cst) != STRING_CST)
++        {
++          warning (OPT_Wattributes,
++                   "%qE attribute requires a string argument",
++                   name);
++          *no_add_attrs = true;
++        }
++      else if (strcmp (TREE_STRING_POINTER (cst), "intstack") != 0)
++        {
++          warning (OPT_Wattributes,
++                   "argument to %qE attribute is not intstack", name);
++          *no_add_attrs = true;
++        }
++
++      return NULL_TREE;
++    }
++
++  return NULL_TREE;
++}
++
++/* If X is a PLUS of a CONST_INT, return the two terms in *BASE_PTR
++   and *OFFSET_PTR.  Return X in *BASE_PTR and 0 in *OFFSET_PTR otherwise.  */
++
++static void
++loongarch_split_plus (rtx x, rtx *base_ptr, HOST_WIDE_INT *offset_ptr)
++{
++  if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
++    {
++      *base_ptr = XEXP (x, 0);
++      *offset_ptr = INTVAL (XEXP (x, 1));
++    }
++  else
++    {
++      *base_ptr = x;
++      *offset_ptr = 0;
++    }
++}
++
++static unsigned int loongarch_build_integer (struct loongarch_integer_op *,
++                                             unsigned HOST_WIDE_INT);
++
++/* Fill CODES with a sequence of rtl operations to load VALUE.
++   Return the number of operations needed.
++   Split integer in loongarch_output_move.
*/ ++ ++static unsigned int ++loongarch_build_integer (struct loongarch_integer_op *codes, ++ unsigned HOST_WIDE_INT value) ++{ ++ uint32_t hi32, lo32; ++ char all0_bit_vec, sign_bit_vec, allf_bit_vec, paritial_is_sext_of_prev; ++ unsigned int cost = 0; ++ ++ lo32 = value & 0xffffffff; ++ hi32 = value >> 32; ++ ++ all0_bit_vec = (((hi32 & 0xfff00000) == 0) << 3) ++ | (((hi32 & 0x000fffff) == 0) << 2) ++ | (((lo32 & 0xfffff000) == 0) << 1) ++ | ((lo32 & 0x00000fff) == 0); ++ sign_bit_vec = (((hi32 & 0x80000000) != 0) << 3) ++ | (((hi32 & 0x00080000) != 0) << 2) ++ | (((lo32 & 0x80000000) != 0) << 1) ++ | ((lo32 & 0x00000800) != 0); ++ allf_bit_vec = (((hi32 & 0xfff00000) == 0xfff00000) << 3) ++ | (((hi32 & 0x000fffff) == 0x000fffff) << 2) ++ | (((lo32 & 0xfffff000) == 0xfffff000) << 1) ++ | ((lo32 & 0x00000fff) == 0x00000fff); ++ paritial_is_sext_of_prev = (all0_bit_vec ^ allf_bit_vec) ++ & (all0_bit_vec ^ (sign_bit_vec << 1)); ++ ++ do ++ { ++ if (paritial_is_sext_of_prev == 0x7) ++ { ++ codes[0].code = UNKNOWN; ++ codes[0].method = METHOD_LU52I; ++ codes[0].value = value & 0xfff0000000000000; ++ cost++; ++ break; ++ } ++ if ((all0_bit_vec & 0x3) == 0x2) ++ { ++ codes[cost].code = UNKNOWN; ++ codes[cost].method = METHOD_NORMAL; ++ codes[cost].value = value & 0xfff; ++ cost++; ++ } ++ else ++ { ++ switch (paritial_is_sext_of_prev & 0x3) ++ { ++ case 0: ++ codes[cost].code = UNKNOWN; ++ codes[cost].method = METHOD_NORMAL; ++ codes[cost].value = ((HOST_WIDE_INT)value << 32 >> 32) & 0xfffffffffffff000; ++ cost++; ++ codes[cost].code = IOR; ++ codes[cost].method = METHOD_NORMAL; ++ codes[cost].value = value & 0xfff; ++ cost++; ++ break; ++ case 1: ++ codes[cost].code = UNKNOWN; ++ codes[cost].method = METHOD_NORMAL; ++ codes[cost].value = ((HOST_WIDE_INT)value << 32 >> 32) & 0xfffffffffffff000; ++ cost++; ++ break; ++ case 2: ++ codes[cost].code = UNKNOWN; ++ codes[cost].method = METHOD_NORMAL; ++ codes[cost].value = (HOST_WIDE_INT)value << 52 >> 52; ++ cost++; ++ 
break; ++ case 3: ++ codes[cost].code = UNKNOWN; ++ codes[cost].method = METHOD_NORMAL; ++ codes[cost].value = 0; ++ cost++; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ } ++ ++ if (((value & 0xfffffffffffff800) ^ 0xfff00000fffff800) == 0) ++ { ++ codes[cost].method = METHOD_INSV; ++ cost++; ++ break; ++ } ++ ++ switch (paritial_is_sext_of_prev >> 2) ++ { ++ case 0: ++ codes[cost].method = METHOD_LU32I; ++ codes[cost].value = ((HOST_WIDE_INT)value << 12 >> 12) & 0xffffffff00000000; ++ cost++; ++ case 1: ++ codes[cost].method = METHOD_LU52I; ++ codes[cost].value = value & 0xfff0000000000000; ++ cost++; ++ break; ++ case 2: ++ codes[cost].method = METHOD_LU32I; ++ codes[cost].value = ((HOST_WIDE_INT)value << 12 >> 12) & 0xffffffff00000000; ++ cost++; ++ break; ++ case 3: ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ } ++ while (0); ++ ++ return cost; ++} ++ ++/* Implement TARGET_LEGITIMATE_CONSTANT_P. */ ++ ++static bool ++loongarch_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x) ++{ ++ return loongarch_const_insns (x) > 0; ++} ++ ++ ++/* Return true if X is a thread-local symbol. */ ++ ++static bool ++loongarch_tls_symbol_p (rtx x) ++{ ++ return GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0; ++} ++ ++/* Return true if SYMBOL_REF X is associated with a global symbol ++ (in the STB_GLOBAL sense). */ ++ ++bool ++loongarch_global_symbol_p (const_rtx x) ++{ ++ if (GET_CODE (x) == LABEL_REF) ++ return false; ++ ++ const_tree decl = SYMBOL_REF_DECL (x); ++ ++ if (!decl) ++ return !SYMBOL_REF_LOCAL_P (x) || SYMBOL_REF_EXTERNAL_P (x); ++ ++ /* Weakref symbols are not TREE_PUBLIC, but their targets are global ++ or weak symbols. Relocations in the object file will be against ++ the target symbol, so it's that symbol's binding that matters here. 
*/ ++ return DECL_P (decl) && (TREE_PUBLIC (decl) || DECL_WEAK (decl)); ++} ++ ++bool ++loongarch_global_symbol_noweak_p (const_rtx x) ++{ ++ if (GET_CODE (x) == LABEL_REF) ++ return false; ++ ++ const_tree decl = SYMBOL_REF_DECL (x); ++ ++ if (!decl) ++ return !SYMBOL_REF_LOCAL_P (x) || SYMBOL_REF_EXTERNAL_P (x); ++ ++ /* Weakref symbols are not TREE_PUBLIC, but their targets are global ++ or weak symbols. Relocations in the object file will be against ++ the target symbol, so it's that symbol's binding that matters here. */ ++ return DECL_P (decl) && TREE_PUBLIC (decl); ++} ++ ++bool ++loongarch_weak_symbol_p (const_rtx x) ++{ ++ const_tree decl; ++ if (GET_CODE (x) == LABEL_REF || !(decl = SYMBOL_REF_DECL (x))) ++ return false; ++ return DECL_P (decl) && DECL_WEAK (decl); ++} ++ ++ ++/* Return true if SYMBOL_REF X binds locally. */ ++ ++bool ++loongarch_symbol_binds_local_p (const_rtx x) ++{ ++ if (GET_CODE (x) == LABEL_REF) ++ return false; ++ ++ return (SYMBOL_REF_DECL (x) ++ ? targetm.binds_local_p (SYMBOL_REF_DECL (x)) ++ : SYMBOL_REF_LOCAL_P (x)); ++} ++ ++ ++/* Return true if rtx constants of mode MODE should be put into a small ++ data section. */ ++ ++static bool ++loongarch_rtx_constant_in_small_data_p (machine_mode mode) ++{ ++ return (!TARGET_EMBEDDED_DATA ++ && TARGET_LOCAL_SDATA ++ && GET_MODE_SIZE (mode) <= loongarch_small_data_threshold); ++} ++ ++/* Return the method that should be used to access SYMBOL_REF or ++ LABEL_REF X in context CONTEXT. 
*/ ++ ++static enum loongarch_symbol_type ++loongarch_classify_symbol (const_rtx x, enum loongarch_symbol_context context) ++{ ++ if (TARGET_RTP_PIC) ++ return SYMBOL_GOT_DISP; ++ ++ if (GET_CODE (x) == LABEL_REF) ++ { ++ return SYMBOL_GOT_DISP; ++ } ++ ++ gcc_assert (GET_CODE (x) == SYMBOL_REF); ++ ++ if (SYMBOL_REF_TLS_MODEL (x)) ++ return SYMBOL_TLS; ++ ++ if (GET_CODE (x) == SYMBOL_REF) ++ return SYMBOL_GOT_DISP; ++} ++ ++/* Return true if X is a symbolic constant that can be used in context ++ CONTEXT. If it is, store the type of the symbol in *SYMBOL_TYPE. */ ++ ++bool ++loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_context context, ++ enum loongarch_symbol_type *symbol_type) ++{ ++ rtx offset; ++ ++ split_const (x, &x, &offset); ++ if (UNSPEC_ADDRESS_P (x)) ++ { ++ *symbol_type = UNSPEC_ADDRESS_TYPE (x); ++ x = UNSPEC_ADDRESS (x); ++ } ++ else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF) ++ { ++ *symbol_type = loongarch_classify_symbol (x, context); ++ if (*symbol_type == SYMBOL_TLS) ++ return true; ++ } ++ else ++ return false; ++ ++ if (offset == const0_rtx) ++ return true; ++ ++ /* Check whether a nonzero offset is valid for the underlying ++ relocations. */ ++ switch (*symbol_type) ++ { ++ /* Fall through. */ ++ ++ case SYMBOL_GOT_DISP: ++ case SYMBOL_TLSGD: ++ case SYMBOL_TLSLDM: ++ case SYMBOL_TLS: ++ return false; ++ } ++ gcc_unreachable (); ++} ++ ++/* Like loongarch_symbol_insns We rely on the fact that, in the worst case. */ ++ ++static int ++loongarch_symbol_insns_1 (enum loongarch_symbol_type type, machine_mode mode) ++{ ++ if (loongarch_use_pcrel_pool_p[(int) type]) ++ { ++ /* The constant must be loaded and then dereferenced. */ ++ return 0; ++ } ++ ++ switch (type) ++ { ++ case SYMBOL_GOT_DISP: ++ /* The constant will have to be loaded from the GOT before it ++ is used in an address. */ ++ if (mode != MAX_MACHINE_MODE) ++ return 0; ++ ++ /* Fall through. 
*/ ++ ++ return 3; ++ ++ case SYMBOL_TLSGD: ++ case SYMBOL_TLSLDM: ++ return 1; ++ ++ case SYMBOL_TLS: ++ /* We don't treat a bare TLS symbol as a constant. */ ++ return 0; ++ } ++ gcc_unreachable (); ++} ++ ++/* If MODE is MAX_MACHINE_MODE, return the number of instructions needed ++ to load symbols of type TYPE into a register. Return 0 if the given ++ type of symbol cannot be used as an immediate operand. ++ ++ Otherwise, return the number of instructions needed to load or store ++ values of mode MODE to or from addresses of type TYPE. Return 0 if ++ the given type of symbol is not valid in addresses. ++ ++ In both cases, instruction counts are based off BASE_INSN_LENGTH. */ ++ ++static int ++loongarch_symbol_insns (enum loongarch_symbol_type type, machine_mode mode) ++{ ++ return loongarch_symbol_insns_1 (type, mode) * (1); ++} ++ ++/* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ ++ ++static bool ++loongarch_cannot_force_const_mem (machine_mode mode, rtx x) ++{ ++ enum loongarch_symbol_type type; ++ rtx base, offset; ++ ++ /* There is no assembler syntax for expressing an address-sized ++ high part. */ ++ if (GET_CODE (x) == HIGH) ++ return true; ++ ++ /* As an optimization, reject constants that loongarch_legitimize_move ++ can expand inline. ++ ++ Suppose we have a multi-instruction sequence that loads constant C ++ into register R. If R does not get allocated a hard register, and ++ R is used in an operand that allows both registers and memory ++ references, reload will consider forcing C into memory and using ++ one of the instruction's memory alternatives. Returning false ++ here will force it to use an input reload instead. */ ++ if (CONST_INT_P (x) && loongarch_legitimate_constant_p (mode, x)) ++ return true; ++ ++ split_const (x, &base, &offset); ++ if (loongarch_symbolic_constant_p (base, SYMBOL_CONTEXT_LEA, &type)) ++ { ++ /* See whether we explicitly want these symbols in the pool. 
*/ ++ if (loongarch_use_pcrel_pool_p[(int) type]) ++ return false; ++ ++ /* The same optimization as for CONST_INT. */ ++ if (SMALL_INT (offset) && loongarch_symbol_insns (type, MAX_MACHINE_MODE) > 0) ++ return true; ++ ++ } ++ ++ /* TLS symbols must be computed by loongarch_legitimize_move. */ ++ if (tls_referenced_p (x)) ++ return true; ++ ++ return false; ++} ++ ++ ++/* Return true if register REGNO is a valid base register for mode MODE. ++ STRICT_P is true if REG_OK_STRICT is in effect. */ ++ ++int ++loongarch_regno_mode_ok_for_base_p (int regno, machine_mode mode, ++ bool strict_p) ++{ ++ if (!HARD_REGISTER_NUM_P (regno)) ++ { ++ if (!strict_p) ++ return true; ++ regno = reg_renumber[regno]; ++ } ++ ++ /* These fake registers will be eliminated to either the stack or ++ hard frame pointer, both of which are usually valid base registers. ++ Reload deals with the cases where the eliminated form isn't valid. */ ++ if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM) ++ return true; ++ ++ ++ return GP_REG_P (regno); ++} ++ ++/* Return true if X is a valid base register for mode MODE. ++ STRICT_P is true if REG_OK_STRICT is in effect. */ ++ ++static bool ++loongarch_valid_base_register_p (rtx x, machine_mode mode, bool strict_p) ++{ ++ if (!strict_p && GET_CODE (x) == SUBREG) ++ x = SUBREG_REG (x); ++ ++ return (REG_P (x) ++ && loongarch_regno_mode_ok_for_base_p (REGNO (x), mode, strict_p)); ++} ++ ++/* Return true if, for every base register BASE_REG, (plus BASE_REG X) ++ can address a value of mode MODE. 
*/ ++ ++static bool ++loongarch_valid_offset_p (rtx x, machine_mode mode) ++{ ++ /* Check that X is a signed 12-bit number, ++ * or check that X is a signed 16-bit number ++ * and offset 4 byte aligned */ ++ if (!(const_arith_operand (x, Pmode) ++ || ((mode == E_SImode || mode == E_DImode) ++ && const_imm16_operand (x, Pmode) ++ && (loongarch_signed_immediate_p (INTVAL (x), 14, 2))))) ++ return false; ++ ++ /* We may need to split multiword moves, so make sure that every word ++ is accessible. */ ++ if (GET_MODE_SIZE (mode) > UNITS_PER_WORD ++ && !SMALL_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode) - UNITS_PER_WORD)) ++ return false; ++ ++ return true; ++} ++ ++/* Return true if X is a valid address for machine mode MODE. If it is, ++ fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in ++ effect. */ ++ ++static bool ++loongarch_classify_address (struct loongarch_address_info *info, rtx x, ++ machine_mode mode, bool strict_p) ++{ ++ switch (GET_CODE (x)) ++ { ++ case REG: ++ case SUBREG: ++ info->type = ADDRESS_REG; ++ info->reg = x; ++ info->offset = const0_rtx; ++ return loongarch_valid_base_register_p (info->reg, mode, strict_p); ++ ++ case PLUS: ++ info->type = ADDRESS_REG; ++ info->reg = XEXP (x, 0); ++ info->offset = XEXP (x, 1); ++ return (loongarch_valid_base_register_p (info->reg, mode, strict_p) ++ && loongarch_valid_offset_p (info->offset, mode)); ++ #if 0 ++ case LABEL_REF: ++ case SYMBOL_REF: ++ info->type = ADDRESS_SYMBOLIC; ++ return (loongarch_symbolic_constant_p (x, SYMBOL_CONTEXT_MEM, ++ &info->symbol_type) ++ && loongarch_symbol_insns (info->symbol_type, mode) > 0 ++ && !loongarch_split_p[info->symbol_type]); ++ ++ #endif ++ default: ++ return false; ++ } ++} ++ ++/* Implement TARGET_LEGITIMATE_ADDRESS_P. 
*/ ++ ++static bool ++loongarch_legitimate_address_p (machine_mode mode, rtx x, bool strict_p) ++{ ++ struct loongarch_address_info addr; ++ ++ return loongarch_classify_address (&addr, x, mode, strict_p); ++} ++ ++/* Return true if X is a legitimate $sp-based address for mode MODE. */ ++ ++bool ++loongarch_stack_address_p (rtx x, machine_mode mode) ++{ ++ struct loongarch_address_info addr; ++ ++ return (loongarch_classify_address (&addr, x, mode, false) ++ && addr.type == ADDRESS_REG ++ && addr.reg == stack_pointer_rtx); ++} ++ ++/* Return true if ADDR matches the pattern for the L{B,H,W,D}{,U}X load ++ indexed address instruction. Note that such addresses are ++ not considered legitimate in the TARGET_LEGITIMATE_ADDRESS_P ++ sense, because their use is so restricted. */ ++ ++static bool ++loongarch_lx_address_p (rtx addr, machine_mode mode) ++{ ++ if (GET_CODE (addr) != PLUS ++ || !REG_P (XEXP (addr, 0)) ++ || !REG_P (XEXP (addr, 1))) ++ return false; ++ return false; ++} ++ ++ ++/* Return the number of instructions needed to load or store a value ++ of mode MODE at address X, assuming that BASE_INSN_LENGTH is the ++ length of one instruction. Return 0 if X isn't valid for MODE. ++ Assume that multiword moves may need to be split into word moves ++ if MIGHT_SPLIT_P, otherwise assume that a single load or store is ++ enough. */ ++ ++int ++loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p) ++{ ++ struct loongarch_address_info addr; ++ int factor; ++ ++ if (!loongarch_classify_address (&addr, x, mode, false)) ++ return 0; ++ ++ /* BLKmode is used for single unaligned loads and stores and should ++ not count as a multiword mode. (GET_MODE_SIZE (BLKmode) is pretty ++ meaningless, so we have to single it out as a special case one way ++ or the other.) 
*/ ++ if (mode != BLKmode && might_split_p) ++ factor = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; ++ else ++ factor = 1; ++ ++ if (loongarch_classify_address (&addr, x, mode, false)) ++ switch (addr.type) ++ { ++ case ADDRESS_REG: ++ return factor; ++ ++ case ADDRESS_CONST_INT: ++ return factor; ++ ++ case ADDRESS_SYMBOLIC: ++ return factor * loongarch_symbol_insns (addr.symbol_type, mode); ++ } ++ return 0; ++} ++ ++/* Return true if X fits within an unsigned field of BITS bits that is ++ shifted left SHIFT bits before being used. */ ++ ++bool ++loongarch_unsigned_immediate_p (unsigned HOST_WIDE_INT x, int bits, int shift = 0) ++{ ++ return (x & ((1 << shift) - 1)) == 0 && x < ((unsigned) 1 << (shift + bits)); ++} ++ ++/* Return true if X fits within a signed field of BITS bits that is ++ shifted left SHIFT bits before being used. */ ++ ++bool ++loongarch_signed_immediate_p (unsigned HOST_WIDE_INT x, int bits, int shift = 0) ++{ ++ x += 1 << (bits + shift - 1); ++ return loongarch_unsigned_immediate_p (x, bits, shift); ++} ++ ++/* Return true if X is a legitimate address with a 12-bit offset. ++ MODE is the mode of the value being accessed. */ ++ ++bool ++loongarch_12bit_offset_address_p (rtx x, machine_mode mode) ++{ ++ struct loongarch_address_info addr; ++ ++ return (loongarch_classify_address (&addr, x, mode, false) ++ && addr.type == ADDRESS_REG ++ && CONST_INT_P (addr.offset) ++ && ULARCH_12BIT_OFFSET_P (INTVAL (addr.offset))); ++} ++ ++/* Return true if X is a legitimate address with a 9-bit offset. ++ MODE is the mode of the value being accessed. */ ++ ++bool ++loongarch_9bit_offset_address_p (rtx x, machine_mode mode) ++{ ++ struct loongarch_address_info addr; ++ ++ return (loongarch_classify_address (&addr, x, mode, false) ++ && addr.type == ADDRESS_REG ++ && CONST_INT_P (addr.offset) ++ && LARCH_9BIT_OFFSET_P (INTVAL (addr.offset))); ++} ++ ++/* Return true if X is a legitimate address with a 14-bit offset shifted 2. 
++ MODE is the mode of the value being accessed. */ ++ ++bool ++loongarch_14bit_shifted_offset_address_p (rtx x, machine_mode mode) ++{ ++ struct loongarch_address_info addr; ++ ++ return (loongarch_classify_address (&addr, x, mode, false) ++ && addr.type == ADDRESS_REG ++ && CONST_INT_P (addr.offset) ++ && LISA_16BIT_OFFSET_P (INTVAL (addr.offset)) ++ && LISA_SHIFT_2_OFFSET_P (INTVAL (addr.offset))); ++} ++ ++ ++/* Return the number of instructions needed to load constant X, ++ assuming that BASE_INSN_LENGTH is the length of one instruction. ++ Return 0 if X isn't a valid constant. */ ++ ++int ++loongarch_const_insns (rtx x) ++{ ++ struct loongarch_integer_op codes[LARCH_MAX_INTEGER_OPS]; ++ enum loongarch_symbol_type symbol_type; ++ rtx offset; ++ ++ switch (GET_CODE (x)) ++ { ++ case CONST_INT: ++ return loongarch_build_integer (codes, INTVAL (x)); ++ ++ case CONST_VECTOR: ++ /* Fall through. */ ++ case CONST_DOUBLE: ++ /* Allow zeros for normal mode, where we can use $0. */ ++ return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0; ++ ++ case CONST: ++ /* See if we can refer to X directly. */ ++ if (loongarch_symbolic_constant_p (x, SYMBOL_CONTEXT_LEA, &symbol_type)) ++ return loongarch_symbol_insns (symbol_type, MAX_MACHINE_MODE); ++ ++ /* Otherwise try splitting the constant into a base and offset. ++ If the offset is a 16-bit value, we can load the base address ++ into a register and then use (D)ADDIU to add in the offset. ++ If the offset is larger, we can load the base and offset ++ into separate registers and add them together with (D)ADDU. ++ However, the latter is only possible before reload; during ++ and after reload, we must have the option of forcing the ++ constant into the pool instead. 
*/ ++ split_const (x, &x, &offset); ++ if (offset != 0) ++ { ++ int n = loongarch_const_insns (x); ++ if (n != 0) ++ { ++ if (SMALL_INT (offset)) ++ return n + 1; ++ else if (!targetm.cannot_force_const_mem (GET_MODE (x), x)) ++ return n + 1 + loongarch_build_integer (codes, INTVAL (offset)); ++ } ++ } ++ return 0; ++ ++ case SYMBOL_REF: ++ case LABEL_REF: ++ return loongarch_symbol_insns (loongarch_classify_symbol (x, SYMBOL_CONTEXT_LEA), ++ MAX_MACHINE_MODE); ++ ++ default: ++ return 0; ++ } ++} ++ ++/* X is a doubleword constant that can be handled by splitting it into ++ two words and loading each word separately. Return the number of ++ instructions required to do this, assuming that BASE_INSN_LENGTH ++ is the length of one instruction. */ ++ ++int ++loongarch_split_const_insns (rtx x) ++{ ++ unsigned int low, high; ++ ++ low = loongarch_const_insns (loongarch_subword (x, false)); ++ high = loongarch_const_insns (loongarch_subword (x, true)); ++ gcc_assert (low > 0 && high > 0); ++ return low + high; ++} ++ ++/* Return one word of 128-bit value OP, taking into account the fixed ++ endianness of certain registers. BYTE selects from the byte address. */ ++ ++rtx ++loongarch_subword_at_byte (rtx op, unsigned int byte) ++{ ++ machine_mode mode; ++ ++ mode = GET_MODE (op); ++ if (mode == VOIDmode) ++ mode = TImode; ++ ++ gcc_assert (!FP_REG_RTX_P (op)); ++ ++ if (MEM_P (op)) ++ return loongarch_rewrite_small_data (adjust_address (op, word_mode, byte)); ++ ++ return simplify_gen_subreg (word_mode, op, mode, byte); ++} ++ ++/* Return the number of instructions needed to implement INSN, ++ given that it loads from or stores to MEM. Assume that ++ BASE_INSN_LENGTH is the length of one instruction. */ ++ ++int ++loongarch_load_store_insns (rtx mem, rtx_insn *insn) ++{ ++ machine_mode mode; ++ bool might_split_p; ++ rtx set; ++ ++ gcc_assert (MEM_P (mem)); ++ mode = GET_MODE (mem); ++ ++ /* Try to prove that INSN does not need to be split. 
*/ ++ might_split_p = GET_MODE_SIZE (mode) > UNITS_PER_WORD; ++ if (might_split_p) ++ { ++ set = single_set (insn); ++ if (set && !loongarch_split_move_insn_p (SET_DEST (set), SET_SRC (set), insn)) ++ might_split_p = false; ++ } ++ ++ return loongarch_address_insns (XEXP (mem, 0), mode, might_split_p); ++} ++ ++/* Return the number of instructions needed for an integer division, ++ assuming that BASE_INSN_LENGTH is the length of one instruction. */ ++ ++int ++loongarch_idiv_insns (machine_mode mode) ++{ ++ int count; ++ ++ count = 1; ++ if (TARGET_CHECK_ZERO_DIV) ++ count += 2; ++ ++ return count; ++} ++ ++ ++/* Emit a move from SRC to DEST. Assume that the move expanders can ++ handle all moves if !can_create_pseudo_p (). The distinction is ++ important because, unlike emit_move_insn, the move expanders know ++ how to force Pmode objects into the constant pool even when the ++ constant pool address is not itself legitimate. */ ++ ++rtx_insn * ++loongarch_emit_move (rtx dest, rtx src) ++{ ++ return (can_create_pseudo_p () ++ ? emit_move_insn (dest, src) ++ : emit_move_insn_1 (dest, src)); ++} ++ ++/* Emit a move from SRC to DEST, splitting compound moves into individual ++ instructions. SPLIT_TYPE is the type of split to perform. */ ++ ++static void ++loongarch_emit_move_or_split (rtx dest, rtx src, enum loongarch_split_type split_type) ++{ ++ if (loongarch_split_move_p (dest, src, split_type)) ++ loongarch_split_move (dest, src, split_type, NULL); ++ else ++ loongarch_emit_move (dest, src); ++} ++ ++/* Emit an instruction of the form (set TARGET (CODE OP0 OP1)). */ ++ ++void ++loongarch_emit_binary (enum rtx_code code, rtx target, rtx op0, rtx op1) ++{ ++ emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (code, GET_MODE (target), ++ op0, op1))); ++} ++ ++/* Compute (CODE OP0 OP1) and store the result in a new register ++ of mode MODE. Return that new register. 
*/ ++ ++static rtx ++loongarch_force_binary (machine_mode mode, enum rtx_code code, rtx op0, rtx op1) ++{ ++ rtx reg; ++ ++ reg = gen_reg_rtx (mode); ++ loongarch_emit_binary (code, reg, op0, op1); ++ return reg; ++} ++ ++/* Copy VALUE to a register and return that register. If new pseudos ++ are allowed, copy it into a new register, otherwise use DEST. */ ++ ++static rtx ++loongarch_force_temporary (rtx dest, rtx value) ++{ ++ if (can_create_pseudo_p ()) ++ return force_reg (Pmode, value); ++ else ++ { ++ loongarch_emit_move (dest, value); ++ return dest; ++ } ++} ++ ++ ++/* Wrap symbol or label BASE in an UNSPEC address of type SYMBOL_TYPE, ++ then add CONST_INT OFFSET to the result. */ ++ ++static rtx ++loongarch_unspec_address_offset (rtx base, rtx offset, ++ enum loongarch_symbol_type symbol_type) ++{ ++ base = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, base), ++ UNSPEC_ADDRESS_FIRST + symbol_type); ++ if (offset != const0_rtx) ++ base = gen_rtx_PLUS (Pmode, base, offset); ++ return gen_rtx_CONST (Pmode, base); ++} ++ ++/* Return an UNSPEC address with underlying address ADDRESS and symbol ++ type SYMBOL_TYPE. */ ++ ++rtx ++loongarch_unspec_address (rtx address, enum loongarch_symbol_type symbol_type) ++{ ++ rtx base, offset; ++ ++ split_const (address, &base, &offset); ++ return loongarch_unspec_address_offset (base, offset, symbol_type); ++} ++ ++/* If OP is an UNSPEC address, return the address to which it refers, ++ otherwise return OP itself. */ ++ ++rtx ++loongarch_strip_unspec_address (rtx op) ++{ ++ rtx base, offset; ++ ++ split_const (op, &base, &offset); ++ if (UNSPEC_ADDRESS_P (base)) ++ op = plus_constant (Pmode, UNSPEC_ADDRESS (base), INTVAL (offset)); ++ return op; ++} ++ ++ ++/* Return a base register that holds pic_offset_table_rtx. ++ TEMP, if nonnull, is a scratch Pmode base register. 
*/ ++ ++rtx ++loongarch_pic_base_register (rtx temp) ++{ ++ return pic_offset_table_rtx; ++ ++} ++ ++/* If SRC is the RHS of a load_call insn, return the underlying symbol ++ reference. Return NULL_RTX otherwise. */ ++ ++static rtx ++loongarch_strip_unspec_call (rtx src) ++{ ++ if (GET_CODE (src) == UNSPEC && XINT (src, 1) == UNSPEC_LOAD_CALL) ++ return loongarch_strip_unspec_address (XVECEXP (src, 0, 1)); ++ return NULL_RTX; ++} ++ ++/* Return a legitimate address for REG + OFFSET. TEMP is as for ++ loongarch_force_temporary; it is only needed when OFFSET is not a ++ SMALL_OPERAND. */ ++ ++static rtx ++loongarch_add_offset (rtx temp, rtx reg, HOST_WIDE_INT offset) ++{ ++ if (!SMALL_OPERAND (offset)) ++ { ++ rtx high; ++ ++ /* Leave OFFSET as a 16-bit offset and put the excess in HIGH. ++ The addition inside the macro CONST_HIGH_PART may cause an ++ overflow, so we need to force a sign-extension check. */ ++ high = gen_int_mode (CONST_HIGH_PART (offset), Pmode); ++ offset = CONST_LOW_PART (offset); ++ high = loongarch_force_temporary (temp, high); ++ reg = loongarch_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg)); ++ } ++ return plus_constant (Pmode, reg, offset); ++} ++ ++/* The __tls_get_attr symbol. */ ++static GTY(()) rtx loongarch_tls_symbol; ++ ++/* Load an entry from the GOT for a TLS GD access. */ ++ ++static rtx loongarch_got_load_tls_gd (rtx dest, rtx sym) ++{ ++ if (Pmode == DImode) ++ return gen_got_load_tls_gddi (dest, sym); ++ else ++ return gen_got_load_tls_gdsi (dest, sym); ++} ++ ++/* Load an entry from the GOT for a TLS LD access. */ ++ ++static rtx loongarch_got_load_tls_ld (rtx dest, rtx sym) ++{ ++ if (Pmode == DImode) ++ return gen_got_load_tls_lddi (dest, sym); ++ else ++ return gen_got_load_tls_ldsi (dest, sym); ++} ++ ++ ++/* Load an entry from the GOT for a TLS IE access. 
*/ ++ ++static rtx loongarch_got_load_tls_ie (rtx dest, rtx sym) ++{ ++ if (Pmode == DImode) ++ return gen_got_load_tls_iedi (dest, sym); ++ else ++ return gen_got_load_tls_iesi (dest, sym); ++} ++ ++/* Add in the thread pointer for a TLS LE access. */ ++ ++static rtx loongarch_got_load_tls_le (rtx dest, rtx sym) ++{ ++ if (Pmode == DImode) ++ return gen_got_load_tls_ledi (dest, sym); ++ else ++ return gen_got_load_tls_lesi (dest, sym); ++} ++ ++/* Return an instruction sequence that calls __tls_get_addr. SYM is ++ the TLS symbol we are referencing and TYPE is the symbol type to use ++ (either global dynamic or local dynamic). V0 is an RTX for the ++ return value location. */ ++ ++static rtx_insn * ++loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) ++{ ++ rtx loc, a0; ++ rtx_insn *insn; ++ ++ a0 = gen_rtx_REG (Pmode, GP_ARG_FIRST); ++ ++ if (!loongarch_tls_symbol) ++ loongarch_tls_symbol = init_one_libfunc ("__tls_get_addr"); ++ ++ loc = loongarch_unspec_address (sym, type); ++ ++ start_sequence (); ++ ++ if (type == SYMBOL_TLSLDM) ++ emit_insn (loongarch_got_load_tls_ld (a0, loc)); ++ else if (type == SYMBOL_TLSGD) ++ emit_insn (loongarch_got_load_tls_gd (a0, loc)); ++ else ++ gcc_unreachable (); ++ ++ insn = emit_call_insn (gen_call_value_internal (v0, loongarch_tls_symbol, const0_rtx)); ++ RTL_CONST_CALL_P (insn) = 1; ++ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), a0); ++ insn = get_insns (); ++ ++ end_sequence (); ++ ++ return insn; ++} ++ ++/* Generate the code to access LOC, a thread-local SYMBOL_REF, and return ++ its address. The return value will be both a valid address and a valid ++ SET_SRC (either a REG or a LO_SUM). */ ++ ++static rtx ++loongarch_legitimize_tls_address (rtx loc) ++{ ++ rtx dest, tp, tmp; ++ enum tls_model model = SYMBOL_REF_TLS_MODEL (loc); ++ rtx_insn *insn; ++ ++ /* Since we support TLS copy relocs, non-PIC TLS accesses may all use LE. 
*/ ++ #if 0 ++ if (!flag_pic) ++ model = TLS_MODEL_LOCAL_EXEC; ++ #endif ++ ++ switch (model) ++ { ++ case TLS_MODEL_LOCAL_DYNAMIC: ++ tmp = gen_rtx_REG (Pmode, GP_RETURN); ++ dest = gen_reg_rtx (Pmode); ++ insn = loongarch_call_tls_get_addr (loc, SYMBOL_TLSLDM, tmp); ++ emit_libcall_block (insn, dest, tmp, loc); ++ break; ++ ++ case TLS_MODEL_GLOBAL_DYNAMIC: ++ tmp = gen_rtx_REG (Pmode, GP_RETURN); ++ dest = gen_reg_rtx (Pmode); ++ insn = loongarch_call_tls_get_addr (loc, SYMBOL_TLSGD, tmp); ++ emit_libcall_block (insn, dest, tmp, loc); ++ break; ++ ++ case TLS_MODEL_INITIAL_EXEC: ++ /* la.tls.ie; tp-relative add */ ++ tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); ++ tmp = gen_reg_rtx (Pmode); ++ emit_insn (loongarch_got_load_tls_ie (tmp, loc)); ++ dest = gen_reg_rtx (Pmode); ++ emit_insn (gen_add3_insn (dest, tmp, tp)); ++ break; ++ ++ case TLS_MODEL_LOCAL_EXEC: ++ /* la.tls.le; tp-relative add */ ++ tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); ++ tmp = gen_reg_rtx (Pmode); ++ emit_insn (loongarch_got_load_tls_le (tmp, loc)); ++ dest = gen_reg_rtx (Pmode); ++ emit_insn (gen_add3_insn (dest, tmp, tp)); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ return dest; ++} ++ ++rtx ++loongarch_legitimize_call_address (rtx addr) ++{ ++ if (!call_insn_operand (addr, VOIDmode)) ++ { ++ rtx reg = gen_reg_rtx (Pmode); ++ loongarch_emit_move (reg, addr); ++ return reg; ++ } ++ return addr; ++} ++ ++/* If X is not a valid address for mode MODE, force it into a register. */ ++ ++static rtx ++loongarch_force_address (rtx x, machine_mode mode) ++{ ++ if (!loongarch_legitimate_address_p (mode, x, false)) ++ x = force_reg (Pmode, x); ++ return x; ++} ++ ++/* This function is used to implement LEGITIMIZE_ADDRESS. If X can ++ be legitimized in a way that the generic machinery might not expect, ++ return a new address, otherwise return NULL. MODE is the mode of ++ the memory being accessed. 
*/ ++ ++static rtx ++loongarch_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, ++ machine_mode mode) ++{ ++ rtx base, addr; ++ HOST_WIDE_INT offset; ++ /* TLS symbols get their own access sequence. */ ++ if (loongarch_tls_symbol_p (x)) ++ return loongarch_legitimize_tls_address (x); ++ ++ /* Handle BASE + OFFSET using loongarch_add_offset. */ ++ loongarch_split_plus (x, &base, &offset); ++ if (offset != 0) ++ { ++ if (!loongarch_valid_base_register_p (base, mode, false)) ++ base = copy_to_mode_reg (Pmode, base); ++ addr = loongarch_add_offset (NULL, base, offset); ++ return loongarch_force_address (addr, mode); ++ } ++ /* Nothing to do here: hand X back unchanged. */ ++ return x; ++} ++ ++/* Load VALUE into DEST. TEMP is as for loongarch_force_temporary. */ ++ ++void ++loongarch_move_integer (rtx temp, rtx dest, unsigned HOST_WIDE_INT value) ++{ ++ struct loongarch_integer_op codes[LARCH_MAX_INTEGER_OPS]; ++ machine_mode mode; ++ unsigned int i, num_ops; ++ rtx x; ++ ++ mode = GET_MODE (dest); ++ num_ops = loongarch_build_integer (codes, value); ++ ++ /* Apply each binary operation to X. Invariant: X is a legitimate ++ source operand for a SET pattern. 
*/ ++ x = GEN_INT (codes[0].value); ++ for (i = 1; i < num_ops; i++) ++ { ++ if (!can_create_pseudo_p ()) ++ { ++ emit_insn (gen_rtx_SET (temp, x)); ++ x = temp; ++ } ++ else ++ x = force_reg (mode, x); ++ switch (codes[i].method) ++ { ++ case METHOD_NORMAL: ++ x = gen_rtx_fmt_ee (codes[i].code, mode, x, GEN_INT (codes[i].value)); ++ break; ++ case METHOD_LU32I: ++ emit_insn (gen_rtx_SET (x, gen_rtx_IOR (DImode, ++ gen_rtx_ZERO_EXTEND (DImode, ++ gen_rtx_SUBREG (SImode, x, 0)), ++ GEN_INT (codes[i].value)))); ++ break; ++ case METHOD_LU52I: ++ emit_insn (gen_rtx_SET (x, ++ gen_rtx_UNSPEC (DImode, ++ gen_rtvec (2, ++ x, ++ GEN_INT (codes[i].value)), ++ UNSPEC_LU52I_D))); ++ break; ++ case METHOD_INSV: ++ emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, ++ x, ++ GEN_INT (20), ++ GEN_INT (32)), ++ gen_rtx_REG (DImode, 0))); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ } ++ /* X is now a register or the last pending operation; store it in DEST. */ ++ emit_insn (gen_rtx_SET (dest, x)); ++} ++ ++/* Subroutine of loongarch_legitimize_move. Move constant SRC into register ++ DEST given that SRC satisfies immediate_operand but doesn't satisfy ++ move_operand. */ ++ ++static void ++loongarch_legitimize_const_move (machine_mode mode, rtx dest, rtx src) ++{ ++ rtx base, offset; ++ ++ /* Split moves of big integers into smaller pieces. */ ++ if (splittable_const_int_operand (src, mode)) ++ { ++ loongarch_move_integer (dest, dest, INTVAL (src)); ++ return; ++ } ++ ++ /* Generate the appropriate access sequences for TLS symbols. */ ++ if (loongarch_tls_symbol_p (src)) ++ { ++ loongarch_emit_move (dest, loongarch_legitimize_tls_address (src)); ++ return; ++ } ++ ++ /* If we have (const (plus symbol offset)), and that expression cannot ++ be forced into memory, load the symbol first and add in the offset. ++ Prefer to do this even if the constant _can_ be forced into memory, ++ as it usually produces better code. 
*/ ++ split_const (src, &base, &offset); ++ if (offset != const0_rtx ++ && (targetm.cannot_force_const_mem (mode, src) ++ || (can_create_pseudo_p ()))) ++ { ++ base = loongarch_force_temporary (dest, base); ++ loongarch_emit_move (dest, loongarch_add_offset (NULL, base, INTVAL (offset))); ++ return; ++ } ++ ++ src = force_const_mem (mode, src); ++ ++ loongarch_emit_move (dest, src); ++} ++ ++/* If (set DEST SRC) is not a valid move instruction, emit an equivalent ++ valid sequence and return true; otherwise emit nothing and return false. */ ++ ++bool ++loongarch_legitimize_move (machine_mode mode, rtx dest, rtx src) ++{ ++ /* A non-register destination needs a register or constant-zero source; force anything else into a register. */ ++ if (!register_operand (dest, mode) && !reg_or_0_operand (src, mode)) ++ { ++ loongarch_emit_move (dest, force_reg (mode, src)); ++ return true; ++ } ++ ++ /* Both src and dest are non-registers; one special case is supported where ++ the source is (const_int 0) and the store can source the zero register. ++ */ ++ if (!register_operand (dest, mode) ++ && !register_operand (src, mode) ++ && !const_0_operand (src, mode)) ++ { ++ loongarch_emit_move (dest, force_reg (mode, src)); ++ return true; ++ } ++ ++ /* We need to deal with constants that would be legitimate ++ immediate_operands but aren't legitimate move_operands. */ ++ if (CONSTANT_P (src) && !move_operand (src, mode)) ++ { ++ loongarch_legitimize_const_move (mode, dest, src); ++ set_unique_reg_note (get_last_insn (), REG_EQUAL, copy_rtx (src)); ++ return true; ++ } ++ return false; ++} ++ ++/* Walk X looking for direct small-data references. NOTE(review): as written ++ this only recurses into MEM addresses and never returns true; CONTEXT is unused. */ ++ ++static int ++loongarch_small_data_pattern_1 (rtx x, enum loongarch_symbol_context context) ++{ ++ subrtx_var_iterator::array_type array; ++ FOR_EACH_SUBRTX_VAR (iter, array, x, ALL) ++ { ++ rtx x = *iter; ++ ++ /* Ignore things like "g" constraints in asms. We make no particular ++ guarantee about which symbolic constants are acceptable as asm operands ++ versus which must be forced into a GPR. 
*/ ++ if (GET_CODE (x) == ASM_OPERANDS) ++ iter.skip_subrtxes (); ++ else if (MEM_P (x)) ++ { ++ if (loongarch_small_data_pattern_1 (XEXP (x, 0), SYMBOL_CONTEXT_MEM)) ++ return true; ++ iter.skip_subrtxes (); ++ } ++ } ++ return false; ++} ++ ++/* Return true if OP refers to small data symbols directly, not through ++ a LO_SUM. */ ++ ++bool ++loongarch_small_data_pattern_p (rtx op) ++{ ++ return loongarch_small_data_pattern_1 (op, SYMBOL_CONTEXT_LEA); ++} ++ ++/* Walk *LOC for small-data references to rewrite. NOTE(review): as written this ++ only recurses into MEM addresses and changes nothing; CONTEXT is unused. */ ++ ++static void ++loongarch_rewrite_small_data_1 (rtx *loc, enum loongarch_symbol_context context) ++{ ++ subrtx_ptr_iterator::array_type array; ++ FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL) ++ { ++ rtx *loc = *iter; ++ if (MEM_P (*loc)) ++ { ++ loongarch_rewrite_small_data_1 (&XEXP (*loc, 0), SYMBOL_CONTEXT_MEM); ++ iter.skip_subrtxes (); ++ } ++ } ++} ++ ++/* Return the result of copy_insn on PATTERN after passing that copy through ++ loongarch_rewrite_small_data_1 with SYMBOL_CONTEXT_LEA. */ ++ ++rtx ++loongarch_rewrite_small_data (rtx pattern) ++{ ++ pattern = copy_insn (pattern); ++ loongarch_rewrite_small_data_1 (&pattern, SYMBOL_CONTEXT_LEA); ++ return pattern; ++} ++ ++/* The cost of loading values from the constant pool. It should be ++ larger than the cost of any constant we want to synthesize inline. */ ++#define CONSTANT_POOL_COST COSTS_N_INSNS (8) ++ ++/* Return true if there is an instruction that implements CODE ++ and if that instruction accepts X as an immediate operand. */ ++ ++static int ++loongarch_immediate_operand_p (int code, HOST_WIDE_INT x) ++{ ++ switch (code) ++ { ++ case ASHIFT: ++ case ASHIFTRT: ++ case LSHIFTRT: ++ /* All shift counts are truncated to a valid constant. */ ++ return true; ++ ++ case ROTATE: ++ case ROTATERT: ++ /* Likewise rotates, if the target supports rotates at all. 
*/ ++ return true; ++ ++ case AND: ++ case IOR: ++ case XOR: ++ /* These instructions take 12-bit unsigned immediates. */ ++ return SMALL_OPERAND_UNSIGNED (x); ++ ++ case PLUS: ++ case LT: ++ case LTU: ++ /* These instructions take 12-bit signed immediates. */ ++ return SMALL_OPERAND (x); ++ ++ case EQ: ++ case NE: ++ case GT: ++ case GTU: ++ /* The "immediate" forms of these instructions are really ++ implemented as comparisons with register 0. */ ++ return x == 0; ++ ++ case GE: ++ case GEU: ++ /* Likewise, meaning that the only valid immediate operand is 1. */ ++ return x == 1; ++ ++ case LE: ++ /* We add 1 to the immediate and use SLT. */ ++ return SMALL_OPERAND (x + 1); ++ ++ case LEU: ++ /* Likewise SLTU, but reject the always-true case. */ ++ return SMALL_OPERAND (x + 1) && x + 1 != 0; ++ ++ case SIGN_EXTRACT: ++ case ZERO_EXTRACT: ++ /* The bit position and size are immediate operands. */ ++ return 1; ++ ++ default: ++ /* By default assume that $0 can be used for 0. */ ++ return x == 0; ++ } ++} ++ ++/* Return the cost of binary operation X, given that the instruction ++ sequence for a word-sized or smaller operation has cost SINGLE_COST ++ and that the sequence of a double-word operation has cost DOUBLE_COST. ++ If SPEED is true, optimize for speed otherwise optimize for size. */ ++ ++static int ++loongarch_binary_cost (rtx x, int single_cost, int double_cost, bool speed) ++{ ++ int cost; ++ /* Only a mode of exactly two words is charged DOUBLE_COST. */ ++ if (GET_MODE_SIZE (GET_MODE (x)) == UNITS_PER_WORD * 2) ++ cost = double_cost; ++ else ++ cost = single_cost; ++ return (cost ++ + set_src_cost (XEXP (x, 0), GET_MODE (x), speed) ++ + rtx_cost (XEXP (x, 1), GET_MODE (x), GET_CODE (x), 1, speed)); ++} ++ ++/* Return the cost of a floating-point multiplication in MODE: fp_mult_df for DFmode, fp_mult_sf for any other mode. */ ++ ++static int ++loongarch_fp_mult_cost (machine_mode mode) ++{ ++ return mode == DFmode ? loongarch_cost->fp_mult_df : loongarch_cost->fp_mult_sf; ++} ++ ++/* Return the cost of floating-point divisions of mode MODE. 
*/ ++ ++static int ++loongarch_fp_div_cost (machine_mode mode) ++{ ++ return mode == DFmode ? loongarch_cost->fp_div_df : loongarch_cost->fp_div_sf; ++} ++ ++/* Return the cost of sign-extending OP to mode MODE, excluding the cost of ++ OP itself: zero for a MEM or a 64-bit SImode-to-DImode extension, else one insn. */ ++ ++static int ++loongarch_sign_extend_cost (machine_mode mode, rtx op) ++{ ++ if (MEM_P (op)) ++ /* Extended loads are as cheap as unextended ones. */ ++ return 0; ++ ++ if (TARGET_64BIT && mode == DImode && GET_MODE (op) == SImode) ++ /* A sign extension from SImode to DImode in 64-bit mode is free. */ ++ return 0; ++ /* Everything else is charged one instruction. */ ++ return COSTS_N_INSNS (1); ++} ++ ++/* Return the cost of zero-extending OP to mode MODE, excluding the cost of ++ OP itself: zero for a MEM, two insns for 64-bit SImode-to-DImode, else one. */ ++ ++static int ++loongarch_zero_extend_cost (machine_mode mode, rtx op) ++{ ++ if (MEM_P (op)) ++ /* Extended loads are as cheap as unextended ones. */ ++ return 0; ++ ++ if (TARGET_64BIT && mode == DImode && GET_MODE (op) == SImode) ++ /* We need a shift left by 32 bits and a shift right by 32 bits. */ ++ return COSTS_N_INSNS (2); ++ ++ /* We can use ANDI. */ ++ return COSTS_N_INSNS (1); ++} ++ ++/* Return the cost of moving between two registers of mode MODE, assuming ++ the move is done in pieces of at most UNITS bytes (one insn per piece, rounded up). */ ++ ++static int ++loongarch_set_reg_reg_piece_cost (machine_mode mode, unsigned int units) ++{ ++ return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units); ++} ++ ++/* Return the cost of moving between two registers of mode MODE. 
*/ ++ ++static int ++loongarch_set_reg_reg_cost (machine_mode mode) ++{ ++ switch (GET_MODE_CLASS (mode)) ++ { ++ case MODE_CC: ++ return loongarch_set_reg_reg_piece_cost (mode, GET_MODE_SIZE (CCmode)); ++ ++ case MODE_FLOAT: ++ case MODE_COMPLEX_FLOAT: ++ case MODE_VECTOR_FLOAT: ++ if (TARGET_HARD_FLOAT) ++ return loongarch_set_reg_reg_piece_cost (mode, UNITS_PER_HWFPVALUE); ++ /* Fall through to the word-sized default when not TARGET_HARD_FLOAT. */ ++ ++ default: ++ return loongarch_set_reg_reg_piece_cost (mode, UNITS_PER_WORD); ++ } ++} ++ ++/* Implement TARGET_RTX_COSTS. Store the cost of X in *TOTAL. Per the hook contract, returning true means *TOTAL is final and returning false lets the caller add the operand costs. */ ++ ++static bool ++loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, ++ int opno ATTRIBUTE_UNUSED, int *total, bool speed) ++{ ++ int code = GET_CODE (x); ++ bool float_mode_p = FLOAT_MODE_P (mode); ++ int cost; ++ rtx addr; ++ ++ /* The cost of a COMPARE is hard to define for LARCH. COMPAREs don't ++ appear in the instruction stream, and the cost of a comparison is ++ really the cost of the branch or scc condition. At the time of ++ writing, GCC only uses an explicit outer COMPARE code when optabs ++ is testing whether a constant is expensive enough to force into a ++ register. We want optabs to pass such constants through the LARCH ++ expanders instead, so make all constants very cheap here. */ ++ if (outer_code == COMPARE) ++ { ++ gcc_assert (CONSTANT_P (x)); ++ *total = 0; ++ return true; ++ } ++ ++ switch (code) ++ { ++ case CONST_INT: ++ /* Treat *clear_upper32-style ANDs as having zero cost in the ++ second operand. The cost is entirely in the first operand. ++ ++ ??? This is needed because we would otherwise try to CSE ++ the constant operand. Although that's the right thing for ++ instructions that continue to be a register operation throughout ++ compilation, it is disastrous for instructions that could ++ later be converted into a memory operation. 
*/ ++ if (TARGET_64BIT ++ && outer_code == AND ++ && UINTVAL (x) == 0xffffffff) ++ { ++ *total = 0; ++ return true; ++ } ++ ++ /* When not optimizing for size, we care more about the cost ++ of hot code, and hot code is often in a loop. If a constant ++ operand needs to be forced into a register, we will often be ++ able to hoist the constant load out of the loop, so the load ++ should not contribute to the cost. */ ++ if (speed || loongarch_immediate_operand_p (outer_code, INTVAL (x))) ++ { ++ *total = 0; ++ return true; ++ } ++ /* Fall through: when optimizing for size, non-immediate integers are costed below. */ ++ ++ case CONST: ++ case SYMBOL_REF: ++ case LABEL_REF: ++ case CONST_DOUBLE: ++ if (force_to_mem_operand (x, VOIDmode)) ++ { ++ *total = COSTS_N_INSNS (1); ++ return true; ++ } ++ cost = loongarch_const_insns (x); ++ if (cost > 0) ++ { ++ /* If the constant is likely to be stored in a GPR, SETs of ++ single-insn constants are as cheap as register sets; we ++ never want to CSE them. ++ ++ Don't reduce the cost of storing a floating-point zero in ++ FPRs. If we have a zero in an FPR for other reasons, we ++ can get better cfg-cleanup and delayed-branch results by ++ using it consistently, rather than using $0 sometimes and ++ an FPR at other times. Also, moves between floating-point ++ registers are sometimes cheaper than MOVGR2FR.W/MOVGR2FR.D $0. */ ++ if (cost == 1 ++ && outer_code == SET ++ && !(float_mode_p && TARGET_HARD_FLOAT)) ++ cost = 0; ++ /* When code loads a constant N>1 times, we rarely ++ want to CSE the constant itself. It is usually better to ++ have N copies of the last operation in the sequence and one ++ shared copy of the other operations. ++ ++ Also, if we have a CONST_INT, we don't know whether it is ++ for a word or doubleword operation, so we cannot rely on ++ the result of loongarch_build_integer. */ ++ else if ((outer_code == SET || GET_MODE (x) == VOIDmode)) ++ cost = 1; ++ *total = COSTS_N_INSNS (cost); ++ return true; ++ } ++ /* The value will need to be fetched from the constant pool. 
*/ ++ *total = CONSTANT_POOL_COST; ++ return true; ++ ++ case MEM: ++ /* If the address is legitimate, charge the number of ++ instructions it needs plus one. */ ++ addr = XEXP (x, 0); ++ cost = loongarch_address_insns (addr, mode, true); ++ if (cost > 0) ++ { ++ *total = COSTS_N_INSNS (cost + 1); ++ return true; ++ } ++ /* Check for a scaled indexed address. */ ++ if (loongarch_lx_address_p (addr, mode)) ++ { ++ *total = COSTS_N_INSNS (2); ++ return true; ++ } ++ /* Otherwise use the default handling. */ ++ return false; ++ ++ case FFS: ++ *total = COSTS_N_INSNS (6); ++ return false; ++ ++ case NOT: ++ *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 2 : 1); ++ return false; ++ ++ case AND: ++ /* Check for a *clear_upper32 pattern and treat it like a zero ++ extension. See the pattern's comment for details. */ ++ if (TARGET_64BIT ++ && mode == DImode ++ && CONST_INT_P (XEXP (x, 1)) ++ && UINTVAL (XEXP (x, 1)) == 0xffffffff) ++ { ++ *total = (loongarch_zero_extend_cost (mode, XEXP (x, 0)) ++ + set_src_cost (XEXP (x, 0), mode, speed)); ++ return true; ++ } ++ /* (AND (NOT op0) (NOT op1)) is a NOR operation that is costed as ++ a single instruction (two when the mode is wider than a word). */ ++ if (GET_CODE (XEXP (x, 0)) == NOT ++ && GET_CODE (XEXP (x, 1)) == NOT) ++ { ++ cost = GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 2 : 1; ++ *total = (COSTS_N_INSNS (cost) ++ + set_src_cost (XEXP (XEXP (x, 0), 0), mode, speed) ++ + set_src_cost (XEXP (XEXP (x, 1), 0), mode, speed)); ++ return true; ++ } ++ ++ /* Fall through. */ ++ ++ case IOR: ++ case XOR: ++ /* Double-word operations use two single-word operations. 
*/ ++ *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (2), ++ speed); ++ return true; ++ ++ case ASHIFT: ++ case ASHIFTRT: ++ case LSHIFTRT: ++ case ROTATE: ++ case ROTATERT: ++ if (CONSTANT_P (XEXP (x, 1))) ++ *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (4), ++ speed); ++ else ++ *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (12), ++ speed); ++ return true; ++ ++ case ABS: ++ if (float_mode_p) ++ *total = loongarch_cost->fp_add; ++ else ++ *total = COSTS_N_INSNS (4); ++ return false; ++ ++ case LT: ++ case LTU: ++ case LE: ++ case LEU: ++ case GT: ++ case GTU: ++ case GE: ++ case GEU: ++ case EQ: ++ case NE: ++ case UNORDERED: ++ case LTGT: ++ case UNGE: ++ case UNGT: ++ case UNLE: ++ case UNLT: ++ /* Branch comparisons have VOIDmode, so use the first operand's ++ mode instead. */ ++ mode = GET_MODE (XEXP (x, 0)); ++ if (FLOAT_MODE_P (mode)) ++ { ++ *total = loongarch_cost->fp_add; ++ return false; ++ } ++ *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (4), ++ speed); ++ return true; ++ ++ case MINUS: ++ case PLUS: ++ if (float_mode_p) ++ { ++ *total = loongarch_cost->fp_add; ++ return false; ++ } ++ ++ /* An add whose first operand is a MULT (equivalent to a left shift) by a ++ const_immlsa_operand immediate is costed as a single instruction. */ ++ if (((ISA_HAS_LSA && mode == SImode) ++ || (ISA_HAS_DLSA && mode == DImode)) ++ && GET_CODE (XEXP (x, 0)) == MULT) ++ { ++ rtx op2 = XEXP (XEXP (x, 0), 1); ++ if (const_immlsa_operand (op2, mode)) ++ { ++ *total = (COSTS_N_INSNS (1) ++ + set_src_cost (XEXP (XEXP (x, 0), 0), mode, speed) ++ + set_src_cost (XEXP (x, 1), mode, speed)); ++ return true; ++ } ++ } ++ ++ /* Double-word operations require three single-word operations and ++ an SLTU. 
*/ ++ *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), ++ COSTS_N_INSNS (4), ++ speed); ++ return true; ++ ++ case NEG: ++ if (float_mode_p) ++ *total = loongarch_cost->fp_add; ++ else ++ *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 4 : 1); ++ return false; ++ /* A fused multiply-add is charged the same as a plain FP multiply. */ ++ case FMA: ++ *total = loongarch_fp_mult_cost (mode); ++ return false; ++ ++ case MULT: ++ if (float_mode_p) ++ *total = loongarch_fp_mult_cost (mode); ++ else if (mode == DImode && !TARGET_64BIT) ++ /* Synthesized from 2 mulsi3s, 1 mulsidi3 and two additions, ++ where the mulsidi3 always includes an MFHI and an MFLO. */ ++ // FIXED ME??? ++ *total = (speed ++ ? loongarch_cost->int_mult_si * 3 + 6 ++ : COSTS_N_INSNS (7)); ++ else if (!speed) ++ *total = COSTS_N_INSNS (1) + 1; ++ else if (mode == DImode) ++ *total = loongarch_cost->int_mult_di; ++ else ++ *total = loongarch_cost->int_mult_si; ++ return false; ++ ++ case DIV: ++ /* Check for a reciprocal. */ ++ if (float_mode_p ++ && ISA_HAS_FP_RECIP_RSQRT (mode) ++ && flag_unsafe_math_optimizations ++ && XEXP (x, 0) == CONST1_RTX (mode)) ++ { ++ if (outer_code == SQRT || GET_CODE (XEXP (x, 1)) == SQRT) ++ /* An rsqrta or rsqrtb pattern. Count the ++ division as being free. */ ++ *total = set_src_cost (XEXP (x, 1), mode, speed); ++ else ++ *total = (loongarch_fp_div_cost (mode) ++ + set_src_cost (XEXP (x, 1), mode, speed)); ++ return true; ++ } ++ /* Fall through. */ ++ ++ case SQRT: ++ case MOD: ++ if (float_mode_p) ++ { ++ *total = loongarch_fp_div_cost (mode); ++ return false; ++ } ++ /* Fall through. 
*/ ++ ++ case UDIV: ++ case UMOD: ++ if (!speed) ++ { ++ *total = COSTS_N_INSNS (loongarch_idiv_insns (mode)); ++ } ++ else if (mode == DImode) ++ *total = loongarch_cost->int_div_di; ++ else ++ *total = loongarch_cost->int_div_si; ++ return false; ++ ++ case SIGN_EXTEND: ++ *total = loongarch_sign_extend_cost (mode, XEXP (x, 0)); ++ return false; ++ ++ case ZERO_EXTEND: ++ *total = loongarch_zero_extend_cost (mode, XEXP (x, 0)); ++ return false; ++ case TRUNCATE: ++ /* Costings for highpart multiplies. Matches a truncated operand of the form: ++ ++ (lshiftrt:DI (mult:DI (sign_extend:DI (...)) ++ (sign_extend:DI (...))) ++ (const_int 32)) ++ */ ++ if ((GET_CODE (XEXP (x, 0)) == ASHIFTRT ++ || GET_CODE (XEXP (x, 0)) == LSHIFTRT) ++ && CONST_INT_P (XEXP (XEXP (x, 0), 1)) ++ && ((INTVAL (XEXP (XEXP (x, 0), 1)) == 32 ++ && GET_MODE (XEXP (x, 0)) == DImode) ++ || (ISA_HAS_DMUL ++ && INTVAL (XEXP (XEXP (x, 0), 1)) == 64 ++ && GET_MODE (XEXP (x, 0)) == TImode)) ++ && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT ++ && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND ++ && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND) ++ || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND ++ && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) ++ == ZERO_EXTEND)))) ++ { ++ if (!speed) ++ *total = COSTS_N_INSNS (1) + 1; ++ else if (mode == DImode) ++ *total = loongarch_cost->int_mult_di; ++ else ++ *total = loongarch_cost->int_mult_si; ++ ++ /* Sign extension is free, zero extension costs for DImode when ++ on a 64bit core / when DMUL is present. 
*/ ++ for (int i = 0; i < 2; ++i) ++ { ++ rtx op = XEXP (XEXP (XEXP (x, 0), 0), i); ++ if (ISA_HAS_DMUL ++ && GET_CODE (op) == ZERO_EXTEND ++ && GET_MODE (op) == DImode) ++ *total += rtx_cost (op, DImode, MULT, i, speed); ++ else ++ *total += rtx_cost (XEXP (op, 0), VOIDmode, GET_CODE (op), ++ 0, speed); ++ } ++ ++ return true; ++ } ++ return false; ++ ++ case FLOAT: ++ case UNSIGNED_FLOAT: ++ case FIX: ++ case FLOAT_EXTEND: ++ case FLOAT_TRUNCATE: ++ *total = loongarch_cost->fp_add; ++ return false; ++ ++ case SET: ++ if (register_operand (SET_DEST (x), VOIDmode) ++ && reg_or_0_operand (SET_SRC (x), VOIDmode)) ++ { ++ *total = loongarch_set_reg_reg_cost (GET_MODE (SET_DEST (x))); ++ return true; ++ } ++ return false; ++ ++ default: ++ return false; ++ } ++} ++ ++/* Implement TARGET_ADDRESS_COST: the cost is the value returned by loongarch_address_insns for ADDR in MODE. */ ++ ++static int ++loongarch_address_cost (rtx addr, machine_mode mode, ++ addr_space_t as ATTRIBUTE_UNUSED, ++ bool speed ATTRIBUTE_UNUSED) ++{ ++ return loongarch_address_insns (addr, mode, false); ++} ++ ++ ++/* Return one word of double-word value OP, taking into account the fixed ++ endianness of certain registers. HIGH_P is true to select the high part, ++ false to select the low part. */ ++ ++rtx ++loongarch_subword (rtx op, bool high_p) ++{ ++ unsigned int byte, offset; ++ machine_mode mode; ++ /* A VOIDmode OP is treated as TImode on 64-bit targets, DImode otherwise. */ ++ mode = GET_MODE (op); ++ if (mode == VOIDmode) ++ mode = TARGET_64BIT ? TImode : DImode; ++ ++ if (high_p) ++ byte = UNITS_PER_WORD; ++ else ++ byte = 0; ++ ++ if (FP_REG_RTX_P (op)) ++ { ++ /* Paired FPRs are always ordered little-endian. */ ++ offset = (UNITS_PER_WORD < UNITS_PER_HWFPVALUE ? high_p : byte != 0); ++ return gen_rtx_REG (word_mode, REGNO (op) + offset); ++ } ++ ++ if (MEM_P (op)) ++ return loongarch_rewrite_small_data (adjust_address (op, word_mode, byte)); ++ ++ return simplify_gen_subreg (word_mode, op, mode, byte); ++} ++ ++/* Return true if a move from SRC to DEST should be split into two. ++ SPLIT_TYPE describes the split condition. 
*/ ++ ++bool ++loongarch_split_move_p (rtx dest, rtx src, enum loongarch_split_type split_type) ++{ ++ /* 8-byte FPR-to-FPR moves can be done in a single instruction, if they're ++ allowed at all. NOTE(review): SPLIT_TYPE is not consulted in this body. */ ++ unsigned int size = GET_MODE_SIZE (GET_MODE (dest)); ++ if (size == 8 && FP_REG_RTX_P (src) && FP_REG_RTX_P (dest)) ++ return false; ++ ++ /* Check for floating-point loads and stores. */ ++ if (size == 8) ++ { ++ if (FP_REG_RTX_P (dest) && MEM_P (src)) ++ return false; ++ if (FP_REG_RTX_P (src) && MEM_P (dest)) ++ return false; ++ } ++ /* Otherwise split all multiword moves. */ ++ return size > UNITS_PER_WORD; ++} ++ ++/* Split a move from SRC to DEST, given that loongarch_split_move_p holds. ++ SPLIT_TYPE describes the split condition. INSN_, if nonnull, is used to find the following insn. */ ++ ++void ++loongarch_split_move (rtx dest, rtx src, enum loongarch_split_type split_type, rtx insn_) ++{ ++ rtx low_dest; ++ ++ gcc_checking_assert (loongarch_split_move_p (dest, src, split_type)); ++ if (FP_REG_RTX_P (dest) || FP_REG_RTX_P (src)) ++ { ++ if (!TARGET_64BIT && GET_MODE (dest) == DImode) ++ emit_insn (gen_move_doubleword_fprdi (dest, src)); ++ else if (!TARGET_64BIT && GET_MODE (dest) == DFmode) ++ emit_insn (gen_move_doubleword_fprdf (dest, src)); ++ else if (TARGET_64BIT && GET_MODE (dest) == TFmode) ++ emit_insn (gen_move_doubleword_fprtf (dest, src)); ++ else ++ gcc_unreachable (); ++ } ++ else ++ { ++ /* The operation can be split into two normal moves. Move the high ++ word first when the low destination register overlaps SRC. */ ++ low_dest = loongarch_subword (dest, false); ++ if (REG_P (low_dest) ++ && reg_overlap_mentioned_p (low_dest, src)) ++ { ++ loongarch_emit_move (loongarch_subword (dest, true), loongarch_subword (src, true)); ++ loongarch_emit_move (low_dest, loongarch_subword (src, false)); ++ } ++ else ++ { ++ loongarch_emit_move (low_dest, loongarch_subword (src, false)); ++ loongarch_emit_move (loongarch_subword (dest, true), loongarch_subword (src, true)); ++ } ++ } ++ ++ /* This is a hack. 
See if the next insn uses DEST and if so, see if we ++ can forward SRC for DEST. This is most useful if the next insn is a ++ simple store. */ ++ rtx_insn *insn = (rtx_insn *)insn_; ++ struct loongarch_address_info addr = {}; ++ if (insn) ++ { ++ rtx_insn *next = next_nonnote_nondebug_insn_bb (insn); ++ if (next) ++ { ++ rtx set = single_set (next); ++ if (set && SET_SRC (set) == dest) ++ { ++ if (MEM_P (src)) ++ { ++ rtx tmp = XEXP (src, 0); ++ loongarch_classify_address (&addr, tmp, GET_MODE (tmp), true); ++ if (addr.reg && !reg_overlap_mentioned_p (dest, addr.reg)) ++ validate_change (next, &SET_SRC (set), src, false); ++ } ++ else ++ validate_change (next, &SET_SRC (set), src, false); ++ } ++ } ++ } ++} ++ ++/* Return the split type for instruction INSN: by speed or size of its basic block, or SPLIT_IF_NECESSARY when it has no block. */ ++ ++static enum loongarch_split_type ++loongarch_insn_split_type (rtx insn) ++{ ++ basic_block bb = BLOCK_FOR_INSN (insn); ++ if (bb) ++ { ++ if (optimize_bb_for_speed_p (bb)) ++ return SPLIT_FOR_SPEED; ++ else ++ return SPLIT_FOR_SIZE; ++ } ++ /* Once CFG information has been removed, we should trust the optimization ++ decisions made by previous passes and only split where necessary. */ ++ return SPLIT_IF_NECESSARY; ++} ++ ++ ++/* Return true if a move from SRC to DEST in INSN should be split. */ ++ ++bool ++loongarch_split_move_insn_p (rtx dest, rtx src, rtx insn) ++{ ++ return loongarch_split_move_p (dest, src, loongarch_insn_split_type (insn)); ++} ++ ++/* Split a move from SRC to DEST in INSN, given that loongarch_split_move_insn_p ++ holds. */ ++ ++void ++loongarch_split_move_insn (rtx dest, rtx src, rtx insn) ++{ ++ loongarch_split_move (dest, src, loongarch_insn_split_type (insn), insn); ++} ++ ++ ++/* Forward declaration; used by loongarch_output_move below. */ ++static HOST_WIDE_INT ++loongarch_constant_alignment (const_tree exp, HOST_WIDE_INT align); ++ ++/* Return the appropriate instructions to move SRC into DEST. Assume ++ that SRC is operand 1 and DEST is operand 0. 
*/ ++ ++const char * ++loongarch_output_move (rtx dest, rtx src) ++{ ++ enum rtx_code dest_code = GET_CODE (dest); ++ enum rtx_code src_code = GET_CODE (src); ++ machine_mode mode = GET_MODE (dest); ++ bool dbl_p = (GET_MODE_SIZE (mode) == 8); ++ enum loongarch_symbol_type symbol_type; ++ /* Moves that loongarch_split_move_p says must be split get the hash template instead of an instruction. */ ++ if (loongarch_split_move_p (dest, src, SPLIT_IF_NECESSARY)) ++ return "#"; ++ ++ if ((src_code == REG && GP_REG_P (REGNO (src))) ++ || (src == CONST0_RTX (mode))) ++ { ++ if (dest_code == REG) ++ { ++ if (GP_REG_P (REGNO (dest))) ++ return "or\t%0,%z1,$zero"; ++ ++ if (FP_REG_P (REGNO (dest))) ++ return dbl_p ? "movgr2fr.d\t%0,%z1" : "movgr2fr.w\t%0,%z1"; ++ } ++ if (dest_code == MEM) ++ { ++ rtx offset = XEXP (dest, 0); ++ if (GET_CODE(offset) == PLUS) ++ offset = XEXP(offset, 1); ++ switch (GET_MODE_SIZE (mode)) ++ { ++ case 1: return "st.b\t%z1,%0"; ++ case 2: return "st.h\t%z1,%0"; ++ case 4: ++ if (const_arith_operand (offset, Pmode)) ++ return "st.w\t%z1,%0"; ++ else ++ return "stptr.w\t%z1,%0"; ++ case 8: ++ if (const_arith_operand (offset, Pmode)) ++ return "st.d\t%z1,%0"; ++ else ++ return "stptr.d\t%z1,%0"; ++ default: gcc_unreachable (); ++ } ++ } ++ } ++ if (dest_code == REG && GP_REG_P (REGNO (dest))) ++ { ++ if (src_code == REG) ++ if (FP_REG_P (REGNO (src))) ++ return dbl_p ? 
"movfr2gr.d\t%0,%1" : "movfr2gr.s\t%0,%1"; ++ /* GPR loads: 4- and 8-byte accesses use ld.w or ld.d when the offset is a const_arith_operand, otherwise the ldptr form. */ ++ if (src_code == MEM) ++ { ++ rtx offset = XEXP (src, 0); ++ if (GET_CODE(offset) == PLUS) ++ offset = XEXP(offset, 1); ++ switch (GET_MODE_SIZE (mode)) ++ { ++ case 1: return "ld.bu\t%0,%1"; ++ case 2: return "ld.hu\t%0,%1"; ++ case 4: ++ if (const_arith_operand (offset, Pmode)) ++ return "ld.w\t%0,%1"; ++ else ++ return "ldptr.w\t%0,%1"; ++ case 8: ++ if (const_arith_operand (offset, Pmode)) ++ return "ld.d\t%0,%1"; ++ else ++ return "ldptr.d\t%0,%1"; ++ default: gcc_unreachable (); ++ } ++ } ++ ++ if (src_code == CONST_INT) ++ { ++ if (LUI_INT (src)) ++ return "lu12i.w\t%0,%1>>12\t\t\t# %X1"; ++ else if (SMALL_INT (src)) ++ return "addi.w\t%0,$zero,%1\t\t\t# %X1"; ++ else if (SMALL_INT_UNSIGNED (src)) ++ return "ori\t%0,$zero,%1\t\t\t# %X1"; ++ else if (LU52I_INT (src)) ++ return "lu52i.d\t%0,$zero,%X1>>52\t\t\t# %1"; ++ else ++ gcc_unreachable (); ++ } ++ ++ if (symbolic_operand (src, VOIDmode)) ++ { ++ /* TINY enters the do-block and runs on into the TINY_STATIC label inside it; a break or a failed alignment test leaves the block and falls into the la.local or la.global selection. */ ++ switch (loongarch_cmodel_var) ++ { ++ case LARCH_CMODEL_TINY: ++ do ++ { ++ if (loongarch_global_symbol_p (src) ++ && !loongarch_symbol_binds_local_p (src)) ++ break; ++ case LARCH_CMODEL_TINY_STATIC: ++ if (loongarch_weak_symbol_p (src)) ++ break; ++ ++ /* The symbol must be aligned to 4 bytes. 
*/ ++ unsigned int align; ++ ++ if (GET_CODE (src) == LABEL_REF) ++ align = 128 /* arbitrary value that passes the multiple-of-32 test below */; ++ /* Alignment lookup copied from aarch64. */ ++ else if (CONSTANT_POOL_ADDRESS_P (src)) ++ align = GET_MODE_ALIGNMENT (get_pool_mode (src)); ++ else if (TREE_CONSTANT_POOL_ADDRESS_P (src)) ++ { ++ tree exp = SYMBOL_REF_DECL (src); ++ align = TYPE_ALIGN (TREE_TYPE (exp)); ++ align = loongarch_constant_alignment (exp, align); ++ } ++ else if (SYMBOL_REF_DECL (src)) ++ align = DECL_ALIGN (SYMBOL_REF_DECL (src)); ++ else if (SYMBOL_REF_HAS_BLOCK_INFO_P (src) ++ && SYMBOL_REF_BLOCK (src) != NULL) ++ align = SYMBOL_REF_BLOCK (src)->alignment; ++ else ++ align = BITS_PER_UNIT; ++ /* ALIGN is in bits, so 4 * 8 is the 4-byte requirement. */ ++ if (align % (4 * 8) == 0) ++ return "pcaddi\t%0,%%pcrel(%1)>>2"; ++ } ++ while (0); ++ case LARCH_CMODEL_NORMAL: ++ case LARCH_CMODEL_LARGE: ++ if (!loongarch_global_symbol_p (src) ++ || loongarch_symbol_binds_local_p (src)) ++ return "la.local\t%0,%1"; ++ else ++ return "la.global\t%0,%1"; ++ case LARCH_CMODEL_EXTREME: ++ sorry ("not support yet."); ++ return ""; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ } ++ } ++ if (src_code == REG && FP_REG_P (REGNO (src))) ++ { ++ if (dest_code == REG && FP_REG_P (REGNO (dest))) ++ return dbl_p ? "fmov.d\t%0,%1" : "fmov.s\t%0,%1"; ++ ++ if (dest_code == MEM) ++ return dbl_p ? "fst.d\t%1,%0" : "fst.s\t%1,%0"; ++ } ++ if (dest_code == REG && FP_REG_P (REGNO (dest))) ++ { ++ if (src_code == MEM) ++ return dbl_p ? "fld.d\t%0,%1" : "fld.s\t%0,%1"; ++ } ++ gcc_unreachable (); ++} ++ ++/* Return true if CMP1 is a suitable second operand for integer ordering ++ test CODE. See also the *sCC patterns in loongarch.md. 
*/ ++ ++static bool ++loongarch_int_order_operand_ok_p (enum rtx_code code, rtx cmp1) ++{ ++ switch (code) ++ { ++ case GT: ++ case GTU: ++ return reg_or_0_operand (cmp1, VOIDmode); ++ ++ case GE: ++ case GEU: ++ return cmp1 == const1_rtx; ++ ++ case LT: ++ case LTU: ++ return arith_operand (cmp1, VOIDmode); ++ ++ case LE: ++ return sle_operand (cmp1, VOIDmode); ++ ++ case LEU: ++ return sleu_operand (cmp1, VOIDmode); ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ ++/* Return true if *CMP1 (of mode MODE) is a valid second operand for ++ integer ordering test *CODE, or if an equivalent combination can ++ be formed by adjusting *CODE and *CMP1. When returning true, update ++ *CODE and *CMP1 with the chosen code and operand, otherwise leave ++ them alone. */ ++ ++static bool ++loongarch_canonicalize_int_order_test (enum rtx_code *code, rtx *cmp1, ++ machine_mode mode) ++{ ++ HOST_WIDE_INT plus_one; ++ ++ if (loongarch_int_order_operand_ok_p (*code, *cmp1)) ++ return true; ++ /* Rewrite x <= C as x < C + 1, with C + 1 forced into a register, unless C + 1 wraps around in MODE. */ ++ if (CONST_INT_P (*cmp1)) ++ switch (*code) ++ { ++ case LE: ++ plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode); ++ if (INTVAL (*cmp1) < plus_one) ++ { ++ *code = LT; ++ *cmp1 = force_reg (mode, GEN_INT (plus_one)); ++ return true; ++ } ++ break; ++ ++ case LEU: ++ plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode); ++ if (plus_one != 0) ++ { ++ *code = LTU; ++ *cmp1 = force_reg (mode, GEN_INT (plus_one)); ++ return true; ++ } ++ break; ++ ++ default: ++ break; ++ } ++ return false; ++} ++ ++/* Compare CMP0 and CMP1 using ordering test CODE and store the result ++ in TARGET. CMP0 and TARGET are register_operands. If INVERT_PTR ++ is nonnull, it's OK to set TARGET to the inverse of the result and ++ flip *INVERT_PTR instead. */ ++ ++static void ++loongarch_emit_int_order_test (enum rtx_code code, bool *invert_ptr, ++ rtx target, rtx cmp0, rtx cmp1) ++{ ++ machine_mode mode; ++ ++ /* First see if there is a LARCH instruction that can do this operation. 
++ If not, try doing the same for the inverse operation. If that also ++ fails, force CMP1 into a register and try again. */ ++ mode = GET_MODE (cmp0); ++ if (loongarch_canonicalize_int_order_test (&code, &cmp1, mode)) ++ loongarch_emit_binary (code, target, cmp0, cmp1); ++ else ++ { ++ enum rtx_code inv_code = reverse_condition (code); ++ if (!loongarch_canonicalize_int_order_test (&inv_code, &cmp1, mode)) ++ { ++ cmp1 = force_reg (mode, cmp1); ++ loongarch_emit_int_order_test (code, invert_ptr, target, cmp0, cmp1); ++ } ++ else if (invert_ptr == 0) ++ { ++ rtx inv_target; ++ /* The caller cannot absorb an inversion: compute the inverse test and XOR it with 1. */ ++ inv_target = loongarch_force_binary (GET_MODE (target), ++ inv_code, cmp0, cmp1); ++ loongarch_emit_binary (XOR, target, inv_target, const1_rtx); ++ } ++ else ++ { ++ *invert_ptr = !*invert_ptr; ++ loongarch_emit_binary (inv_code, target, cmp0, cmp1); ++ } ++ } ++} ++ ++/* Return a register that is zero iff CMP0 and CMP1 are equal: CMP0 itself for a zero ++ CMP1, else an XOR (when CMP1 is a uns_arith_operand) or a subtraction, in CMP0's mode. */ ++ ++static rtx ++loongarch_zero_if_equal (rtx cmp0, rtx cmp1) ++{ ++ if (cmp1 == const0_rtx) ++ return cmp0; ++ ++ if (uns_arith_operand (cmp1, VOIDmode)) ++ return expand_binop (GET_MODE (cmp0), xor_optab, ++ cmp0, cmp1, 0, 0, OPTAB_DIRECT); ++ ++ return expand_binop (GET_MODE (cmp0), sub_optab, ++ cmp0, cmp1, 0, 0, OPTAB_DIRECT); ++} ++ ++/* Convert *CODE into a code that can be used in a floating-point ++ scc instruction (C.cond.fmt). Return true if the values of ++ the condition code registers will be inverted, with 0 indicating ++ that the condition holds. Only NE, LTGT and ORDERED are reversed. */ ++ ++static bool ++loongarch_reversed_fp_cond (enum rtx_code *code) ++{ ++ switch (*code) ++ { ++ case NE: ++ case LTGT: ++ case ORDERED: ++ *code = reverse_condition_maybe_unordered (*code); ++ return true; ++ ++ default: ++ return false; ++ } ++} ++ ++/* Allocate a floating-point condition-code register of mode MODE. 
++ ++ These condition code registers are used for certain kinds ++ of compound operation, such as compare and branches, vconds, ++ and built-in functions. At expand time, their use is entirely ++ controlled by LARCH-specific code and is entirely internal ++ to these compound operations. ++ ++ We could (and did in the past) expose condition-code values ++ as pseudo registers and leave the register allocator to pick ++ appropriate registers. The problem is that it is not practically ++ possible for the rtl optimizers to guarantee that no spills will ++ be needed, even when AVOID_CCMODE_COPIES is defined. We would ++ therefore need spill and reload sequences to handle the worst case. ++ ++ Although such sequences do exist, they are very expensive and are ++ not something we'd want to use. ++ ++ The main benefit of having more than one condition-code register ++ is to allow the pipelining of operations, especially those involving ++ comparisons and conditional moves. We don't really expect the ++ registers to be live for long periods, and certainly never want ++ them to be live across calls. ++ ++ Also, there should be no penalty attached to using all the available ++ registers. They are simply bits in the same underlying FPU control ++ register. ++ ++ We therefore expose the hardware registers from the outset and use ++ a simple round-robin allocation scheme. */ ++ ++static rtx ++loongarch_allocate_fcc (machine_mode mode) ++{ ++ unsigned int regno, count; ++ ++ gcc_assert (TARGET_HARD_FLOAT); ++ ++ if (mode == CCmode) /* CCmode is the only mode handled; it needs one register. */ ++ count = 1; ++ else ++ gcc_unreachable (); ++ ++ cfun->machine->next_fcc += -cfun->machine->next_fcc & (count - 1); /* Round next_fcc up to a multiple of COUNT (a no-op while COUNT is 1). */ ++ if (cfun->machine->next_fcc > ST_REG_LAST - ST_REG_FIRST) /* Wrap around after the last register. */ ++ cfun->machine->next_fcc = 0; ++ ++ regno = ST_REG_FIRST + cfun->machine->next_fcc; ++ cfun->machine->next_fcc += count; /* The next allocation starts after this one. */ ++ return gen_rtx_REG (mode, regno); ++} ++ ++/* Convert a comparison into something that can be used in a branch or ++ conditional move.
On entry, *OP0 and *OP1 are the values being ++ compared and *CODE is the code used to compare them. ++ ++ Update *CODE, *OP0 and *OP1 so that they describe the final comparison. ++ If NEED_EQ_NE_P, then only EQ or NE comparisons against zero are possible, ++ otherwise any standard branch condition can be used. The standard branch ++ conditions are: ++ ++ - EQ or NE between two registers. ++ - any comparison between a register and zero. ++ - if compact branches are available then any condition is valid. */ ++ ++static void ++loongarch_emit_compare (enum rtx_code *code, rtx *op0, rtx *op1, bool need_eq_ne_p) ++{ ++ rtx cmp_op0 = *op0; ++ rtx cmp_op1 = *op1; ++ ++ if (GET_MODE_CLASS (GET_MODE (*op0)) == MODE_INT) ++ { ++ if (!need_eq_ne_p && *op1 == const0_rtx) /* A comparison against zero is used as it stands. */ ++ ; ++ else if (*code == EQ || *code == NE) ++ { ++ if (need_eq_ne_p) ++ { ++ *op0 = loongarch_zero_if_equal (cmp_op0, cmp_op1); ++ *op1 = const0_rtx; ++ } ++ else ++ *op1 = force_reg (GET_MODE (cmp_op0), cmp_op1); ++ } ++ else if (!need_eq_ne_p) ++ { ++ bool swap = false; /* LE, GT, LEU and GTU become GE, LT, GEU and LTU with the operands exchanged. */ ++ switch (*code) ++ { ++ case LE: ++ swap = true; ++ *code = GE; ++ break; ++ case GT: ++ swap = true; ++ *code = LT; ++ break; ++ case LEU: ++ swap = true; ++ *code = GEU; ++ break; ++ case GTU: ++ swap = true; ++ *code = LTU; ++ break; ++ case GE: ++ case LT: ++ case GEU: ++ case LTU: ++ /* Do nothing. */ ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ *op1 = force_reg (GET_MODE (cmp_op0), cmp_op1); ++ if (swap) ++ { ++ rtx tmp = *op1; ++ *op1 = *op0; ++ *op0 = tmp; ++ } ++ } ++ else ++ { ++ /* The comparison needs a separate scc instruction. Store the ++ result of the scc in *OP0 and compare it against zero. */ ++ bool invert = false; ++ *op0 = gen_reg_rtx (GET_MODE (cmp_op0)); ++ loongarch_emit_int_order_test (*code, &invert, *op0, cmp_op0, cmp_op1); ++ *code = (invert ? 
EQ : NE); ++ *op1 = const0_rtx; ++ } ++ } ++ else ++ { ++ enum rtx_code cmp_code; ++ ++ /* Floating-point tests use a separate FCMP.cond.fmt ++ comparison to set a register. The branch or conditional move will ++ then compare that register against zero. ++ ++ Set CMP_CODE to the code of the comparison instruction and ++ *CODE to the code that the branch or move should use. */ ++ cmp_code = *code; ++ /* Three FP conditions cannot be implemented by reversing the ++ operands for FCMP.cond.fmt, instead a reversed condition code is ++ required and a test for false. */ ++ *code = loongarch_reversed_fp_cond (&cmp_code) ? EQ : NE; ++ *op0 = loongarch_allocate_fcc (CCmode); ++ ++ *op1 = const0_rtx; ++ loongarch_emit_binary (cmp_code, *op0, cmp_op0, cmp_op1); ++ } ++} ++ ++/* Try performing the comparison in OPERANDS[1], whose arms are OPERANDS[2] ++ and OPERANDS[3]. Store the result in OPERANDS[0]. ++ ++ On 64-bit targets, the mode of the comparison and target will always be ++ SImode, thus possibly narrower than that of the comparison's operands. */ ++ ++void ++loongarch_expand_scc (rtx operands[]) ++{ ++ rtx target = operands[0]; ++ enum rtx_code code = GET_CODE (operands[1]); ++ rtx op0 = operands[2]; ++ rtx op1 = operands[3]; ++ ++ gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT); /* Only integer comparisons are handled here. */ ++ ++ if (code == EQ || code == NE) ++ { ++ { ++ rtx zie = loongarch_zero_if_equal (op0, op1); /* ZIE is zero iff OP0 equals OP1. */ ++ loongarch_emit_binary (code, target, zie, const0_rtx); ++ } ++ } ++ else ++ loongarch_emit_int_order_test (code, 0, target, op0, op1); /* A null INVERT_PTR asks for the non-inverted result. */ ++} ++ ++/* Compare OPERANDS[1] with OPERANDS[2] using comparison code ++ CODE and jump to OPERANDS[3] if the condition holds.
*/ ++ ++void ++loongarch_expand_conditional_branch (rtx *operands) ++{ ++ enum rtx_code code = GET_CODE (operands[0]); /* The comparison code is taken from OPERANDS[0]. */ ++ rtx op0 = operands[1]; ++ rtx op1 = operands[2]; ++ rtx condition; ++ ++ loongarch_emit_compare (&code, &op0, &op1, 0); /* NEED_EQ_NE_P is false: any standard branch condition may be used. */ ++ condition = gen_rtx_fmt_ee (code, VOIDmode, op0, op1); ++ emit_jump_insn (gen_condjump (condition, operands[3])); ++} ++ ++/* Perform the comparison in OPERANDS[1]. Move OPERANDS[2] into OPERANDS[0] ++ if the condition holds, otherwise move OPERANDS[3] into OPERANDS[0]. Return true if the move was emitted and false if this case is not handled. */ ++ ++bool ++loongarch_expand_conditional_move (rtx *operands) ++{ ++ rtx cond; ++ enum rtx_code code = GET_CODE (operands[1]); ++ rtx op0 = XEXP (operands[1], 0); ++ rtx op1 = XEXP (operands[1], 1); ++ ++ loongarch_emit_compare (&code, &op0, &op1, true); /* NOTE(review): this runs before the check below, so it may emit insns even when false is returned. */ ++ cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1); ++ ++ /* There is no direct support for general conditional GP move involving ++ two registers using SEL. Select each arm against zero instead and OR the two results together. */ ++ if (INTEGRAL_MODE_P (GET_MODE (operands[2])) ++ && register_operand (operands[2], VOIDmode) ++ && register_operand (operands[3], VOIDmode)) ++ { ++ machine_mode mode = GET_MODE (operands[0]); ++ rtx temp = gen_reg_rtx (mode); ++ rtx temp2 = gen_reg_rtx (mode); ++ ++ emit_insn (gen_rtx_SET (temp, ++ gen_rtx_IF_THEN_ELSE (mode, cond, ++ operands[2], const0_rtx))); ++ ++ /* Flip the test for the second operand. */ ++ cond = gen_rtx_fmt_ee ((code == EQ) ? NE : EQ, GET_MODE (op0), op0, op1); ++ ++ emit_insn (gen_rtx_SET (temp2, ++ gen_rtx_IF_THEN_ELSE (mode, cond, ++ operands[3], const0_rtx))); ++ ++ /* Merge the two results, at least one is guaranteed to be zero. */ ++ emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, temp2))); ++ ++ return true; ++ } ++ else ++ return false; ++} ++ ++ ++/* Initialize *CUM for a call to a function of type FNTYPE.
*/ ++ ++void ++loongarch_init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype) ++{ ++ memset (cum, 0, sizeof (*cum)); /* Start with every field cleared. */ ++ cum->prototype = (fntype && prototype_p (fntype)); ++ cum->gp_reg_found = (cum->prototype && stdarg_p (fntype)); /* Set only for prototyped variadic functions. */ ++} ++ ++/* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the first ++ byte of the stack slot has useful data, PAD_DOWNWARD if the last byte ++ does. */ ++ ++static pad_direction ++loongarch_function_arg_padding (machine_mode mode, const_tree type) ++{ ++ /* On little-endian targets, the first byte of every stack argument ++ is passed in the first byte of the stack slot. */ ++ if (!BYTES_BIG_ENDIAN) ++ return PAD_UPWARD; ++ ++ /* Otherwise, integral types are padded downward: the last byte of a ++ stack argument is passed in the last byte of the stack slot. Pointer and fixed-point types are treated the same way; MODE is consulted only when TYPE is null. */ ++ if (type != 0 ++ ? (INTEGRAL_TYPE_P (type) ++ || POINTER_TYPE_P (type) ++ || FIXED_POINT_TYPE_P (type)) ++ : (SCALAR_INT_MODE_P (mode) ++ || ALL_SCALAR_FIXED_POINT_MODE_P (mode))) ++ return PAD_DOWNWARD; ++ ++ return PAD_UPWARD; ++} ++ ++/* Likewise BLOCK_REG_PADDING (MODE, TYPE, ...). Return !BYTES_BIG_ENDIAN ++ if the least significant byte of the register has useful data. Return ++ the opposite if the most significant byte does. */ ++ ++bool ++loongarch_pad_reg_upward (machine_mode mode, tree type) ++{ ++ /* No shifting is required for floating-point arguments. */ ++ if (type != 0 ? FLOAT_TYPE_P (type) : GET_MODE_CLASS (mode) == MODE_FLOAT) ++ return !BYTES_BIG_ENDIAN; ++ ++ /* Otherwise, apply the same padding to register arguments as we do ++ to stack arguments. */ ++ return loongarch_function_arg_padding (mode, type) == PAD_UPWARD; ++} ++ ++/* Implement TARGET_EXPAND_BUILTIN_VA_START. NEXTARG is offset by -cfun->machine->varargs_size before the standard expansion is used. */ ++ ++static void ++loongarch_va_start (tree valist, rtx nextarg) ++{ ++ nextarg = plus_constant (Pmode, nextarg, -cfun->machine->varargs_size); ++ std_expand_builtin_va_start (valist, nextarg); ++} ++ ++ ++/* Start a definition of function NAME.
*/ ++ ++static void ++loongarch_start_function_definition (const char *name) ++{ ++ ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, name, "function"); /* Emit the type directive that marks NAME as a function. */ ++ ++ /* Start the definition proper. */ ++ assemble_name (asm_out_file, name); ++ fputs (":\n", asm_out_file); ++} ++ ++/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL. Neither DECL nor EXP is examined; the answer depends only on TARGET_SIBCALLS and on the current function. */ ++ ++static bool ++loongarch_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) ++{ ++ if (!TARGET_SIBCALLS) ++ return false; ++ ++ /* Interrupt handlers need special epilogue code and therefore can't ++ use sibcalls. */ ++ if (loongarch_interrupt_type_p (TREE_TYPE (current_function_decl))) ++ return false; ++ ++ /* Otherwise OK. */ ++ return true; ++} ++ ++/* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */ ++ ++bool ++loongarch_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size, ++ unsigned int align, ++ enum by_pieces_operation op, ++ bool speed_p) ++{ ++ if (op == STORE_BY_PIECES) ++ return loongarch_store_by_pieces_p (size, align); ++ if (op == MOVE_BY_PIECES && HAVE_movmemsi) ++ { ++ /* movmemsi is meant to generate code that is at least as good as ++ move_by_pieces. However, movmemsi effectively uses a by-pieces ++ implementation both for moves smaller than a word and for ++ word-aligned moves of no more than LARCH_MAX_MOVE_BYTES_STRAIGHT ++ bytes. We should allow the tree-level optimisers to do such ++ moves by pieces, as it often exposes other optimization ++ opportunities. We might as well continue to use movmemsi at ++ the rtl level though, as it produces better code when ++ scheduling is disabled (such as at -O). */ ++ if (currently_expanding_to_rtl) ++ return false; ++ if (align < BITS_PER_WORD) /* Not word-aligned: only moves smaller than a word go by pieces. */ ++ return size < UNITS_PER_WORD; ++ return size <= LARCH_MAX_MOVE_BYTES_STRAIGHT; ++ } ++ ++ return default_use_by_pieces_infrastructure_p (size, align, op, speed_p); /* Every other operation uses the default policy. */ ++} ++ ++/* Implement a handler for STORE_BY_PIECES operations ++ for TARGET_USE_MOVE_BY_PIECES_INFRASTRUCTURE_P.
*/ ++ ++bool ++loongarch_store_by_pieces_p (unsigned HOST_WIDE_INT size, unsigned int align) ++{ ++ /* Storing by pieces involves moving constants into registers ++ of size MIN (ALIGN, BITS_PER_WORD), then storing them. ++ We need to decide whether it is cheaper to load the address of ++ constant data into a register and use a block move instead. */ ++ ++ /* If the data is only byte aligned, then: ++ ++ (a1) A block move of less than 4 bytes would involve 3 LD.Bs and ++ 3 ST.Bs. We might as well use 3 single-instruction LIs and 3 ST.Bs ++ instead. ++ ++ (a2) A block move of 4 bytes from aligned source data can use an ++ LD.W/ST.W sequence. This is often better than the 4 LIs and ++ 4 ST.Bs that we would generate when storing by pieces. */ ++ if (align <= BITS_PER_UNIT) ++ return size < 4; ++ ++ /* If the data is 2-byte aligned, then: ++ ++ (b1) A block move of less than 4 bytes would use a combination of LD.Bs, ++ LD.Hs, ST.Bs and ST.Hs. We get better code by using single-instruction ++ LIs, ST.Bs and ST.Hs instead. ++ ++ (b2) A block move of 4 bytes from aligned source data would again use ++ an LD.W/ST.W sequence. In most cases, loading the address of ++ the source data would require at least one extra instruction. ++ It is often more efficient to use 2 single-instruction LIs and ++ 2 ST.Hs instead. ++ ++ (b3) A block move of up to 3 additional bytes would be like (b1). ++ ++ (b4) A block move of 8 bytes from aligned source data can use two ++ LD.W/ST.W sequences. Both sequences are better than the 4 LIs ++ and 4 ST.Hs that we'd generate when storing by pieces. ++ ++ The reasoning for higher alignments is similar: ++ ++ (c1) A block move of less than 4 bytes would be the same as (b1). ++ ++ (c2) A block move of 4 bytes would use an LD.W/ST.W sequence. Again, ++ loading the address of the source data would typically require ++ at least one extra instruction. It is generally better to use ++ LU12I.W/ORI/ST.W instead.
++ ++ (c3) A block move of up to 3 additional bytes would be like (b1). ++ ++ (c4) A block move of 8 bytes can use two LD.W/ST.W sequences or a single ++ LD.D/ST.D sequence, and in these cases we've traditionally preferred ++ the memory copy over the more bulky constant moves. */ ++ return size < 8; ++} ++ ++/* Emit straight-line code to move LENGTH bytes from SRC to DEST. ++ Assume that the areas do not overlap. */ ++ ++static void ++loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length) ++{ ++ HOST_WIDE_INT offset, delta; ++ unsigned HOST_WIDE_INT bits; ++ int i; ++ machine_mode mode; ++ rtx *regs; ++ ++ /* Work out how many bits to move at a time. BITS is the smaller of ++ BITS_PER_WORD and the alignment of the two operands, so operands ++ with only half-word alignment are moved in half words and ++ sufficiently aligned operands are moved in word-sized chunks. ++ Every chunk is moved with loongarch_emit_move. ++ ++ NOTE(review): an earlier version of this comment described the MIPS ++ lwl/lwr and swl/swr unaligned accesses (ISA_HAS_LWL_LWR); nothing ++ in this function uses such instructions. */ ++ ++ bits = MIN (BITS_PER_WORD, MIN (MEM_ALIGN (src), MEM_ALIGN (dest))); ++ ++ mode = int_mode_for_size (bits, 0).require (); ++ delta = bits / BITS_PER_UNIT; ++ ++ /* Allocate a buffer for the temporary registers. */ ++ regs = XALLOCAVEC (rtx, length / delta); ++ ++ /* Load as many BITS-sized chunks as possible, each into a fresh ++ pseudo register, using ordinary loongarch_emit_move loads. */ ++ for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++) ++ { ++ regs[i] = gen_reg_rtx (mode); ++ loongarch_emit_move (regs[i], adjust_address (src, mode, offset)); ++ } ++ ++ /* Copy the loaded chunks to DEST. */ ++ for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++) ++ loongarch_emit_move (adjust_address (dest, mode, offset), regs[i]); ++ ++ /* Mop up any left-over bytes.
*/ ++ if (offset < length) ++ { ++ src = adjust_address (src, BLKmode, offset); ++ dest = adjust_address (dest, BLKmode, offset); ++ move_by_pieces (dest, src, length - offset, ++ MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), RETURN_BEGIN); ++ } ++} ++ ++/* Helper function for doing a loop-based block operation on memory ++ reference MEM. Each iteration of the loop will operate on LENGTH ++ bytes of MEM. ++ ++ Create a new base register for use within the loop and point it to ++ the start of MEM. Create a new memory reference that uses this ++ register. Store them in *LOOP_REG and *LOOP_MEM respectively. */ ++ ++static void ++loongarch_adjust_block_mem (rtx mem, HOST_WIDE_INT length, ++ rtx *loop_reg, rtx *loop_mem) ++{ ++ *loop_reg = copy_addr_to_reg (XEXP (mem, 0)); ++ ++ /* Although the new mem does not refer to a known location, ++ it does keep up to LENGTH bytes of alignment. */ ++ *loop_mem = change_address (mem, BLKmode, *loop_reg); ++ set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT)); ++} ++ ++/* Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER ++ bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that ++ the memory regions do not overlap. */ ++ ++static void ++loongarch_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length, ++ HOST_WIDE_INT bytes_per_iter) ++{ ++ rtx_code_label *label; ++ rtx src_reg, dest_reg, final_src, test; ++ HOST_WIDE_INT leftover; ++ ++ leftover = length % bytes_per_iter; /* Bytes that remain after the last full iteration. */ ++ length -= leftover; /* LENGTH is now a multiple of BYTES_PER_ITER. */ ++ ++ /* Create registers and memory references for use within the loop. */ ++ loongarch_adjust_block_mem (src, bytes_per_iter, &src_reg, &src); ++ loongarch_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest); ++ ++ /* Calculate the value that SRC_REG should have after the last iteration ++ of the loop. */ ++ final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length), ++ 0, 0, OPTAB_WIDEN); ++ ++ /* Emit the start of the loop.
*/ ++ label = gen_label_rtx (); ++ emit_label (label); ++ ++ /* Emit the loop body. */ ++ loongarch_block_move_straight (dest, src, bytes_per_iter); ++ ++ /* Move on to the next block. */ ++ loongarch_emit_move (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter)); ++ loongarch_emit_move (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter)); ++ ++ /* Emit the loop condition: branch back to LABEL while SRC_REG differs from FINAL_SRC. */ ++ test = gen_rtx_NE (VOIDmode, src_reg, final_src); ++ if (Pmode == DImode) ++ emit_jump_insn (gen_cbranchdi4 (test, src_reg, final_src, label)); ++ else ++ emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label)); ++ ++ /* Mop up any left-over bytes. */ ++ if (leftover) ++ loongarch_block_move_straight (dest, src, leftover); ++ else ++ /* Temporary fix for PR79150: emit a nop when there is no tail copy. */ ++ emit_insn (gen_nop ()); ++} ++ ++/* Expand a movmemsi instruction, which copies LENGTH bytes from ++ memory reference SRC to memory reference DEST. Return true if the copy was expanded here and false otherwise; only a constant LENGTH of at most loongarch_max_inline_memcpy_size is expanded. */ ++ ++bool ++loongarch_expand_block_move (rtx dest, rtx src, rtx length) ++{ ++ ++ int max_move_bytes = LARCH_MAX_MOVE_BYTES_STRAIGHT; ++ ++ if (CONST_INT_P (length) && INTVAL (length) <= loongarch_max_inline_memcpy_size) ++ { ++ if (INTVAL (length) <= max_move_bytes) /* Short enough for straight-line code. */ ++ { ++ loongarch_block_move_straight (dest, src, INTVAL (length)); ++ return true; ++ } ++ else if (optimize) /* Longer copies use a loop, but only when optimizing. */ ++ { ++ loongarch_block_move_loop (dest, src, INTVAL (length), ++ LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER); ++ return true; ++ } ++ } ++ return false; ++} ++ ++ ++/* Expand a QI or HI mode atomic memory operation. ++ ++ GENERATOR contains a pointer to the gen_* function that generates ++ the SI mode underlying atomic operation using masks that we ++ calculate. ++ ++ RESULT is the return register for the operation. Its value is NULL ++ if unused. ++ ++ MEM is the location of the atomic access. ++ ++ OLDVAL is the first operand for the operation. ++ ++ NEWVAL is the optional second operand for the operation. Its value ++ is NULL if unused.
*/ ++ ++void ++loongarch_expand_atomic_qihi (union loongarch_gen_fn_ptrs generator, ++ rtx result, rtx mem, rtx oldval, ++ rtx newval, rtx model) ++{ ++ rtx orig_addr, memsi_addr, memsi, shift, shiftsi, unshifted_mask; ++ rtx unshifted_mask_reg, mask, inverted_mask, si_op; ++ rtx res = NULL; ++ rtx tmp = NULL; /* NOTE(review): TMP is never used in this function. */ ++ machine_mode mode; ++ ++ mode = GET_MODE (mem); ++ ++ /* Compute the address of the containing SImode value (ORIG_ADDR with its low two bits cleared). */ ++ orig_addr = force_reg (Pmode, XEXP (mem, 0)); ++ memsi_addr = loongarch_force_binary (Pmode, AND, orig_addr, ++ force_reg (Pmode, GEN_INT (-4))); ++ ++ /* Create a memory reference for it. */ ++ memsi = gen_rtx_MEM (SImode, memsi_addr); ++ set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER); ++ MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem); ++ ++ /* Work out the byte offset of the QImode or HImode value, ++ counting from the least significant byte. */ ++ shift = loongarch_force_binary (Pmode, AND, orig_addr, GEN_INT (3)); ++ /* if (TARGET_BIG_ENDIAN) */ ++ /* loongarch_emit_binary (XOR, shift, shift, GEN_INT (mode == QImode ? 3 : 2)); */ ++ ++ /* Multiply by eight to convert the shift value from bytes to bits. */ ++ loongarch_emit_binary (ASHIFT, shift, shift, GEN_INT (3)); ++ ++ /* Make the final shift an SImode value, so that it can be used in ++ SImode operations. */ ++ shiftsi = force_reg (SImode, gen_lowpart (SImode, shift)); ++ ++ /* Set MASK to an inclusive mask of the QImode or HImode value. */ ++ unshifted_mask = GEN_INT (GET_MODE_MASK (mode)); ++ unshifted_mask_reg = force_reg (SImode, unshifted_mask); ++ mask = loongarch_force_binary (SImode, ASHIFT, unshifted_mask_reg, shiftsi); ++ ++ /* Compute the equivalent exclusive mask. */ ++ inverted_mask = gen_reg_rtx (SImode); ++ emit_insn (gen_rtx_SET (inverted_mask, gen_rtx_NOT (SImode, mask))); ++ ++ /* Shift the old value into place.
*/ ++ if (oldval != const0_rtx) ++ { ++ oldval = convert_modes (SImode, mode, oldval, true); ++ oldval = force_reg (SImode, oldval); ++ oldval = loongarch_force_binary (SImode, ASHIFT, oldval, shiftsi); ++ } ++ ++ /* Do the same for the new value. */ ++ if (newval && newval != const0_rtx) ++ { ++ newval = convert_modes (SImode, mode, newval, true); ++ newval = force_reg (SImode, newval); ++ newval = loongarch_force_binary (SImode, ASHIFT, newval, shiftsi); ++ } ++ ++ /* Do the SImode atomic access. The generator variant is chosen by which of NEWVAL and RESULT were supplied; MODEL is passed through unchanged. */ ++ if (result) ++ res = gen_reg_rtx (SImode); ++ ++ if (newval) ++ si_op = generator.fn_7 (res, memsi, mask, inverted_mask, oldval, newval, model); ++ else if (result) ++ si_op = generator.fn_6 (res, memsi, mask, inverted_mask, oldval, model); ++ else ++ si_op = generator.fn_5 (memsi, mask, inverted_mask, oldval, model); ++ ++ //si_op = generator.fn_7 (res, memsi, mask, inverted_mask, oldval, newval, model); ++ ++ emit_insn (si_op); ++ ++ if (result) ++ { ++ /* Shift and convert the result: mask out the field, shift it down and copy the low part to RESULT. */ ++ loongarch_emit_binary (AND, res, res, mask); ++ loongarch_emit_binary (LSHIFTRT, res, res, shiftsi); ++ loongarch_emit_move (result, gen_lowpart (GET_MODE (result), res)); ++ } ++} ++ ++/* Return true if X is a MEM whose size is known and equal to the size of MODE. */ ++ ++bool ++loongarch_mem_fits_mode_p (machine_mode mode, rtx x) ++{ ++ return (MEM_P (x) ++ && MEM_SIZE_KNOWN_P (x) ++ && MEM_SIZE (x) == GET_MODE_SIZE (mode)); ++} ++ ++/* Return true if (zero_extract OP WIDTH BITPOS) can be used as the ++ source of an "ext" instruction or the destination of an "ins" ++ instruction. OP must be a register operand and the following ++ conditions must hold: ++ ++ 0 <= BITPOS < GET_MODE_BITSIZE (GET_MODE (op)) ++ 0 < WIDTH <= GET_MODE_BITSIZE (GET_MODE (op)) ++ 0 < BITPOS + WIDTH <= GET_MODE_BITSIZE (GET_MODE (op)) ++ ++ Also reject lengths equal to a word as they are better handled ++ by the move patterns.
*/ ++ ++bool ++loongarch_use_ins_ext_p (rtx op, HOST_WIDE_INT width, HOST_WIDE_INT bitpos) ++{ ++ if (!register_operand (op, VOIDmode) ++ || GET_MODE_BITSIZE (GET_MODE (op)) > BITS_PER_WORD) ++ return false; ++ ++ if (!IN_RANGE (width, 1, GET_MODE_BITSIZE (GET_MODE (op)) - 1)) /* WIDTH must be nonzero and smaller than the mode of OP. */ ++ return false; ++ ++ if (bitpos < 0 || bitpos + width > GET_MODE_BITSIZE (GET_MODE (op))) /* The field must lie entirely inside OP. */ ++ return false; ++ ++ return true; ++} ++ ++ ++/* Return true iff OP1 and OP2 are valid operands together for the ++ *and3 patterns. For the cases to consider, ++ see the table in the comment before the pattern. */ ++ ++bool ++and_operands_ok (machine_mode mode, rtx op1, rtx op2) ++{ ++ ++ if (memory_operand (op1, mode)) /* A memory OP1 requires OP2 to satisfy and_load_operand. */ ++ { ++ return and_load_operand (op2, mode); ++ } ++ else ++ return and_reg_operand (op2, mode); ++} ++ ++ ++/* Wrappers around loongarch_push_asm_switch_1 and loongarch_pop_asm_switch_1 ++ that either print a complete line or print nothing. NOTE: both calls are currently commented out, so these wrappers do nothing. */ ++ ++void ++loongarch_push_asm_switch (struct loongarch_asm_switch *asm_switch) ++{ ++// loongarch_push_asm_switch_1 (asm_switch, "\t", "\n"); ++} ++ ++void ++loongarch_pop_asm_switch (struct loongarch_asm_switch *asm_switch) ++{ ++// loongarch_pop_asm_switch_1 (asm_switch, "\t", "\n"); ++} ++ ++/* Print the text for PRINT_OPERAND punctuation character CH to FILE. ++ The punctuation characters are: ++ ++ '.' Print the name of the register with a hard-wired zero (zero or $r0). ++ '$' Print the name of the stack pointer register (sp or $r3). ++ ':' Print "c" to use the compact version if the delay slot is a nop. ++ '!' Not implemented: it is neither handled by the switch below nor ++ registered by loongarch_init_print_operand_punct. ++ ++ See also loongarch_init_print_operand_punct.
*/ ++ ++static void ++loongarch_print_operand_punctuation (FILE *file, int ch) ++{ ++ switch (ch) ++ { ++ case '.': ++ fputs (reg_names[GP_REG_FIRST + 0], file); ++ break; ++ ++ case '$': ++ fputs (reg_names[STACK_POINTER_REGNUM], file); ++ break; ++ ++ case ':': ++ /* When final_sequence is 0, the delay slot will be a nop. We can ++ use the compact version where available. The %: formatter will ++ only be present if a compact form of the branch is available. */ ++ if (final_sequence == 0) ++ putc ('c', file); ++ break; ++ ++ default: /* Any other character is a caller error. */ ++ gcc_unreachable (); ++ break; ++ } ++} ++ ++/* Initialize loongarch_print_operand_punct by marking every character of the string below as valid. */ ++ ++static void ++loongarch_init_print_operand_punct (void) ++{ ++ const char *p; ++ ++ for (p = ".$:"; *p; p++) ++ loongarch_print_operand_punct[(unsigned char) *p] = true; ++} ++ ++/* PRINT_OPERAND prefix LETTER refers to the integer branch instruction ++ associated with condition CODE. Print the condition part of the ++ opcode to FILE. */ ++ ++static void ++loongarch_print_int_branch_condition (FILE *file, enum rtx_code code, int letter) ++{ ++ switch (code) ++ { ++ case EQ: ++ case NE: ++ case GT: ++ case GE: ++ case LT: ++ case LE: ++ case GTU: ++ case GEU: ++ case LTU: ++ case LEU: ++ /* Conveniently, the LARCH names for these conditions are the same ++ as their RTL equivalents. */ ++ fputs (GET_RTX_NAME (code), file); ++ break; ++ ++ default: /* Report any other code as an invalid use of LETTER. */ ++ output_operand_lossage ("'%%%c' is not a valid operand prefix", letter); ++ break; ++ } ++} ++ ++/* Likewise floating-point branches. Only EQ and NE are accepted; they are printed as ceqz and cnez respectively. */ ++ ++static void ++loongarch_print_float_branch_condition (FILE *file, enum rtx_code code, int letter) ++{ ++ switch (code) ++ { ++ case EQ: ++ fputs ("ceqz", file); ++ break; ++ ++ case NE: ++ fputs ("cnez", file); ++ break; ++ ++ default: ++ output_operand_lossage ("'%%%c' is not a valid operand prefix", letter); ++ break; ++ } ++} ++ ++/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P.
*/ ++ ++static bool ++loongarch_print_operand_punct_valid_p (unsigned char code) ++{ ++ return loongarch_print_operand_punct[code]; /* Table lookup indexed by the character CODE. */ ++} ++ ++/* Return true if a FENCE should be emitted before a memory access to ++ implement the release portion of memory model MODEL. */ ++ ++static bool ++loongarch_memmodel_needs_release_fence (enum memmodel model) ++{ ++ switch (model) ++ { ++ case MEMMODEL_ACQ_REL: ++ case MEMMODEL_SEQ_CST: ++ case MEMMODEL_SYNC_SEQ_CST: ++ case MEMMODEL_RELEASE: ++ case MEMMODEL_SYNC_RELEASE: /* These models include release ordering. */ ++ return true; ++ ++ case MEMMODEL_ACQUIRE: ++ case MEMMODEL_CONSUME: ++ case MEMMODEL_SYNC_ACQUIRE: ++ case MEMMODEL_RELAXED: /* These models need no release fence. */ ++ return false; ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ ++ ++/* Implement TARGET_PRINT_OPERAND. The LARCH-specific operand codes are: ++ ++ 'X' Print CONST_INT OP in hexadecimal format. ++ 'x' Print the low 16 bits of CONST_INT OP in hexadecimal format. ++ 'd' Print CONST_INT OP in decimal. ++ 'm' Print one less than CONST_INT OP in decimal. ++ 'y' Print exact log2 of CONST_INT OP in decimal. ++ 'h' Print the high-part relocation associated with OP, after stripping ++ any outermost HIGH. ++ 'R' Print the low-part relocation associated with OP. ++ 'C' Print the integer branch condition for comparison OP. ++ 'N' Print the inverse of the integer branch condition for comparison OP. ++ 'F' Print the FPU branch condition for comparison OP. ++ 'W' Print the inverse of the FPU branch condition for comparison OP. ++ 'T' Print 'f' for (eq:CC ...), 't' for (ne:CC ...), ++ 'z' for (eq:?I ...), 'n' for (ne:?I ...). ++ 't' Like 'T', but with the EQ/NE cases reversed ++ 'Y' Print loongarch_fp_conditions[INTVAL (OP)] ++ 'Z' Print OP and a comma for 8CC, otherwise print nothing. ++ 'D' Print the second part of a double-word register or memory operand. ++ 'L' Print the low-order register in a double-word register operand. ++ 'M' Print high-order register in a double-word register operand.
++ 'z' Print $0 if OP is zero, otherwise print OP normally. ++ 'b' Print the address of a memory operand, without offset. ++ 'V' Print exact log2 of CONST_INT OP element 0 of a replicated ++ CONST_VECTOR in decimal. ++ 'A' Print a _DB suffix if the memory model requires a release. ++ 'G' Print a DBAR insn if the memory model requires a release. ++ 'i' Print i if the operand is not a register. */ ++ ++static void ++loongarch_print_operand (FILE *file, rtx op, int letter) ++{ ++ enum rtx_code code; ++ ++ if (loongarch_print_operand_punct_valid_p (letter)) ++ { ++ loongarch_print_operand_punctuation (file, letter); ++ return; ++ } ++ ++ gcc_assert (op); ++ code = GET_CODE (op); ++ ++ switch (letter) ++ { ++ case 'X': ++ if (CONST_INT_P (op)) ++ fprintf (file, HOST_WIDE_INT_PRINT_HEX, INTVAL (op)); ++ else ++ output_operand_lossage ("invalid use of '%%%c'", letter); ++ break; ++ ++ case 'x': ++ if (CONST_INT_P (op)) ++ fprintf (file, HOST_WIDE_INT_PRINT_HEX, INTVAL (op) & 0xffff); ++ else ++ output_operand_lossage ("invalid use of '%%%c'", letter); ++ break; ++ ++ case 'd': ++ if (CONST_INT_P (op)) ++ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op)); ++ else ++ output_operand_lossage ("invalid use of '%%%c'", letter); ++ break; ++ ++ case 'm': ++ if (CONST_INT_P (op)) ++ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op) - 1); ++ else ++ output_operand_lossage ("invalid use of '%%%c'", letter); ++ break; ++ ++ case 'y': ++ if (CONST_INT_P (op)) ++ { ++ int val = exact_log2 (INTVAL (op)); ++ if (val != -1) ++ fprintf (file, "%d", val); ++ else ++ output_operand_lossage ("invalid use of '%%%c'", letter); ++ } ++ else ++ output_operand_lossage ("invalid use of '%%%c'", letter); ++ break; ++ ++ case 'V': ++ if (GET_CODE (op) == CONST_VECTOR) ++ { ++ machine_mode mode = GET_MODE_INNER (GET_MODE (op)); ++ unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (op, 0)); ++ int vlog2 = exact_log2 (val & GET_MODE_MASK (mode)); ++ if (vlog2 != -1) ++ fprintf (file, "%d", 
vlog2); ++ else ++ output_operand_lossage ("invalid use of '%%%c'", letter); ++ } ++ else ++ output_operand_lossage ("invalid use of '%%%c'", letter); ++ break; ++ ++ case 'C': ++ loongarch_print_int_branch_condition (file, code, letter); ++ break; ++ ++ case 'N': ++ loongarch_print_int_branch_condition (file, reverse_condition (code), letter); ++ break; ++ ++ case 'F': ++ loongarch_print_float_branch_condition (file, code, letter); ++ break; ++ ++ case 'W': ++ loongarch_print_float_branch_condition (file, reverse_condition (code), ++ letter); ++ break; ++ ++ case 'T': ++ case 't': ++ { ++ int truth = (code == NE) == (letter == 'T'); ++ fputc ("zfnt"[truth * 2 + ST_REG_P (REGNO (XEXP (op, 0)))], file); ++ } ++ break; ++ ++ case 'Y': ++ if (code == CONST_INT && UINTVAL (op) < ARRAY_SIZE (loongarch_fp_conditions)) ++ fputs (loongarch_fp_conditions[UINTVAL (op)], file); ++ else ++ output_operand_lossage ("'%%%c' is not a valid operand prefix", ++ letter); ++ break; ++ ++ case 'Z': ++ loongarch_print_operand (file, op, 0); ++ fputc (',', file); ++ break; ++ ++ case 'A': ++ if (loongarch_memmodel_needs_release_fence ((enum memmodel) INTVAL (op))) ++ fputs ("_db", file); ++ break; ++ ++ case 'G': ++ if (loongarch_memmodel_needs_release_fence ((enum memmodel) INTVAL (op))) ++ fputs ("dbar\t0", file); ++ break; ++ ++ case 'i': ++ if (code != REG) ++ fputs ("i", file); ++ break; ++ ++ default: ++ switch (code) ++ { ++ case REG: ++ { ++ unsigned int regno = REGNO (op); ++ if ((letter == 'M') ++ || (letter == 'L' ) ++ || letter == 'D') ++ regno++; ++ else if (letter && letter != 'z' && letter != 'M' && letter != 'L') ++ output_operand_lossage ("invalid use of '%%%c'", letter); ++ fprintf (file, "%s", reg_names[regno]); ++ } ++ break; ++ ++ case MEM: ++ if (letter == 'D') ++ output_address (GET_MODE (op), plus_constant (Pmode, ++ XEXP (op, 0), 4)); ++ else if (letter == 'b') ++ { ++ gcc_assert (REG_P (XEXP (op, 0))); ++ loongarch_print_operand (file, XEXP (op, 0), 0); ++ } ++ 
else if (letter && letter != 'z') ++ output_operand_lossage ("invalid use of '%%%c'", letter); ++ else ++ output_address (GET_MODE (op), XEXP (op, 0)); ++ break; ++ ++ default: ++ if (letter == 'z' && op == CONST0_RTX (GET_MODE (op))) ++ fputs (reg_names[GP_REG_FIRST], file); ++ else if (letter && letter != 'z') ++ output_operand_lossage ("invalid use of '%%%c'", letter); ++ else ++ output_addr_const (file, loongarch_strip_unspec_address (op)); ++ break; ++ } ++ } ++} ++ ++/* Implement TARGET_PRINT_OPERAND_ADDRESS. */ ++ ++static void ++loongarch_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x) ++{ ++ struct loongarch_address_info addr; ++ ++ if (loongarch_classify_address (&addr, x, word_mode, true)) ++ switch (addr.type) ++ { ++ case ADDRESS_REG: ++ fprintf (file, "%s,", reg_names[REGNO (addr.reg)]); ++ loongarch_print_operand (file, addr.offset, 0); ++ return; ++ ++ case ADDRESS_CONST_INT: ++ fprintf (file, "%s,", reg_names[GP_REG_FIRST]); ++ output_addr_const (file, x); ++ return; ++ ++ case ADDRESS_SYMBOLIC: ++ output_addr_const (file, loongarch_strip_unspec_address (x)); ++ return; ++ } ++ if (GET_CODE (x) == CONST_INT) ++ output_addr_const (file, x); ++ else ++ gcc_unreachable (); ++} ++ ++ ++/* Implement TARGET_ENCODE_SECTION_INFO. */ ++ ++static void ++loongarch_encode_section_info (tree decl, rtx rtl, int first) ++{ ++ default_encode_section_info (decl, rtl, first); ++ ++ if (TREE_CODE (decl) == FUNCTION_DECL) ++ { ++ rtx symbol = XEXP (rtl, 0); ++ tree type = TREE_TYPE (decl); ++ ++ /* Encode whether the symbol is short or long. */ ++ if ((TARGET_LONG_CALLS && !loongarch_near_type_p (type)) ++ || loongarch_far_type_p (type)) ++ SYMBOL_REF_FLAGS (symbol) |= SYMBOL_FLAG_LONG_CALL; ++ } ++} ++ ++/* Implement TARGET_SELECT_RTX_SECTION. */ ++ ++static section * ++loongarch_select_rtx_section (machine_mode mode, rtx x, ++ unsigned HOST_WIDE_INT align) ++{ ++ /* ??? Consider using mergeable small data sections. 
*/ ++ if (loongarch_rtx_constant_in_small_data_p (mode)) ++ return get_named_section (NULL, ".sdata", 0); ++ ++ return default_elf_select_rtx_section (mode, x, align); ++} ++ ++/* Implement TARGET_ASM_FUNCTION_RODATA_SECTION. ++ ++ The complication here is that, with the combination ++ !TARGET_ABSOLUTE_ABICALLS , jump tables will use ++ absolute addresses, and should therefore not be included in the ++ read-only part of a DSO. Handle such cases by selecting a normal ++ data section instead of a read-only one. The logic apes that in ++ default_function_rodata_section. */ ++ ++static section * ++loongarch_function_rodata_section (tree decl) ++{ ++ return default_function_rodata_section (decl); ++} ++ ++/* Implement TARGET_IN_SMALL_DATA_P. */ ++ ++static bool ++loongarch_in_small_data_p (const_tree decl) ++{ ++ unsigned HOST_WIDE_INT size; ++ ++ if (TREE_CODE (decl) == STRING_CST || TREE_CODE (decl) == FUNCTION_DECL) ++ return false; ++ ++ /* We don't yet generate small-data references for ++ VxWorks RTP code. See the related -G handling in ++ loongarch_option_override. */ ++ if (TARGET_VXWORKS_RTP) ++ return false; ++ ++ if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl) != 0) ++ { ++ const char *name; ++ ++ /* Reject anything that isn't in a known small-data section. */ ++ name = DECL_SECTION_NAME (decl); ++ if (strcmp (name, ".sdata") != 0 && strcmp (name, ".sbss") != 0) ++ return false; ++ ++ /* If a symbol is defined externally, the assembler will use the ++ usual -G rules when deciding how to implement macros. */ ++ if (!DECL_EXTERNAL (decl)) ++ return true; ++ } ++ else if (TARGET_EMBEDDED_DATA) ++ { ++ /* Don't put constants into the small data section: we want them ++ to be in ROM rather than RAM. */ ++ if (TREE_CODE (decl) != VAR_DECL) ++ return false; ++ ++ if (TREE_READONLY (decl) ++ && !TREE_SIDE_EFFECTS (decl) ++ && (!DECL_INITIAL (decl) || TREE_CONSTANT (DECL_INITIAL (decl)))) ++ return false; ++ } ++ ++ /* Enforce -mlocal-sdata. 
*/ ++ if (!TARGET_LOCAL_SDATA && !TREE_PUBLIC (decl)) ++ return false; ++ ++ /* Enforce -mextern-sdata. */ ++ if (!TARGET_EXTERN_SDATA && DECL_P (decl)) ++ { ++ if (DECL_EXTERNAL (decl)) ++ return false; ++ if (DECL_COMMON (decl) && DECL_INITIAL (decl) == NULL) ++ return false; ++ } ++ ++ /* We have traditionally not treated zero-sized objects as small data, ++ so this is now effectively part of the ABI. */ ++ size = int_size_in_bytes (TREE_TYPE (decl)); ++ return size > 0 && size <= loongarch_small_data_threshold; ++} ++ ++/* Implement TARGET_USE_ANCHORS_FOR_SYMBOL_P. We don't want to use ++ anchors for small data: the GP register acts as an anchor in that ++ case. We also don't want to use them for PC-relative accesses, ++ where the PC acts as an anchor. */ ++ ++static bool ++loongarch_use_anchors_for_symbol_p (const_rtx symbol) ++{ ++ return default_use_anchors_for_symbol_p (symbol); ++} ++ ++/* The LARCH debug format wants all automatic variables and arguments ++ to be in terms of the virtual frame pointer (stack pointer before ++ any adjustment in the function), while the LARCH 3.0 linker wants ++ the frame pointer to be the stack pointer after the initial ++ adjustment. So, we do the adjustment here. The arg pointer (which ++ is eliminated) points to the virtual frame pointer, while the frame ++ pointer (which may be eliminated) points to the stack pointer after ++ the initial adjustments. */ ++ ++HOST_WIDE_INT ++loongarch_debugger_offset (rtx addr, HOST_WIDE_INT offset) ++{ ++ rtx offset2 = const0_rtx; ++ rtx reg = eliminate_constant_term (addr, &offset2); ++ ++ if (offset == 0) ++ offset = INTVAL (offset2); ++ ++ if (reg == stack_pointer_rtx ++ || reg == frame_pointer_rtx ++ || reg == hard_frame_pointer_rtx) ++ { ++ offset -= cfun->machine->frame.total_size; ++ if (reg == hard_frame_pointer_rtx) ++ offset += cfun->machine->frame.hard_frame_pointer_offset; ++ } ++ ++ return offset; ++} ++ ++/* Implement ASM_OUTPUT_EXTERNAL. 
*/ ++ ++void ++loongarch_output_external (FILE *file, tree decl, const char *name) ++{ ++ default_elf_asm_output_external (file, decl, name); ++ ++ /* We output the name if and only if TREE_SYMBOL_REFERENCED is ++ set in order to avoid putting out names that are never really ++ used. */ ++ if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl))) ++ { ++ if (loongarch_in_small_data_p (decl)) ++ { ++ /* When using assembler macros, emit .extern directives for ++ all small-data externs so that the assembler knows how ++ big they are. ++ ++ In most cases it would be safe (though pointless) to emit ++ .externs for other symbols too. One exception is when an ++ object is within the -G limit but declared by the user to ++ be in a section other than .sbss or .sdata. */ ++ fputs ("\t.extern\t", file); ++ assemble_name (file, name); ++ fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC "\n", ++ int_size_in_bytes (TREE_TYPE (decl))); ++ } ++ } ++} ++ ++/* Implement TARGET_ASM_OUTPUT_SOURCE_FILENAME. */ ++ ++static void ++loongarch_output_filename (FILE *stream, const char *name) ++{ ++ /* If we are emitting DWARF-2, let dwarf2out handle the ".file" ++ directives. */ ++ if (write_symbols == DWARF2_DEBUG) ++ return; ++ else if (loongarch_output_filename_first_time) ++ { ++ loongarch_output_filename_first_time = 0; ++ num_source_filenames += 1; ++ current_function_file = name; ++ fprintf (stream, "\t.file\t%d ", num_source_filenames); ++ output_quoted_string (stream, name); ++ putc ('\n', stream); ++ } ++ /* If we are emitting stabs, let dbxout.c handle this (except for ++ the loongarch_output_filename_first_time case).
*/ ++ else if (write_symbols == DBX_DEBUG) ++ return; ++ else if (name != current_function_file ++ && strcmp (name, current_function_file) != 0) ++ { ++ num_source_filenames += 1; ++ current_function_file = name; ++ fprintf (stream, "\t.file\t%d ", num_source_filenames); ++ output_quoted_string (stream, name); ++ putc ('\n', stream); ++ } ++} ++ ++/* Implement TARGET_ASM_OUTPUT_DWARF_DTPREL. */ ++ ++static void ATTRIBUTE_UNUSED ++loongarch_output_dwarf_dtprel (FILE *file, int size, rtx x) ++{ ++ switch (size) ++ { ++ case 4: ++ fputs ("\t.dtprelword\t", file); ++ break; ++ ++ case 8: ++ fputs ("\t.dtpreldword\t", file); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ output_addr_const (file, x); ++ /* NOTE(review): a fixed 0x8000 bias is appended to the DTP-relative ++ value; confirm that this matches the LoongArch TLS ABI. */ ++ fputs ("+0x8000", file); ++} ++ ++/* Implement TARGET_DWARF_REGISTER_SPAN. */ ++ ++static rtx ++loongarch_dwarf_register_span (rtx reg) ++{ ++ rtx high, low; ++ machine_mode mode; ++ ++ mode = GET_MODE (reg); ++ /* By default, GCC maps increasing register numbers to increasing ++ memory locations, but paired FPRs are always little-endian, ++ regardless of the prevailing endianness. */ ++ if (FP_REG_P (REGNO (reg)) ++ && MAX_FPRS_PER_FMT > 1 ++ && GET_MODE_SIZE (mode) > UNITS_PER_FPREG) ++ { ++ gcc_assert (GET_MODE_SIZE (mode) == UNITS_PER_HWFPVALUE); ++ high = loongarch_subword (reg, true); ++ low = loongarch_subword (reg, false); ++ return gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, high, low)); ++ } ++ ++ return NULL_RTX; ++} ++ ++/* Implement TARGET_DWARF_FRAME_REG_MODE. */ ++ ++static machine_mode ++loongarch_dwarf_frame_reg_mode (int regno) ++{ ++ machine_mode mode = default_dwarf_frame_reg_mode (regno); ++ ++ /* FPRs are described as SImode when the ABI is ABILP32 and ++ TARGET_FLOAT64 is set; every other register keeps the default. */ ++ if (FP_REG_P (regno) && loongarch_abi == ABILP32 && TARGET_FLOAT64) ++ mode = SImode; ++ ++ return mode; ++} ++ ++ ++/* Implement ASM_OUTPUT_ASCII.
*/ ++ ++void ++loongarch_output_ascii (FILE *stream, const char *string, size_t len) ++{ ++ size_t i; ++ int cur_pos; ++ ++ /* Output column just past the opening quote of the .ascii directive ++ written below, assuming 8-column tabs. */ ++ cur_pos = 17; ++ fprintf (stream, "\t.ascii\t\""); ++ for (i = 0; i < len; i++) ++ { ++ int c; ++ ++ c = (unsigned char) string[i]; ++ if (ISPRINT (c)) ++ { ++ if (c == '\\' || c == '\"') ++ { ++ putc ('\\', stream); ++ cur_pos++; ++ } ++ putc (c, stream); ++ cur_pos++; ++ } ++ else ++ { ++ fprintf (stream, "\\%03o", c); ++ cur_pos += 4; ++ } ++ ++ /* Start a fresh .ascii directive once the line gets long, unless ++ this was the last character. */ ++ if (cur_pos > 72 && i+1 < len) ++ { ++ cur_pos = 17; ++ fprintf (stream, "\"\n\t.ascii\t\""); ++ } ++ } ++ fprintf (stream, "\"\n"); ++} ++ ++/* Emit either a label, .comm, or .lcomm directive. When using assembler ++ macros, mark the symbol as written so that loongarch_asm_output_external ++ won't emit an .extern for it. STREAM is the output file, NAME is the ++ name of the symbol, INIT_STRING is the string that should be written ++ before the symbol and FINAL_STRING is the string that should be ++ written after it. FINAL_STRING is a printf format that consumes the ++ remaining arguments. */ ++ ++void ++loongarch_declare_object (FILE *stream, const char *name, const char *init_string, ++ const char *final_string, ...) ++{ ++ va_list ap; ++ ++ fputs (init_string, stream); ++ assemble_name (stream, name); ++ va_start (ap, final_string); ++ vfprintf (stream, final_string, ap); ++ va_end (ap); ++ ++ tree name_tree = get_identifier (name); ++ TREE_ASM_WRITTEN (name_tree) = 1; ++} ++ ++/* Declare a common object of SIZE bytes using asm directive INIT_STRING. ++ NAME is the name of the object and ALIGN is the required alignment ++ in bits (it is divided by BITS_PER_UNIT before use). TAKES_ALIGNMENT_P is true if the directive takes a third ++ alignment argument.
*/ ++ ++void ++loongarch_declare_common_object (FILE *stream, const char *name, ++ const char *init_string, ++ unsigned HOST_WIDE_INT size, ++ unsigned int align, bool takes_alignment_p) ++{ ++ if (!takes_alignment_p) ++ { ++ /* The directive has no alignment operand, so round SIZE up to a ++ multiple of the alignment in bytes instead. */ ++ size += (align / BITS_PER_UNIT) - 1; ++ size -= size % (align / BITS_PER_UNIT); ++ loongarch_declare_object (stream, name, init_string, ++ "," HOST_WIDE_INT_PRINT_UNSIGNED "\n", size); ++ } ++ else ++ loongarch_declare_object (stream, name, init_string, ++ "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n", ++ size, align / BITS_PER_UNIT); ++} ++ ++/* Implement ASM_OUTPUT_ALIGNED_DECL_COMMON. This is usually the same as the ++ elfos.h version, but we also need to handle -muninit-const-in-rodata. */ ++ ++void ++loongarch_output_aligned_decl_common (FILE *stream, tree decl, const char *name, ++ unsigned HOST_WIDE_INT size, ++ unsigned int align) ++{ ++ /* If the target wants uninitialized const declarations in ++ .rdata then don't put them in .comm. */ ++ if (TARGET_EMBEDDED_DATA ++ && TARGET_UNINIT_CONST_IN_RODATA ++ && TREE_CODE (decl) == VAR_DECL ++ && TREE_READONLY (decl) ++ && (DECL_INITIAL (decl) == 0 || DECL_INITIAL (decl) == error_mark_node)) ++ { ++ if (TREE_PUBLIC (decl) && DECL_NAME (decl)) ++ targetm.asm_out.globalize_label (stream, name); ++ ++ switch_to_section (readonly_data_section); ++ ASM_OUTPUT_ALIGN (stream, floor_log2 (align / BITS_PER_UNIT)); ++ loongarch_declare_object (stream, name, "", ++ ":\n\t.space\t" HOST_WIDE_INT_PRINT_UNSIGNED "\n", ++ size); ++ } ++ else ++ loongarch_declare_common_object (stream, name, "\n\t.comm\t", ++ size, align, true); ++} ++ ++#ifdef ASM_OUTPUT_SIZE_DIRECTIVE ++extern int size_directive_output; ++ ++/* Implement ASM_DECLARE_OBJECT_NAME. This is like most of the standard ELF ++ definitions except that it uses loongarch_declare_object to emit the label.
*/ ++ ++void ++loongarch_declare_object_name (FILE *stream, const char *name, ++ tree decl ATTRIBUTE_UNUSED) ++{ ++#ifdef ASM_OUTPUT_TYPE_DIRECTIVE ++#ifdef USE_GNU_UNIQUE_OBJECT ++ /* As in elfos.h. */ ++ if (USE_GNU_UNIQUE_OBJECT && DECL_ONE_ONLY (decl) ++ && (!DECL_ARTIFICIAL (decl) || !TREE_READONLY (decl))) ++ ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "gnu_unique_object"); ++ else ++#endif ++ ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object"); ++#endif ++ ++ /* Track whether a size directive has been emitted for this object; ++ loongarch_finish_declare_object only emits one if this is still 0. */ ++ size_directive_output = 0; ++ if (!flag_inhibit_size_directive && DECL_SIZE (decl)) ++ { ++ HOST_WIDE_INT size; ++ ++ size_directive_output = 1; ++ size = int_size_in_bytes (TREE_TYPE (decl)); ++ ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size); ++ } ++ ++ loongarch_declare_object (stream, name, "", ":\n"); ++} ++ ++/* Implement ASM_FINISH_DECLARE_OBJECT. This is generic ELF stuff. */ ++ ++void ++loongarch_finish_declare_object (FILE *stream, tree decl, int top_level, int at_end) ++{ ++ const char *name; ++ ++ name = XSTR (XEXP (DECL_RTL (decl), 0), 0); ++ if (!flag_inhibit_size_directive ++ && DECL_SIZE (decl) != 0 ++ && !at_end ++ && top_level ++ && DECL_INITIAL (decl) == error_mark_node ++ && !size_directive_output) ++ { ++ HOST_WIDE_INT size; ++ ++ size_directive_output = 1; ++ size = int_size_in_bytes (TREE_TYPE (decl)); ++ ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size); ++ } ++} ++#endif ++ ++/* Mark text contents as code or data, mainly for the purpose of correct ++ disassembly. Emit a local symbol and set its type appropriately for ++ that purpose. The `.insn' directive that would additionally mark ++ code is not emitted: that part of the function body is commented ++ out.
*/ ++ ++void ++loongarch_set_text_contents_type (FILE *file ATTRIBUTE_UNUSED, ++ const char *prefix ATTRIBUTE_UNUSED, ++ unsigned long num ATTRIBUTE_UNUSED, ++ bool function_p ATTRIBUTE_UNUSED) ++{ ++#ifdef ASM_OUTPUT_TYPE_DIRECTIVE ++ /* Sized to hold the decimal digits of NUM plus the terminating NUL. */ ++ char buf[(sizeof (num) * 10) / 4 + 2]; ++ const char *fnname; ++ char *sname; ++ rtx symbol; ++ ++ sprintf (buf, "%lu", num); ++ symbol = XEXP (DECL_RTL (current_function_decl), 0); ++ fnname = targetm.strip_name_encoding (XSTR (symbol, 0)); ++ /* The local symbol is PREFIX, the current function's name, an ++ underscore and NUM, concatenated in that order. */ ++ sname = ACONCAT ((prefix, fnname, "_", buf, NULL)); ++ ++ ASM_OUTPUT_TYPE_DIRECTIVE (file, sname, function_p ? "function" : "object"); ++ assemble_name (file, sname); ++ fputs (":\n", file); ++// if (function_p) ++// fputs ("\t.insn\n", file); ++#endif ++} ++ ++/* Implement TARGET_ASM_FILE_START. */ ++ ++static void ++loongarch_file_start (void) ++{ ++ default_file_start (); ++} ++ ++/* Implement TARGET_ASM_CODE_END. */ ++ ++static void ++loongarch_code_end (void) ++{ ++ if (NEED_INDICATE_EXEC_STACK) ++ /* Add .note.GNU-stack. */ ++ file_end_indicate_exec_stack (); ++} ++ ++/* Make the last instruction frame-related and note that it performs ++ the operation described by FRAME_PATTERN. */ ++ ++static void ++loongarch_set_frame_expr (rtx frame_pattern) ++{ ++ rtx_insn *insn; ++ ++ insn = get_last_insn (); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR, ++ frame_pattern, ++ REG_NOTES (insn)); ++} ++ ++/* Return a frame-related rtx that stores REG at MEM. ++ REG must be a single register. */ ++ ++static rtx ++loongarch_frame_set (rtx mem, rtx reg) ++{ ++ rtx set; ++ ++ set = gen_rtx_SET (mem, reg); ++ RTX_FRAME_RELATED_P (set) = 1; ++ ++ return set; ++} ++ ++/* Record that the epilogue has restored call-saved register REG.
*/ ++ ++static void ++loongarch_add_cfa_restore (rtx reg) ++{ ++ loongarch_epilogue.cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, ++ loongarch_epilogue.cfa_restores); ++} ++ ++ ++/* Return true if REGNO is a register that is ordinarily call-clobbered ++ but must nevertheless be preserved by an interrupt handler. */ ++ ++static bool ++loongarch_interrupt_extra_call_saved_reg_p (unsigned int regno) ++{ ++ if (GP_REG_P (regno) ++ && cfun->machine->use_shadow_register_set == SHADOW_SET_NO) ++ { ++ /* $0 is hard-wired. */ ++ if (regno == GP_REG_FIRST) ++ return false; ++ ++ /* The function will return the stack pointer to its original value ++ anyway. */ ++ if (regno == STACK_POINTER_REGNUM) ++ return false; ++ ++ /* Otherwise, return true for registers that are ordinarily ++ call-clobbered (those set in call_used_regs). */ ++ return call_used_regs[regno]; ++ } ++ ++ return false; ++} ++ ++/* Return true if the current function should treat register REGNO ++ as call-saved. */ ++ ++static bool ++loongarch_cfun_call_saved_reg_p (unsigned int regno) ++{ ++ /* If the user makes an ordinarily-call-saved register global, ++ that register is no longer call-saved. */ ++ if (global_regs[regno]) ++ return false; ++ ++ /* Interrupt handlers need to save extra registers. */ ++ if (cfun->machine->interrupt_handler_p ++ && loongarch_interrupt_extra_call_saved_reg_p (regno)) ++ return true; ++ ++ return !call_used_regs[regno]; ++} ++ ++/* Return true if the function body might clobber register REGNO. ++ We know that REGNO is call-saved. */ ++ ++static bool ++loongarch_cfun_might_clobber_call_saved_reg_p (unsigned int regno) ++{ ++ /* Some functions should be treated as clobbering all call-saved ++ registers. */ ++ if (crtl->saves_all_registers) ++ return true; ++ ++ /* DF handles cases where a register is explicitly referenced in ++ the rtl. Incoming values are passed in call-clobbered registers, ++ so we can assume that any live call-saved register is set within ++ the function.
*/ ++ if (df_regs_ever_live_p (regno)) ++ return true; ++ ++ /* Check for registers that are clobbered by FUNCTION_PROFILER. ++ These clobbers are not explicit in the rtl. */ ++ if (crtl->profile && LARCH_SAVE_REG_FOR_PROFILING_P (regno)) ++ return true; ++ ++ /* The function's prologue will need to set the frame pointer if ++ frame_pointer_needed. */ ++ if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed) ++ return true; ++ ++ ++ /* If REGNO is ordinarily call-clobbered, we must assume that any ++ called function could modify it. */ ++ if (cfun->machine->interrupt_handler_p ++ && !crtl->is_leaf ++ && loongarch_interrupt_extra_call_saved_reg_p (regno)) ++ return true; ++ ++ return false; ++} ++ ++/* Return true if the current function must save register REGNO. */ ++ ++static bool ++loongarch_save_reg_p (unsigned int regno) ++{ ++ if (loongarch_cfun_call_saved_reg_p (regno)) ++ { ++ if (loongarch_cfun_might_clobber_call_saved_reg_p (regno)) ++ return true; ++ ++ /* Save both registers in an FPR pair if either one is used. This is ++ needed for the case when MIN_FPRS_PER_FMT == 1, which allows the odd ++ register to be used without the even register. */ ++ if (FP_REG_P (regno) ++ && MAX_FPRS_PER_FMT == 2 ++ && loongarch_cfun_might_clobber_call_saved_reg_p (regno + 1)) ++ return true; ++ } ++ ++ /* We need to save the incoming return address if __builtin_eh_return ++ is being used to set a different return address. */ ++ if (regno == RETURN_ADDR_REGNUM && crtl->calls_eh_return) ++ return true; ++ ++ return false; ++} ++ ++/* Populate the current function's loongarch_frame_info structure.
++ ++ LARCH stack frames look like: ++ ++ +-------------------------------+ ++ | | ++ | incoming stack arguments | ++ | | ++ +-------------------------------+ ++ | | ++ | caller-allocated save area | ++ A | for register arguments | ++ | | ++ +-------------------------------+ <-- incoming stack pointer ++ | | ++ | callee-allocated save area | ++ B | for arguments that are | ++ | split between registers and | ++ | the stack | ++ | | ++ +-------------------------------+ <-- arg_pointer_rtx ++ | | ++ C | callee-allocated save area | ++ | for register varargs | ++ | | ++ +-------------------------------+ <-- frame_pointer_rtx ++ | | + cop0_sp_offset ++ | COP0 reg save area | + UNITS_PER_WORD ++ | | ++ +-------------------------------+ <-- frame_pointer_rtx + acc_sp_offset ++ | | + UNITS_PER_WORD ++ | accumulator save area | ++ | | ++ +-------------------------------+ <-- stack_pointer_rtx + fp_sp_offset ++ | | + UNITS_PER_HWFPVALUE ++ | FPR save area | ++ | | ++ +-------------------------------+ <-- stack_pointer_rtx + gp_sp_offset ++ | | + UNITS_PER_WORD ++ | GPR save area | ++ | | ++ +-------------------------------+ <-- frame_pointer_rtx with ++ | | \ -fstack-protector ++ | local variables | | var_size ++ | | / ++ +-------------------------------+ ++ | | \ ++ | $gp save area | | cprestore_size ++ | | / ++ +-------------------------------+ | args_size ++ | | | ++ | caller-allocated save area | | ++ | for register arguments | | ++ | | / ++ +-------------------------------+ <-- stack_pointer_rtx ++ frame_pointer_rtx without ++ -fstack-protector ++ hard_frame_pointer_rtx for ++ code. ++ ++ At least two of A, B and C will be empty. ++ ++ Dynamic stack allocations such as alloca insert data at point P. ++ They decrease stack_pointer_rtx but leave frame_pointer_rtx and ++ hard_frame_pointer_rtx unchanged. 
*/ ++ ++static void ++loongarch_compute_frame_info (void) ++{ ++ struct loongarch_frame_info *frame; ++ HOST_WIDE_INT offset, size; ++ unsigned int regno, i; ++ ++ /* Skip re-computing the frame info after reload completed. */ ++ if (reload_completed) ++ return; ++ ++ /* Set this function's interrupt properties. */ ++ if (loongarch_interrupt_type_p (TREE_TYPE (current_function_decl))) ++ { ++ error ("the % attribute is not supported."); ++ // need to be improved !! ++ } ++ ++ frame = &cfun->machine->frame; ++ memset (frame, 0, sizeof (*frame)); ++ size = get_frame_size (); ++ ++ /* The first two blocks contain the outgoing argument area and the $gp save ++ slot. This area isn't needed in leaf functions. We can also skip it ++ if we know that none of the called functions will use this space. ++ ++ But if the target-independent frame size is nonzero, we have already ++ committed to allocating these in TARGET_STARTING_FRAME_OFFSET for ++ !FRAME_GROWS_DOWNWARD. */ ++ ++ if ((size == 0 || FRAME_GROWS_DOWNWARD) ++ && (crtl->is_leaf || (cfun->machine->optimize_call_stack && !flag_pic))) ++ { ++ /* The LARCH 3.0 linker does not like functions that dynamically ++ allocate the stack and have 0 for STACK_DYNAMIC_OFFSET, since it ++ looks like we are trying to create a second frame pointer to the ++ function, so allocate some stack space to make it happy. */ ++ if (cfun->calls_alloca) ++ frame->args_size = REG_PARM_STACK_SPACE (cfun->decl); ++ else ++ frame->args_size = 0; ++ frame->cprestore_size = 0; ++ } ++ else ++ { ++ frame->args_size = crtl->outgoing_args_size; ++ frame->cprestore_size = 0; ++ } ++ ++ ++ offset = frame->args_size + frame->cprestore_size; ++ ++ /* Move above the local variables. */ ++ frame->var_size = LARCH_STACK_ALIGN (size); ++ offset += frame->var_size; ++ ++ /* Find out which GPRs we need to save. 
*/ ++ for (regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++) ++ if (loongarch_save_reg_p (regno)) ++ { ++ frame->num_gp++; ++ frame->mask |= 1 << (regno - GP_REG_FIRST); ++ } ++ ++ /* If this function calls eh_return, we must also save and restore the ++ EH data registers. */ ++ if (crtl->calls_eh_return) ++ for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; i++) ++ { ++ frame->num_gp++; ++ frame->mask |= 1 << (EH_RETURN_DATA_REGNO (i) - GP_REG_FIRST); ++ } ++ ++ ++ /* Move above the GPR save area. */ ++ if (frame->num_gp > 0) ++ { ++ offset += LARCH_STACK_ALIGN (frame->num_gp * UNITS_PER_WORD); ++ frame->gp_sp_offset = offset - UNITS_PER_WORD; ++ } ++ ++ /* Find out which FPRs we need to save. This loop must iterate over ++ the same space as its companion in loongarch_for_each_saved_gpr_and_fpr. */ ++ if (TARGET_HARD_FLOAT) ++ for (regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno += MAX_FPRS_PER_FMT) ++ if (loongarch_save_reg_p (regno)) ++ { ++ frame->num_fp += MAX_FPRS_PER_FMT; ++ frame->fmask |= ~(~0U << MAX_FPRS_PER_FMT) << (regno - FP_REG_FIRST); ++ } ++ ++ /* Move above the FPR save area. */ ++ if (frame->num_fp > 0) ++ { ++ offset += LARCH_STACK_ALIGN (frame->num_fp * UNITS_PER_FPREG); ++ frame->fp_sp_offset = offset - UNITS_PER_HWFPVALUE; ++ } ++ ++ /* Add in space for the interrupt context information. */ ++ if (cfun->machine->interrupt_handler_p) ++ { ++ // need to be improved !! ++ } ++ ++ /* Move above the accumulator save area. */ ++ if (frame->num_acc > 0) ++ { ++ /* Each accumulator needs 2 words. */ ++ offset += frame->num_acc * 2 * UNITS_PER_WORD; ++ frame->acc_sp_offset = offset - UNITS_PER_WORD; ++ } ++ ++ /* Move above the COP0 register save area. */ ++ if (frame->num_cop0_regs > 0) ++ { ++ offset += frame->num_cop0_regs * UNITS_PER_WORD; ++ frame->cop0_sp_offset = offset - UNITS_PER_WORD; ++ } ++ ++ /* Determine if we can save the callee-saved registers in the frame ++ header. 
Restrict this to functions where there is no other reason ++ to allocate stack space so that we can eliminate the instructions ++ that modify the stack pointer. */ ++ ++ if (TARGET_OLDABI ++ && optimize > 0 ++ && flag_frame_header_optimization ++ && !MAIN_NAME_P (DECL_NAME (current_function_decl)) ++ && cfun->machine->varargs_size == 0 ++ && crtl->args.pretend_args_size == 0 ++ && frame->var_size == 0 ++ && frame->num_acc == 0 ++ && frame->num_cop0_regs == 0 ++ && frame->num_fp == 0 ++ && frame->num_gp > 0 ++ && frame->num_gp <= MAX_ARGS_IN_REGISTERS ++ && !cfun->machine->interrupt_handler_p ++ && cfun->machine->does_not_use_frame_header ++ && cfun->machine->optimize_call_stack ++ && !cfun->machine->callers_may_not_allocate_frame) ++ { ++ offset = 0; ++ frame->gp_sp_offset = REG_PARM_STACK_SPACE(cfun) - UNITS_PER_WORD; ++ cfun->machine->use_frame_header_for_callee_saved_regs = true; ++ } ++ ++ /* Move above the callee-allocated varargs save area. */ ++ offset += LARCH_STACK_ALIGN (cfun->machine->varargs_size); ++ frame->arg_pointer_offset = offset; ++ ++ /* Move above the callee-allocated area for pretend stack arguments. */ ++ offset += crtl->args.pretend_args_size; ++ frame->total_size = offset; ++ ++ /* Work out the offsets of the save areas from the top of the frame. */ ++ if (frame->gp_sp_offset > 0) ++ frame->gp_save_offset = frame->gp_sp_offset - offset; ++ if (frame->fp_sp_offset > 0) ++ frame->fp_save_offset = frame->fp_sp_offset - offset; ++ if (frame->acc_sp_offset > 0) ++ frame->acc_save_offset = frame->acc_sp_offset - offset; ++ if (frame->num_cop0_regs > 0) ++ frame->cop0_save_offset = frame->cop0_sp_offset - offset; ++} ++ ++/* Implement TARGET_FRAME_POINTER_REQUIRED. */ ++ ++static bool ++loongarch_frame_pointer_required (void) ++{ ++ /* If the function contains dynamic stack allocations, we need to ++ use the frame pointer to access the static parts of the frame. 
*/ ++ if (cfun->calls_alloca) ++ return true; ++ ++ return false; ++} ++ ++/* Make sure that we're not trying to eliminate to the wrong hard frame ++ pointer. */ ++ ++static bool ++loongarch_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) ++{ ++ return (to == HARD_FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM); ++} ++ ++/* Implement INITIAL_ELIMINATION_OFFSET. FROM is either the frame pointer ++ or argument pointer. TO is either the stack pointer or hard frame ++ pointer. */ ++ ++HOST_WIDE_INT ++loongarch_initial_elimination_offset (int from, int to) ++{ ++ HOST_WIDE_INT offset; ++ ++ loongarch_compute_frame_info (); ++ ++ /* Set OFFSET to the offset from the end-of-prologue stack pointer. */ ++ switch (from) ++ { ++ case FRAME_POINTER_REGNUM: ++ if (FRAME_GROWS_DOWNWARD) ++ offset = (cfun->machine->frame.args_size ++ + cfun->machine->frame.cprestore_size ++ + cfun->machine->frame.var_size); ++ else ++ offset = 0; ++ break; ++ ++ case ARG_POINTER_REGNUM: ++ offset = cfun->machine->frame.arg_pointer_offset; ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (to == HARD_FRAME_POINTER_REGNUM) ++ offset -= cfun->machine->frame.hard_frame_pointer_offset; ++ ++ return offset; ++} ++ ++ ++/* Implement RETURN_ADDR_RTX. We do not support moving back to a ++ previous frame. */ ++ ++rtx ++loongarch_return_addr (int count, rtx frame ATTRIBUTE_UNUSED) ++{ ++ if (count != 0) ++ return const0_rtx; ++ ++ return get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNUM); ++} ++ ++/* Emit code to change the current function's return address to ++ ADDRESS. SCRATCH is available as a scratch register, if needed. ++ ADDRESS and SCRATCH are both word-mode GPRs. 
*/ ++ ++void ++loongarch_set_return_address (rtx address, rtx scratch) ++{ ++ rtx slot_address; ++ ++ gcc_assert (BITSET_P (cfun->machine->frame.mask, RETURN_ADDR_REGNUM)); ++ /* The slot written is the one at gp_sp_offset from the stack pointer. */ ++ slot_address = loongarch_add_offset (scratch, stack_pointer_rtx, ++ cfun->machine->frame.gp_sp_offset); ++ loongarch_emit_move (gen_frame_mem (GET_MODE (address), slot_address), address); ++} ++ ++ ++/* Fill *BASE and *OFFSET such that *BASE + *OFFSET refers to the ++ cprestore slot. LOAD_P is true if the caller wants to load from ++ the cprestore slot; it is false if the caller wants to store to ++ the slot. LOAD_P is currently not consulted: the same base and ++ offset are returned for loads and stores. */ ++ ++static void ++loongarch_get_cprestore_base_and_offset (rtx *base, HOST_WIDE_INT *offset, ++ bool load_p) ++{ ++ const struct loongarch_frame_info *frame; ++ ++ frame = &cfun->machine->frame; ++ /* .cprestore always uses the stack pointer instead of the frame pointer. ++ We have a free choice for direct stores. ++ Using the stack pointer would sometimes give more ++ (early) scheduling freedom, but using the frame pointer would ++ sometimes give more (late) scheduling freedom. It's hard to ++ predict which applies to a given function, so let's keep things ++ simple. ++ ++ Loads must always use the frame pointer in functions that call ++ alloca, and there's little benefit to using the stack pointer ++ otherwise. */ ++ if (frame_pointer_needed) ++ { ++ *base = hard_frame_pointer_rtx; ++ *offset = frame->args_size - frame->hard_frame_pointer_offset; ++ } ++ else ++ { ++ *base = stack_pointer_rtx; ++ *offset = frame->args_size; ++ } ++} ++ ++/* Return true if X is the load or store address of the cprestore slot; ++ LOAD_P says which.
*/
++
++bool
++loongarch_cprestore_address_p (rtx x, bool load_p)
++{
++  rtx actual_base, wanted_base;
++  HOST_WIDE_INT actual_offset, wanted_offset;
++
++  /* Decompose X into base + constant and compare both parts with the
++     canonical cprestore address.  */
++  loongarch_split_plus (x, &actual_base, &actual_offset);
++  loongarch_get_cprestore_base_and_offset (&wanted_base, &wanted_offset,
++                                           load_p);
++  if (actual_base != wanted_base)
++    return false;
++
++  return actual_offset == wanted_offset;
++}
++
++
++/* A function to save or restore a register.  The first argument is the
++   register and the second is the stack slot.  */
++typedef void (*loongarch_save_restore_fn) (rtx, rtx);
++
++/* Use FN to save or restore register REGNO.  MODE is the register's
++   mode and OFFSET is the offset of its save slot from the current
++   stack pointer.  */
++
++static void
++loongarch_save_restore_reg (machine_mode mode, int regno,
++                            HOST_WIDE_INT offset,
++                            loongarch_save_restore_fn fn)
++{
++  rtx slot_addr = plus_constant (Pmode, stack_pointer_rtx, offset);
++  rtx slot = gen_frame_mem (mode, slot_addr);
++  rtx reg = gen_rtx_REG (mode, regno);
++
++  fn (reg, slot);
++}
++
++/* Save register REG to MEM.  Make the instruction frame-related.
++   A DFmode value is stored through loongarch_emit_move_or_split and
++   described to the unwinder as two subword stores when 64-bit FPRs are
++   not available or the ABI is ABILP32; every other case goes through
++   loongarch_emit_save_slot_move.  */
++
++static void
++loongarch_save_reg (rtx reg, rtx mem)
++{
++  bool two_part_p = (GET_MODE (reg) == DFmode
++                     && (!TARGET_FLOAT64 || loongarch_abi == ABILP32));
++
++  if (!two_part_p)
++    {
++      loongarch_emit_save_slot_move (mem, reg,
++                                     LARCH_PROLOGUE_TEMP (GET_MODE (reg)));
++      return;
++    }
++
++  loongarch_emit_move_or_split (mem, reg, SPLIT_IF_NECESSARY);
++
++  /* Describe the store as a PARALLEL of its two subword sets.  */
++  rtx set0 = loongarch_frame_set (loongarch_subword (mem, false),
++                                  loongarch_subword (reg, false));
++  rtx set1 = loongarch_frame_set (loongarch_subword (mem, true),
++                                  loongarch_subword (reg, true));
++  rtvec parts = gen_rtvec (2, set0, set1);
++  loongarch_set_frame_expr (gen_rtx_PARALLEL (VOIDmode, parts));
++}
++
++/* Call FN for each register that is saved by the current function.
++   SP_OFFSET is the offset of the current stack pointer from the start
++   of the frame.
*/
++
++static void
++loongarch_for_each_saved_gpr_and_fpr (HOST_WIDE_INT sp_offset,
++				  loongarch_save_restore_fn fn)
++{
++  machine_mode fpr_mode;
++  int regno;
++  const struct loongarch_frame_info *frame = &cfun->machine->frame;
++  HOST_WIDE_INT offset;
++  unsigned int mask;
++
++  /* Walk the GPRs in increasing register-number order.  The first
++     register found in MASK uses the slot at GP_SP_OFFSET (made relative
++     to the current stack pointer by subtracting SP_OFFSET) and every
++     following one uses the slot UNITS_PER_WORD below the previous one.
++     NOTE(review): the older wording here ("starting from high to low",
++     "func+4", "nop in the epilogue") does not describe this loop and
++     looks inherited from another port; the slot order presumably has to
++     agree with loongarch_compute_frame_info -- confirm.  */
++  offset = frame->gp_sp_offset - sp_offset;
++  mask = frame->mask;
++
++  for (regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
++    if (BITSET_P (mask, regno - GP_REG_FIRST))
++      {
++	/* Record the ra offset for use by loongarch_function_profiler.
++	   FRAME is a pointer-to-const, so the write goes through cfun
++	   directly; adding SP_OFFSET back yields the frame-relative
++	   offset again.  */
++	if (regno == RETURN_ADDR_REGNUM)
++	  cfun->machine->frame.ra_fp_offset = offset + sp_offset;
++	loongarch_save_restore_reg (word_mode, regno, offset, fn);
++	offset -= UNITS_PER_WORD;
++      }
++
++  /* This loop must iterate over the same space as its companion in
++     loongarch_compute_frame_info.  FPRs are visited from the highest
++     register downwards in steps of MAX_FPRS_PER_FMT; each saved entry
++     consumes GET_MODE_SIZE (FPR_MODE) bytes, even when only one SFmode
++     half is transferred because the other half is a fixed register.  */
++  offset = cfun->machine->frame.fp_sp_offset - sp_offset;
++  fpr_mode = (TARGET_SINGLE_FLOAT ? SFmode : DFmode);
++  for (regno = FP_REG_LAST - MAX_FPRS_PER_FMT + 1;
++       regno >= FP_REG_FIRST;
++       regno -= MAX_FPRS_PER_FMT)
++    if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
++      {
++	if (!TARGET_FLOAT64 && TARGET_DOUBLE_FLOAT
++	    && (fixed_regs[regno] || fixed_regs[regno + 1]))
++	  {
++	    if (fixed_regs[regno])
++	      loongarch_save_restore_reg (SFmode, regno + 1, offset, fn);
++	    else
++	      loongarch_save_restore_reg (SFmode, regno, offset, fn);
++	  }
++	else
++	  loongarch_save_restore_reg (fpr_mode, regno, offset, fn);
++	offset -= GET_MODE_SIZE (fpr_mode);
++      }
++}
++
++
++/* Return true if a move between register REGNO and its save slot (MEM)
++   can be done in a single move.  LOAD_P is true if we are loading
++   from the slot, false if we are storing to it.
*/
++
++static bool
++loongarch_direct_save_slot_move_p (unsigned int regno, rtx mem, bool load_p)
++{
++  /* A direct move is possible exactly when no secondary reload class is
++     required for this register class / slot combination.  */
++  return loongarch_secondary_reload_class (REGNO_REG_CLASS (regno),
++                                           GET_MODE (mem), mem,
++                                           load_p) == NO_REGS;
++}
++
++/* Emit a move from SRC to DEST, given that one of them is a register
++   save slot and that the other is a register.  TEMP is a temporary
++   GPR of the same mode that is available if need be.  When DEST is the
++   memory slot (a save), the move is also recorded as a frame
++   expression.  */
++
++void
++loongarch_emit_save_slot_move (rtx dest, rtx src, rtx temp)
++{
++  unsigned int regno;
++  rtx mem;
++
++  /* Work out which operand is the register and which is the slot.  */
++  if (REG_P (src))
++    {
++      regno = REGNO (src);
++      mem = dest;
++    }
++  else
++    {
++      regno = REGNO (dest);
++      mem = src;
++    }
++
++  /* MEM == SRC means we are loading from the slot.  */
++  if (loongarch_direct_save_slot_move_p (regno, mem, mem == src))
++    loongarch_emit_move (dest, src);
++  else
++    {
++      /* Bounce the value through TEMP, which must not overlap DEST.  */
++      gcc_assert (!reg_overlap_mentioned_p (dest, temp));
++      loongarch_emit_move (temp, src);
++      loongarch_emit_move (dest, temp);
++    }
++  if (MEM_P (dest))
++    loongarch_set_frame_expr (loongarch_frame_set (dest, src));
++}
++
++
++/* Implement ASM_DECLARE_FUNCTION_NAME.  STREAM and FNDECL are unused;
++   the work is delegated to loongarch_start_function_definition.  */
++
++void
++loongarch_declare_function_name (FILE *stream ATTRIBUTE_UNUSED,
++                                 const char *name,
++                                 tree fndecl ATTRIBUTE_UNUSED)
++{
++  loongarch_start_function_definition (name);
++}
++
++/* Implement TARGET_OUTPUT_FUNCTION_PROLOGUE.  Nothing is emitted here:
++   the function definition is started from
++   loongarch_declare_function_name instead.  */
++
++static void
++loongarch_output_function_prologue (FILE *file ATTRIBUTE_UNUSED)
++{
++}
++
++/* Implement TARGET_OUTPUT_FUNCTION_EPILOGUE.  Nothing is emitted.  */
++
++static void
++loongarch_output_function_epilogue (FILE *)
++{
++}
++
++/* Emit an optimisation barrier for accesses to the current frame.  */
++
++static void
++loongarch_frame_barrier (void)
++{
++  emit_clobber (gen_frame_mem (BLKmode, stack_pointer_rtx));
++}
++
++
++#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
++
++#if PROBE_INTERVAL > 16384
++#error Cannot use indexed addressing mode for stack probing
++#endif
++
++/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
++   inclusive.  These are offsets below the current stack pointer: every
++   probe address is formed by subtracting from it.  */
++
++static void
++loongarch_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
++{
++  /* See if we have a constant small number of probes to generate.  If so,
++     that's the easy case.  */
++  if (first + size <= 2048)
++    {
++      HOST_WIDE_INT i;
++
++      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
++         it exceeds SIZE.  If only one probe is needed, this will not
++         generate any code.  Then probe at FIRST + SIZE.  */
++      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
++        emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
++                                         -(first + i)));
++
++      emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
++                                       -(first + size)));
++    }
++
++  /* Otherwise, do the same as above, but in a loop.  Note that we must be
++     extra careful with variables wrapping around because we might be at
++     the very top (or the very bottom) of the address space and we have
++     to be able to handle this case properly; in particular, we use an
++     equality test for the loop condition.  */
++  else
++    {
++      HOST_WIDE_INT rounded_size;
++      rtx r13 = LARCH_PROLOGUE_TEMP (Pmode);
++      rtx r12 = LARCH_PROLOGUE_TEMP2 (Pmode);
++
++      /* Sanity check for the addressing mode we're going to use.  */
++      gcc_assert (first <= 16384);
++
++
++      /* Step 1: round SIZE to the previous multiple of the interval.  */
++
++      rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
++
++
++      /* Step 2: compute initial and final value of the loop counter.  */
++
++      /* TEST_ADDR = SP - FIRST.  */
++      if (first > 2048)
++        {
++          emit_move_insn (r12, GEN_INT (first));
++          emit_insn (gen_rtx_SET (r13, gen_rtx_MINUS (Pmode,
++                                                      stack_pointer_rtx,
++                                                      r12)));
++        }
++      else
++        emit_insn (gen_rtx_SET (r13, plus_constant (Pmode,
++                                                    stack_pointer_rtx,
++                                                    -first)));
++
++      /* LAST_ADDR = TEST_ADDR - ROUNDED_SIZE.  This is computed even when
++         ROUNDED_SIZE is zero because step 4 addresses its probe relative
++         to LAST_ADDR.  */
++      if (rounded_size > 2048)
++        {
++          emit_move_insn (r12, GEN_INT (rounded_size));
++          emit_insn (gen_rtx_SET (r12, gen_rtx_MINUS (Pmode, r13, r12)));
++        }
++      else
++        emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, r13,
++                                                    -rounded_size)));
++
++
++      /* Step 3: the loop
++
++        do
++          {
++            TEST_ADDR = TEST_ADDR - PROBE_INTERVAL
++            probe at TEST_ADDR
++          }
++        while (TEST_ADDR != LAST_ADDR)
++
++        probes at FIRST + N * PROBE_INTERVAL for values of N from 1
++        until it is equal to ROUNDED_SIZE.
++
++        The loop body runs before its equality test, so it must not be
++        emitted when ROUNDED_SIZE is zero: TEST_ADDR would already equal
++        LAST_ADDR on entry, the first step would move past it, and the
++        loop would not terminate until the address wrapped around.  */
++
++      if (rounded_size != 0)
++        emit_insn (PMODE_INSN (gen_probe_stack_range, (r13, r13, r12)));
++
++
++      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
++         that SIZE is equal to ROUNDED_SIZE.  */
++
++      if (size != rounded_size)
++        emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
++    }
++
++  /* Make sure nothing is scheduled before we are done.  */
++  emit_insn (gen_blockage ());
++}
++
++/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
++   absolute addresses.  Outputs the assembly for the probing loop directly
++   and returns an empty template.  The loop body executes at least once.
++   The step is emitted as PROBE_INTERVAL/2048 additions of -2048, so this
++   assumes PROBE_INTERVAL is a multiple of 2048 and at least 2048.  */
++
++const char *
++loongarch_output_probe_stack_range (rtx reg1, rtx reg2)
++{
++  static int labelno = 0;
++  char loop_lab[32], tmp[64];
++  rtx xops[2];
++
++  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
++
++  /* Loop.  */
++  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
++
++  /* TEST_ADDR = TEST_ADDR - PROBE_INTERVAL, in steps of 2048.  */
++  xops[0] = reg1;
++  xops[1] = GEN_INT (-PROBE_INTERVAL/(PROBE_INTERVAL/2048));
++  for (int i = 0; i < PROBE_INTERVAL/2048; i++)
++    /* NOTE(review): the add is selected on TARGET_64BIT && TARGET_LONG64
++       while the store below is selected on TARGET_64BIT alone -- confirm
++       the two conditions are meant to differ.  */
++    if (TARGET_64BIT && TARGET_LONG64)
++      output_asm_insn ("addi.d\t%0,%0,%1", xops);
++    else
++      output_asm_insn ("addi.w\t%0,%0,%1", xops);
++
++  /* Probe at TEST_ADDR, test if TEST_ADDR == LAST_ADDR and branch.  */
++  xops[1] = reg2;
++  strcpy (tmp, "bne\t%0,%1,");
++  if (TARGET_64BIT)
++    output_asm_insn ("st.d\t$zero,%0,0", xops);
++  else
++    output_asm_insn ("st.w\t$zero,%0,0", xops);
++  /* The first character of the generated label is skipped (presumably
++     the internal-label marker added by ASM_GENERATE_INTERNAL_LABEL).  */
++  output_asm_insn (strcat (tmp, &loop_lab[1]), xops);
++
++  return "";
++}
++
++/* Expand the "prologue" pattern.  In order: record static stack usage,
++   emit stack probes when stack checking is enabled, allocate a first
++   chunk of the frame and save registers into it, allocate the remainder
++   of the frame, and finally set up the hard frame pointer.  */
++
++void
++loongarch_expand_prologue (void)
++{
++  const struct loongarch_frame_info *frame;
++  HOST_WIDE_INT size;
++
++  frame = &cfun->machine->frame;
++  size = frame->total_size;
++
++  if (flag_stack_usage_info)
++    current_function_static_stack_size = size;
++
++  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
++      || flag_stack_clash_protection)
++    {
++      if (crtl->is_leaf && !cfun->calls_alloca)
++        {
++          /* Leaf functions only probe the part of the frame that extends
++             beyond the protected area.  */
++          if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
++            loongarch_emit_probe_stack_range (get_stack_check_protect (),
++                                              size
++                                              - get_stack_check_protect ());
++        }
++      else if (size > 0)
++        loongarch_emit_probe_stack_range (get_stack_check_protect (), size);
++    }
++
++  /* Save the registers.  Allocate up to LARCH_MAX_FIRST_STACK_STEP
++     bytes beforehand; this is enough to cover the register save area
++     without going out of range.  */
++  if (((frame->mask | frame->fmask | frame->acc_mask) != 0)
++      || frame->num_cop0_regs > 0)
++    {
++      HOST_WIDE_INT step1;
++
++      step1 = MIN (size, LARCH_MAX_FIRST_STACK_STEP);
++      if (cfun->machine->interrupt_handler_p)
++        {
++          /* TODO: interrupt handlers are not implemented yet; no stack
++             adjustment is emitted on this path.  */
++        }
++      else if (step1 != 0)
++        {
++          rtx insn = gen_add3_insn (stack_pointer_rtx,
++                                    stack_pointer_rtx,
++                                    GEN_INT (-step1));
++          RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
++          loongarch_frame_barrier ();
++          size -= step1;
++        }
++      /* SIZE is now the part of the frame that is still unallocated.  */
++      loongarch_for_each_saved_gpr_and_fpr (size, loongarch_save_reg);
++    }
++
++  /* Allocate the rest of the frame.  */
++  if (size > 0)
++    {
++      if (SMALL_OPERAND (-size))
++        RTX_FRAME_RELATED_P (emit_insn (gen_add3_insn (stack_pointer_rtx,
++                                                       stack_pointer_rtx,
++                                                       GEN_INT (-size)))) = 1;
++      else
++        {
++          loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode), GEN_INT (size));
++          emit_insn (gen_sub3_insn (stack_pointer_rtx,
++                                    stack_pointer_rtx,
++                                    LARCH_PROLOGUE_TEMP (Pmode)));
++
++          /* Describe the combined effect of the previous instructions.  */
++          loongarch_set_frame_expr
++            (gen_rtx_SET (stack_pointer_rtx,
++                          plus_constant (Pmode, stack_pointer_rtx, -size)));
++        }
++      loongarch_frame_barrier ();
++    }
++
++  /* Set up the frame pointer, if we're using one.  */
++  if (frame_pointer_needed)
++    {
++      HOST_WIDE_INT offset;
++
++      offset = frame->hard_frame_pointer_offset;
++      if (offset == 0)
++        {
++          rtx insn = loongarch_emit_move (hard_frame_pointer_rtx,
++                                          stack_pointer_rtx);
++          RTX_FRAME_RELATED_P (insn) = 1;
++        }
++      else if (SMALL_OPERAND (offset))
++        {
++          rtx insn = gen_add3_insn (hard_frame_pointer_rtx,
++                                    stack_pointer_rtx, GEN_INT (offset));
++          RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
++        }
++      else
++        {
++          /* OFFSET does not fit an immediate: build it in the prologue
++             temporary, then describe the combined effect.  */
++          loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode), GEN_INT (offset));
++          loongarch_emit_move (hard_frame_pointer_rtx, stack_pointer_rtx);
++          emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
++                                    hard_frame_pointer_rtx,
++                                    LARCH_PROLOGUE_TEMP (Pmode)));
++          loongarch_set_frame_expr
++            (gen_rtx_SET (hard_frame_pointer_rtx,
++                          plus_constant (Pmode, stack_pointer_rtx, offset)));
++        }
++    }
++
++  /* If we are profiling, make sure no instructions are scheduled before
++     the call to mcount.  */
++  if (crtl->profile)
++    emit_insn (gen_blockage ());
++}
++
++/* Attach all pending register saves to the previous instruction.
++   Return that instruction.  The pending list is cleared once it has
++   been attached.  */
++
++static rtx_insn *
++loongarch_epilogue_emit_cfa_restores (void)
++{
++  rtx_insn *insn;
++
++  insn = get_last_insn ();
++  if (loongarch_epilogue.cfa_restores)
++    {
++      /* The instruction must exist and must not carry notes already,
++         because its note list is replaced wholesale.  */
++      gcc_assert (insn && !REG_NOTES (insn));
++      RTX_FRAME_RELATED_P (insn) = 1;
++      REG_NOTES (insn) = loongarch_epilogue.cfa_restores;
++      loongarch_epilogue.cfa_restores = 0;
++    }
++  return insn;
++}
++
++/* Like loongarch_epilogue_emit_cfa_restores, but also record that the CFA is
++   now at REG + OFFSET.
*/
++
++static void
++loongarch_epilogue_set_cfa (rtx reg, HOST_WIDE_INT offset)
++{
++  /* Pending restore notes are always flushed first, even if the CFA
++     turns out to be unchanged.  */
++  rtx_insn *last = loongarch_epilogue_emit_cfa_restores ();
++
++  if (reg == loongarch_epilogue.cfa_reg
++      && offset == loongarch_epilogue.cfa_offset)
++    return;
++
++  /* Attach a REG_CFA_DEF_CFA note describing the new CFA and remember
++     it as the current definition.  */
++  rtx new_cfa = plus_constant (Pmode, reg, offset);
++  RTX_FRAME_RELATED_P (last) = 1;
++  REG_NOTES (last) = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa,
++                                     REG_NOTES (last));
++  loongarch_epilogue.cfa_reg = reg;
++  loongarch_epilogue.cfa_offset = offset;
++}
++
++/* Emit instructions to restore register REG from slot MEM.  Also update
++   the cfa_restores list.  */
++
++static void
++loongarch_restore_reg (rtx reg, rtx mem)
++{
++  /* A DFmode register is queued as two subword restores when 64-bit
++     FPRs are not available or the ABI is ABILP32; anything else is
++     queued as a single restore.  */
++  bool two_part_p = (GET_MODE (reg) == DFmode
++                     && (!TARGET_FLOAT64 || loongarch_abi == ABILP32));
++
++  if (two_part_p)
++    {
++      loongarch_add_cfa_restore (loongarch_subword (reg, true));
++      loongarch_add_cfa_restore (loongarch_subword (reg, false));
++    }
++  else
++    loongarch_add_cfa_restore (reg);
++
++  loongarch_emit_save_slot_move (reg, mem,
++                                 LARCH_EPILOGUE_TEMP (GET_MODE (reg)));
++
++  if (REGNO (reg) != REGNO (loongarch_epilogue.cfa_reg))
++    return;
++
++  /* The CFA is currently defined in terms of the register whose
++     value we have just restored.  Redefine the CFA in terms of
++     the stack pointer.  */
++  loongarch_epilogue_set_cfa (stack_pointer_rtx,
++                              loongarch_epilogue.cfa_restore_sp_offset);
++}
++
++/* Emit code to set the stack pointer to BASE + OFFSET, given that
++   BASE + OFFSET is NEW_FRAME_SIZE bytes below the top of the frame.
++   BASE, if not the stack pointer, is available as a temporary.
*/
++
++static void
++loongarch_deallocate_stack (rtx base, rtx offset, HOST_WIDE_INT new_frame_size)
++{
++  /* Nothing to do if the stack pointer already has the desired value.  */
++  if (base == stack_pointer_rtx && offset == const0_rtx)
++    return;
++
++  loongarch_frame_barrier ();
++  if (offset == const0_rtx)
++    emit_move_insn (stack_pointer_rtx, base);
++  else
++    emit_insn (gen_add3_insn (stack_pointer_rtx, base, offset));
++
++  /* Either way the CFA is now NEW_FRAME_SIZE bytes above the stack
++     pointer.  */
++  loongarch_epilogue_set_cfa (stack_pointer_rtx, new_frame_size);
++}
++
++/* Expand an "epilogue" or "sibcall_epilogue" pattern; SIBCALL_P
++   says which.  */
++
++void
++loongarch_expand_epilogue (bool sibcall_p)
++{
++  const struct loongarch_frame_info *frame;
++  HOST_WIDE_INT step1, step2;
++  rtx base, adjust;
++
++  if (!sibcall_p && loongarch_can_use_return_insn ())
++    {
++      emit_jump_insn (gen_return ());
++      return;
++    }
++
++
++  /* Split the frame into two.  STEP1 is the amount of stack we should
++     deallocate before restoring the registers.  STEP2 is the amount we
++     should deallocate afterwards.
++
++     Start off by assuming that no registers need to be restored.  */
++  frame = &cfun->machine->frame;
++  step1 = frame->total_size;
++  step2 = 0;
++
++  /* Work out which register holds the frame address.  */
++  if (!frame_pointer_needed)
++    base = stack_pointer_rtx;
++  else
++    {
++      base = hard_frame_pointer_rtx;
++      step1 -= frame->hard_frame_pointer_offset;
++    }
++  loongarch_epilogue.cfa_reg = base;
++  loongarch_epilogue.cfa_offset = step1;
++  loongarch_epilogue.cfa_restores = NULL_RTX;
++
++  /* If we need to restore registers, deallocate as much stack as
++     possible in the second step without going out of range.  */
++  if ((frame->mask | frame->fmask | frame->acc_mask) != 0
++      || frame->num_cop0_regs > 0)
++    {
++      step2 = MIN (step1, LARCH_MAX_FIRST_STACK_STEP);
++      step1 -= step2;
++    }
++
++  /* Get an rtx for STEP1 that we can add to BASE.  */
++  adjust = GEN_INT (step1);
++  if (!SMALL_OPERAND (step1))
++    {
++      loongarch_emit_move (LARCH_EPILOGUE_TEMP (Pmode), adjust);
++      adjust = LARCH_EPILOGUE_TEMP (Pmode);
++    }
++  loongarch_deallocate_stack (base, adjust, step2);
++
++  loongarch_epilogue.cfa_restore_sp_offset = step2;
++
++  /* Restore the registers.  The stack pointer now sits STEP2 bytes
++     below the top of the frame.  */
++  loongarch_for_each_saved_gpr_and_fpr (frame->total_size - step2,
++                                        loongarch_restore_reg);
++
++  if (cfun->machine->interrupt_handler_p)
++    {
++      /* TODO: interrupt handlers are not implemented yet; the final
++         deallocation is not emitted on this path.  */
++    }
++  else
++    /* Deallocate the final bit of the frame.  */
++    loongarch_deallocate_stack (stack_pointer_rtx, GEN_INT (step2), 0);
++
++  if (cfun->machine->use_frame_header_for_callee_saved_regs)
++    loongarch_epilogue_emit_cfa_restores ();
++  else
++    gcc_assert (!loongarch_epilogue.cfa_restores);
++
++  /* Add in the __builtin_eh_return stack adjustment.  */
++  if (crtl->calls_eh_return)
++    emit_insn (gen_add3_insn (stack_pointer_rtx,
++                              stack_pointer_rtx,
++                              EH_RETURN_STACKADJ_RTX));
++
++  if (!sibcall_p)
++    {
++      if (cfun->machine->interrupt_handler_p)
++        {
++          /* TODO: interrupt handlers are not implemented yet; no return
++             instruction is emitted on this path.  */
++        }
++      else
++        {
++          rtx reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
++          emit_jump_insn (gen_simple_return_internal (reg));
++        }
++    }
++}
++
++/* Return nonzero if this function is known to have a null epilogue.
++   This allows the optimizer to omit jumps to jumps if no stack
++   was created.  */
++
++bool
++loongarch_can_use_return_insn (void)
++{
++  /* Interrupt handlers need to go through the epilogue.  */
++  if (cfun->machine->interrupt_handler_p)
++    return false;
++
++  /* The frame size is only consulted once reload has completed.  */
++  if (!reload_completed)
++    return false;
++
++  if (crtl->profile)
++    return false;
++
++  return (cfun->machine->frame.total_size == 0
++          && !cfun->machine->use_frame_header_for_callee_saved_regs);
++}
++
++/* Return true if register REGNO can store a value of mode MODE.
++   The result of this function is cached in loongarch_hard_regno_mode_ok_p.
*/
++
++static bool
++loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode)
++{
++  /* Condition codes live only in the FP status registers.  */
++  if (mode == CCmode)
++    return ST_REG_P (regno);
++
++  const unsigned int nbytes = GET_MODE_SIZE (mode);
++
++  /* GPRs: anything that fits in one word may go anywhere; wider values
++     must start at an even register offset.  */
++  if (GP_REG_P (regno))
++    {
++      if (nbytes <= UNITS_PER_WORD)
++        return true;
++
++      return ((regno - GP_REG_FIRST) & 1) == 0;
++    }
++
++  if (!FP_REG_P (regno))
++    return false;
++
++  /* FPRs: the register must be aligned to MAX_FPRS_PER_FMT unless a
++     single register is a valid unit and the value fits in it.  */
++  const bool aligned_p
++    = ((regno - FP_REG_FIRST) % MAX_FPRS_PER_FMT) == 0;
++  if (!aligned_p
++      && !(MIN_FPRS_PER_FMT == 1 && nbytes <= UNITS_PER_FPREG))
++    return false;
++
++  switch (GET_MODE_CLASS (mode))
++    {
++    case MODE_FLOAT:
++    case MODE_COMPLEX_FLOAT:
++    case MODE_VECTOR_FLOAT:
++      return nbytes <= UNITS_PER_FPVALUE;
++
++    case MODE_INT:
++      /* Allow integer modes that fit into a single register.  We need
++         to put integers into FPRs when using conversion instructions.
++         There's no point allowing sizes smaller than a word, because
++         the FPU has no appropriate load/store instructions.  */
++      return nbytes >= MIN_UNITS_PER_WORD && nbytes <= UNITS_PER_FPREG;
++
++    default:
++      return false;
++    }
++}
++
++/* Implement TARGET_HARD_REGNO_MODE_OK by looking the answer up in the
++   loongarch_hard_regno_mode_ok_p table.  */
++
++static bool
++loongarch_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
++{
++  const bool ok_p = loongarch_hard_regno_mode_ok_p[mode][regno];
++
++  return ok_p;
++}
++
++/* Return nonzero if register OLD_REG can be renamed to register NEW_REG.  */
++
++bool
++loongarch_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
++                                unsigned int new_reg)
++{
++  /* Interrupt functions can only use registers that have already been
++     saved by the prologue, even if they would normally be call-clobbered.  */
++  return (!cfun->machine->interrupt_handler_p
++          || df_regs_ever_live_p (new_reg));
++}
++
++/* Return nonzero if register REGNO can be used as a scratch register
++   in peephole2.  */
++
++bool
++loongarch_hard_regno_scratch_ok (unsigned int regno)
++{
++  /* Same restriction as in loongarch_hard_regno_rename_ok.  */
++  return (!cfun->machine->interrupt_handler_p
++          || df_regs_ever_live_p (regno));
++}
++
++/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  No register is
++   reported as partially clobbered; all arguments are ignored.  */
++
++static bool
++loongarch_hard_regno_call_part_clobbered (unsigned int abi_id ATTRIBUTE_UNUSED,
++                                          unsigned int regno ATTRIBUTE_UNUSED,
++                                          machine_mode mode ATTRIBUTE_UNUSED)
++{
++  return false;
++}
++
++/* Implement TARGET_HARD_REGNO_NREGS.  */
++
++static unsigned int
++loongarch_hard_regno_nregs (unsigned int regno, machine_mode mode)
++{
++  int unit;
++
++  if (ST_REG_P (regno))
++    /* The size of FP status registers is always 4, because they only hold
++       CCmode values, and CCmode is always considered to be 4 bytes wide.  */
++    unit = 4;
++  else if (FP_REG_P (regno))
++    unit = UNITS_PER_FPREG;
++  else
++    /* All other registers are word-sized.  */
++    unit = UNITS_PER_WORD;
++
++  /* Round the mode size up to a whole number of registers.  */
++  return (GET_MODE_SIZE (mode) + unit - 1) / unit;
++}
++
++/* Implement CLASS_MAX_NREGS, taking the maximum of the cases
++   in loongarch_hard_regno_nregs.  The maximum register count corresponds
++   to the smallest per-register size among the kinds of register that
++   RCLASS contains.  */
++
++int
++loongarch_class_max_nregs (enum reg_class rclass, machine_mode mode)
++{
++  HARD_REG_SET remaining = reg_class_contents[(int) rclass];
++  int unit = 0x8000;
++
++  /* FP status registers.  */
++  if (hard_reg_set_intersect_p (remaining, reg_class_contents[(int) ST_REGS]))
++    {
++      if (loongarch_hard_regno_mode_ok (ST_REG_FIRST, mode))
++        unit = MIN (unit, 4);
++
++      remaining &= ~reg_class_contents[(int) ST_REGS];
++    }
++
++  /* Floating-point registers.  */
++  if (hard_reg_set_intersect_p (remaining, reg_class_contents[(int) FP_REGS]))
++    {
++      if (loongarch_hard_regno_mode_ok (FP_REG_FIRST, mode))
++        unit = MIN (unit, UNITS_PER_FPREG);
++
++      remaining &= ~reg_class_contents[(int) FP_REGS];
++    }
++
++  /* Whatever is left is treated as word-sized.  */
++  if (!hard_reg_set_empty_p (remaining))
++    unit = MIN (unit, UNITS_PER_WORD);
++
++  return (GET_MODE_SIZE (mode) + unit - 1) / unit;
++}
++
++/* Implement TARGET_CAN_CHANGE_MODE_CLASS.
*/
++
++static bool
++loongarch_can_change_mode_class (machine_mode from,
++                                 machine_mode to, reg_class_t rclass)
++{
++  /* Conversions between two 8-byte integral modes are always allowed,
++     whatever the register class.  */
++  const bool both_8_byte_integral_p
++    = (GET_MODE_SIZE (from) == 8 && GET_MODE_SIZE (to) == 8
++       && INTEGRAL_MODE_P (from) && INTEGRAL_MODE_P (to));
++
++  if (both_8_byte_integral_p)
++    return true;
++
++  /* Otherwise, there are several problems with changing the modes of
++     values in floating-point registers:
++
++     - When a multi-word value is stored in paired floating-point
++       registers, the first register always holds the low word.  We
++       therefore can't allow FPRs to change between single-word and
++       multi-word modes on big-endian targets.
++
++     - GCC assumes that each word of a multiword register can be
++       accessed individually using SUBREGs.  This is not true for
++       floating-point registers if they are bigger than a word.
++
++     - Loading a 32-bit value into a 64-bit floating-point register
++       will not sign-extend the value, despite what LOAD_EXTEND_OP
++       says.  We can't allow FPRs to change from SImode to a wider
++       mode on 64-bit targets.
++
++     - If the FPU has already interpreted a value in one format, we
++       must not ask it to treat the value as having a different
++       format.
++
++     We therefore disallow all mode changes involving FPRs.  */
++  if (reg_classes_intersect_p (FP_REGS, rclass))
++    return false;
++
++  return true;
++}
++
++/* Implement target hook small_register_classes_for_mode_p.  Always
++   answers "no".  */
++
++static bool
++loongarch_small_register_classes_for_mode_p (machine_mode mode
++                                             ATTRIBUTE_UNUSED)
++{
++  return false;
++}
++
++/* Return true if moves in mode MODE can use the FPU's mov.fmt instruction.
++   Only SFmode (with hard float) and DFmode (with hard double float)
++   qualify; every other mode, V2SFmode included, does not.  */
++
++static bool
++loongarch_mode_ok_for_mov_fmt_p (machine_mode mode)
++{
++  if (mode == E_SFmode)
++    return TARGET_HARD_FLOAT;
++
++  if (mode == E_DFmode)
++    return TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT;
++
++  return false;
++}
++
++/* Implement TARGET_MODES_TIEABLE_P.
*/
++
++static bool
++loongarch_modes_tieable_p (machine_mode mode1, machine_mode mode2)
++{
++  if (mode1 == mode2)
++    return true;
++
++  /* FPRs allow no mode punning, so it's not worth tying modes if we'd
++     prefer to put one of them in FPRs.  */
++  if (loongarch_mode_ok_for_mov_fmt_p (mode1))
++    return false;
++
++  return !loongarch_mode_ok_for_mov_fmt_p (mode2);
++}
++
++/* Implement TARGET_PREFERRED_RELOAD_CLASS.  Prefer FP_REGS when RCLASS
++   includes all of them and X's mode can be moved with mov.fmt; otherwise
++   narrow RCLASS to GR_REGS when it includes all of those.  */
++
++static reg_class_t
++loongarch_preferred_reload_class (rtx x, reg_class_t rclass)
++{
++  if (reg_class_subset_p (FP_REGS, rclass)
++      && loongarch_mode_ok_for_mov_fmt_p (GET_MODE (x)))
++    return FP_REGS;
++
++  if (reg_class_subset_p (GR_REGS, rclass))
++    return GR_REGS;
++
++  return rclass;
++}
++
++/* RCLASS is a class involved in a REGISTER_MOVE_COST calculation.
++   Return a "canonical" class to represent it in later calculations:
++   every subset of GENERAL_REGS is represented by GENERAL_REGS.  */
++
++static reg_class_t
++loongarch_canonicalize_move_class (reg_class_t rclass)
++{
++  if (reg_class_subset_p (rclass, GENERAL_REGS))
++    return GENERAL_REGS;
++
++  return rclass;
++}
++
++/* Return the cost of moving a value from a register of class FROM to a GPR.
++   Return 0 for classes that are unions of other classes handled by this
++   function.  */
++
++static int
++loongarch_move_to_gpr_cost (reg_class_t from)
++{
++  /* GPR to GPR move.  */
++  if (from == GENERAL_REGS)
++    return 2;
++
++  /* FPR to GPR transfer.  */
++  if (from == FP_REGS)
++    return 4;
++
++  return 0;
++}
++
++/* Return the cost of moving a value from a GPR to a register of class TO.
++   Return 0 for classes that are unions of other classes handled by this
++   function.  */
++
++static int
++loongarch_move_from_gpr_cost (reg_class_t to)
++{
++  /* GPR to GPR move.  */
++  if (to == GENERAL_REGS)
++    return 2;
++
++  /* GPR to FPR transfer.  */
++  if (to == FP_REGS)
++    return 4;
++
++  return 0;
++}
++
++/* Implement TARGET_REGISTER_MOVE_COST.  Return 0 for classes that are the
++   maximum of the move costs for subclasses; regclass will work out
++   the maximum for us.
*/
++
++static int
++loongarch_register_move_cost (machine_mode mode,
++                              reg_class_t from, reg_class_t to)
++{
++  int cost1, cost2;
++
++  from = loongarch_canonicalize_move_class (from);
++  to = loongarch_canonicalize_move_class (to);
++
++  /* Handle moves that can be done without using general-purpose
++     registers: an FPR to FPR move in a mode that mov.fmt supports.  */
++  if (from == FP_REGS
++      && to == FP_REGS
++      && loongarch_mode_ok_for_mov_fmt_p (mode))
++    return 4;
++
++  /* Handle cases in which only one class deviates from the ideal.  */
++  if (from == GENERAL_REGS)
++    return loongarch_move_from_gpr_cost (to);
++  if (to == GENERAL_REGS)
++    return loongarch_move_to_gpr_cost (from);
++
++  /* Handle cases that require a GPR temporary.  Both halves of the trip
++     must have a known cost.  */
++  cost1 = loongarch_move_to_gpr_cost (from);
++  if (cost1 != 0)
++    {
++      cost2 = loongarch_move_from_gpr_cost (to);
++      if (cost2 != 0)
++        return cost1 + cost2;
++    }
++
++  return 0;
++}
++
++/* Implement TARGET_MEMORY_MOVE_COST: the tuning model's memory latency
++   plus the cost of any secondary reload the move needs.  */
++
++static int
++loongarch_memory_move_cost (machine_mode mode, reg_class_t rclass, bool in)
++{
++  return (loongarch_cost->memory_latency
++          + memory_move_secondary_cost (mode, rclass, in));
++}
++
++/* Implement TARGET_SECONDARY_MEMORY_NEEDED.  This port never forces a
++   move between register classes to go through memory, so the answer is
++   always false and the arguments are ignored.  */
++
++static bool
++loongarch_secondary_memory_needed (machine_mode mode ATTRIBUTE_UNUSED,
++                                   reg_class_t class1 ATTRIBUTE_UNUSED,
++                                   reg_class_t class2 ATTRIBUTE_UNUSED)
++{
++  return false;
++}
++
++/* Return the register class required for a secondary register when
++   copying between one of the registers in RCLASS and value X, which
++   has mode MODE.  X is the source of the move if IN_P, otherwise it
++   is the destination.  Return NO_REGS if no secondary register is
++   needed.  The IN_P argument is not used by the current logic.  */
++
++enum reg_class
++loongarch_secondary_reload_class (enum reg_class rclass,
++                                  machine_mode mode, rtx x, bool)
++{
++  int regno;
++
++  /* REGNO is negative when X is not (a subreg of) a hard register.  */
++  regno = true_regnum (x);
++
++  if (reg_class_subset_p (rclass, FP_REGS))
++    {
++      if (regno < 0
++          || (MEM_P (x)
++              && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)))
++        /* In this case we can use the FP load and store instructions
++           directly.  */
++        return NO_REGS;
++
++      if (GP_REG_P (regno) || x == CONST0_RTX (mode))
++        /* In this case we can use movgr2fr.s, movfr2gr.s, movgr2fr.d or
++           movfr2gr.d.  */
++        return NO_REGS;
++
++      if (CONSTANT_P (x) && !targetm.cannot_force_const_mem (mode, x))
++        /* We can force the constant to memory and use an FP load.  */
++        return NO_REGS;
++
++      if (FP_REG_P (regno) && loongarch_mode_ok_for_mov_fmt_p (mode))
++        /* In this case we can use mov.fmt.  */
++        return NO_REGS;
++
++      /* Otherwise, we need to reload through an integer register.  */
++      return GR_REGS;
++    }
++
++  /* X is an FPR but RCLASS is not an FP class: the copy is direct only
++     when RCLASS consists of GPRs.  */
++  if (FP_REG_P (regno))
++    return reg_class_subset_p (rclass, GR_REGS) ? NO_REGS : GR_REGS;
++
++  return NO_REGS;
++}
++
++
++/* Implement TARGET_VALID_POINTER_MODE.
*/ ++ ++static bool ++loongarch_valid_pointer_mode (scalar_int_mode mode) ++{ ++ return mode == SImode || (TARGET_64BIT && mode == DImode); ++} ++ ++/* Implement TARGET_SCALAR_MODE_SUPPORTED_P. */ ++ ++static bool ++loongarch_scalar_mode_supported_p (scalar_mode mode) ++{ ++ if (ALL_FIXED_POINT_MODE_P (mode) ++ && GET_MODE_PRECISION (mode) <= 2 * BITS_PER_WORD) ++ return true; ++ ++ return default_scalar_mode_supported_p (mode); ++} ++ ++/* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE. */ ++ ++static machine_mode ++loongarch_preferred_simd_mode (scalar_mode mode) ++{ ++ return word_mode; ++} ++ ++/* Return the length of INSN. LENGTH is the initial length computed by ++ attributes in the machine-description file. */ ++ ++int ++loongarch_adjust_insn_length (rtx_insn *insn, int length) ++{ ++ /* loongarch.md uses MAX_PIC_BRANCH_LENGTH as a placeholder for the length ++ of a PIC long-branch sequence. Substitute the correct value. */ ++ if (length == MAX_PIC_BRANCH_LENGTH ++ && JUMP_P (insn) ++ && INSN_CODE (insn) >= 0 ++ && get_attr_type (insn) == TYPE_BRANCH) ++ { ++ /* Add the branch-over instruction and its delay slot, if this ++ is a conditional branch. */ ++ length = simplejump_p (insn) ? 0 : 8; ++ ++ /* Add the length of an indirect jump, ignoring the delay slot. */ ++ length += 4; ++ } ++ ++ /* A unconditional jump has an unfilled delay slot if it is not part ++ of a sequence. A conditional jump normally has a delay slot. */ ++ if (CALL_P (insn) || (JUMP_P (insn))) ++ length += 4; ++ ++ /* See how many nops might be needed to avoid hardware hazards. 
*/ ++ if (!cfun->machine->ignore_hazard_length_p ++ && INSN_P (insn) ++ && INSN_CODE (insn) >= 0) ++ switch (get_attr_hazard (insn)) ++ { ++ case HAZARD_NONE: ++ break; ++ ++ case HAZARD_DELAY: ++ case HAZARD_FORBIDDEN_SLOT: ++ length += NOP_INSN_LENGTH; ++ break; ++ } ++ ++ return length; ++} ++ ++/* Return the assembly code for INSN, which has the operands given by ++ OPERANDS, and which branches to OPERANDS[0] if some condition is true. ++ BRANCH_IF_TRUE is the asm template that should be used if OPERANDS[0] ++ is in range of a direct branch. BRANCH_IF_FALSE is an inverted ++ version of BRANCH_IF_TRUE. BRANCH_IF_TRUE is returned as-is when INSN's length attribute is at most 12. */ ++ ++const char * ++loongarch_output_conditional_branch (rtx_insn *insn, rtx *operands, ++ const char *branch_if_true, ++ const char *branch_if_false) ++{ ++ unsigned int length; ++ rtx taken; ++ ++ gcc_assert (LABEL_P (operands[0])); ++ ++ length = get_attr_length (insn); ++ if (length <= 12) ++ { ++ return branch_if_true; ++ } ++ ++ /* Generate a reversed branch around a direct jump. This fallback does ++ not use branch-likely instructions. */ ++ rtx_code_label *not_taken = gen_label_rtx (); ++ taken = operands[0]; ++ ++ /* Generate the reversed branch to NOT_TAKEN. */ ++ operands[0] = not_taken; ++ output_asm_insn (branch_if_false, operands); ++ ++ /* If INSN has a delay slot, we must provide delay slots for both the ++ branch to NOT_TAKEN and the conditional jump. We must also ensure ++ that INSN's delay slot is executed in the appropriate cases. */ ++ if (final_sequence) ++ { ++ /* This first delay slot will always be executed, so use INSN's ++ delay slot if it is not annulled. */ ++ if (!INSN_ANNULLED_BRANCH_P (insn)) ++ { ++ final_scan_insn (final_sequence->insn (1), ++ asm_out_file, optimize, 1, NULL); ++ final_sequence->insn (1)->set_deleted (); ++ } ++ fprintf (asm_out_file, "\n"); ++ } ++ ++ output_asm_insn (LARCH_ABSOLUTE_JUMP ("b\t%0"), &taken); ++ ++ /* Now deal with its delay slot; see above. 
*/ ++ if (final_sequence) ++ { ++ /* This delay slot will only be executed if the branch is taken. ++ Use INSN's delay slot if it is annulled. */ ++ if (INSN_ANNULLED_BRANCH_P (insn)) ++ { ++ final_scan_insn (final_sequence->insn (1), ++ asm_out_file, optimize, 1, NULL); ++ final_sequence->insn (1)->set_deleted (); ++ } ++ fprintf (asm_out_file, "\n"); ++ } ++ ++ /* Output NOT_TAKEN. */ ++ targetm.asm_out.internal_label (asm_out_file, "L", ++ CODE_LABEL_NUMBER (not_taken)); ++ return ""; ++} ++ ++/* Return the assembly code for INSN, which branches to OPERANDS[0] ++ if some equality condition is true. The condition is given by ++ OPERANDS[1] if !INVERTED_P, otherwise it is the inverse of ++ OPERANDS[1]. OPERANDS[2] is the comparison's first operand; ++ OPERANDS[3] is the second operand and may be zero or a register. */ ++ ++const char * ++loongarch_output_equal_conditional_branch (rtx_insn* insn, rtx *operands, ++ bool inverted_p) ++{ ++ const char *branch[2]; ++ if (operands[3] == const0_rtx) ++ { ++ branch[!inverted_p] = LARCH_BRANCH ("b%C1z", "%2,%0"); ++ branch[inverted_p] = LARCH_BRANCH ("b%N1z", "%2,%0"); ++ } else ++ { ++ branch[!inverted_p] = LARCH_BRANCH ("b%C1", "%2,%z3,%0"); ++ branch[inverted_p] = LARCH_BRANCH ("b%N1", "%2,%z3,%0"); ++ } ++ ++ return loongarch_output_conditional_branch (insn, operands, branch[1], branch[0]); ++} ++ ++/* Return the assembly code for INSN, which branches to OPERANDS[0] ++ if some ordering condition is true. The condition is given by ++ OPERANDS[1] if !INVERTED_P, otherwise it is the inverse of ++ OPERANDS[1]. OPERANDS[2] is the comparison's first operand; ++ OPERANDS[3] is the second operand and may be zero or a register. */ ++ ++const char * ++loongarch_output_order_conditional_branch (rtx_insn *insn, rtx *operands, ++ bool inverted_p) ++{ ++ const char *branch[2]; ++ ++ /* Make BRANCH[1] branch to OPERANDS[0] when the condition is true. ++ Make BRANCH[0] branch on the inverse condition. 
*/ ++ if (operands[3] != const0_rtx) ++ { ++ /* Handle degenerate cases that should not, but do, occur: both operands are the same register, so GE/GEU always hold and LT/LTU never do. */ ++ if (REGNO (operands[2]) == REGNO (operands[3])) ++ { ++ switch (GET_CODE (operands[1])) ++ { ++ case LT: ++ case LTU: ++ inverted_p = !inverted_p; ++ /* Fall through. */ ++ case GE: ++ case GEU: ++ branch[!inverted_p] = LARCH_BRANCH ("b", "%0"); ++ branch[inverted_p] = "\t# branch never"; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ } ++ else ++ { ++ switch (GET_CODE (operands[1])) ++ { ++ case LE: ++ branch[!inverted_p] = LARCH_BRANCH ("bge", "%3,%2,%0"); ++ branch[inverted_p] = LARCH_BRANCH ("blt", "%3,%2,%0"); ++ break; ++ case LEU: ++ branch[!inverted_p] = LARCH_BRANCH ("bgeu", "%3,%2,%0"); ++ branch[inverted_p] = LARCH_BRANCH ("bltu", "%3,%2,%0"); ++ break; ++ case GT: ++ branch[!inverted_p] = LARCH_BRANCH ("blt", "%3,%2,%0"); ++ branch[inverted_p] = LARCH_BRANCH ("bge", "%3,%2,%0"); ++ break; ++ case GTU: ++ branch[!inverted_p] = LARCH_BRANCH ("bltu", "%3,%2,%0"); ++ branch[inverted_p] = LARCH_BRANCH ("bgeu", "%3,%2,%0"); ++ break; ++ case LT: ++ case LTU: ++ case GE: ++ case GEU: ++ branch[!inverted_p] = LARCH_BRANCH ("b%C1", "%2,%3,%0"); ++ branch[inverted_p] = LARCH_BRANCH ("b%N1", "%2,%3,%0"); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ } ++ } ++ else ++ { ++ switch (GET_CODE (operands[1])) ++ { ++ /* These cases are equivalent to comparisons against zero. */ ++ case LEU: ++ inverted_p = !inverted_p; ++ /* Fall through. */ ++ case GTU: ++ branch[!inverted_p] = LARCH_BRANCH ("bne", "%2,%.,%0"); ++ branch[inverted_p] = LARCH_BRANCH ("beq", "%2,%.,%0"); ++ break; ++ ++ /* These cases are always true or always false. */ ++ case LTU: ++ inverted_p = !inverted_p; ++ /* Fall through. 
*/ ++ case GEU: ++ branch[!inverted_p] = LARCH_BRANCH ("beq", "%.,%.,%0"); ++ branch[inverted_p] = LARCH_BRANCH ("bne", "%.,%.,%0"); ++ break; ++ ++ case LE: ++ branch[!inverted_p] = LARCH_BRANCH ("bge", "$zero,%2,%0"); ++ branch[inverted_p] = LARCH_BRANCH ("blt", "$zero,%2,%0"); ++ break; ++ case GT: ++ branch[!inverted_p] = LARCH_BRANCH ("blt", "$zero,%2,%0"); ++ branch[inverted_p] = LARCH_BRANCH ("bge", "$zero,%2,%0"); ++ break; ++ case LT: ++ case GE: ++ branch[!inverted_p] = LARCH_BRANCH ("b%C1", "%2,$zero,%0"); ++ branch[inverted_p] = LARCH_BRANCH ("b%N1", "%2,$zero,%0"); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ } ++ return loongarch_output_conditional_branch (insn, operands, branch[1], branch[0]); ++} ++ ++ ++ ++/* Return the assembly code for DIV or DDIV instruction DIVISION, which has ++ the operands given by OPERANDS. Add in a divide-by-zero check if needed. ++ ++ When working around R4000 and R4400 errata, we need to make sure that ++ the division is not immediately followed by a shift[1][2]. We also ++ need to stop the division from being put into a branch delay slot[3]. ++ The easiest way to avoid both problems is to add a nop after the ++ division. When a divide-by-zero check is needed, this nop can be ++ used to fill the branch delay slot. ++ ++ [1] If a double-word or a variable shift executes immediately ++ after starting an integer division, the shift may give an ++ incorrect result. See quotations of errata #16 and #28 from ++ "MIPS R4000PC/SC Errata, Processor Revision 2.2 and 3.0" ++ in loongarch.md for details. ++ ++ [2] A similar bug to [1] exists for all revisions of the ++ R4000 and the R4400 when run in an MC configuration. ++ From "MIPS R4000MC Errata, Processor Revision 2.2 and 3.0": ++ ++ "19. 
In this following sequence: ++ ++ ddiv (or ddivu or div or divu) ++ dsll32 (or dsrl32, dsra32) ++ ++ if an MPT stall occurs, while the divide is slipping the cpu ++ pipeline, then the following double shift would end up with an ++ incorrect result. ++ ++ Workaround: The compiler needs to avoid generating any ++ sequence with divide followed by extended double shift." ++ ++ This erratum is also present in "MIPS R4400MC Errata, Processor ++ Revision 1.0" and "MIPS R4400MC Errata, Processor Revision 2.0 ++ & 3.0" as errata #10 and #4, respectively. ++ ++ [3] From "MIPS R4000PC/SC Errata, Processor Revision 2.2 and 3.0" ++ (also valid for MIPS R4000MC processors): ++ ++ "52. R4000SC: This bug does not apply for the R4000PC. ++ ++ There are two flavors of this bug: ++ ++ 1) If the instruction just after divide takes an RF exception ++ (tlb-refill, tlb-invalid) and gets an instruction cache ++ miss (both primary and secondary) and the line which is ++ currently in secondary cache at this index had the first ++ data word, where the bits 5..2 are set, then R4000 would ++ get a wrong result for the div. ++ ++ ##1 ++ nop ++ div r8, r9 ++ ------------------- # end-of page. -tlb-refill ++ nop ++ ##2 ++ nop ++ div r8, r9 ++ ------------------- # end-of page. -tlb-invalid ++ nop ++ ++ 2) If the divide is in the taken branch delay slot, where the ++ target takes RF exception and gets an I-cache miss for the ++ exception vector or where I-cache miss occurs for the ++ target address, under the above mentioned scenarios, the ++ div would get wrong results. ++ ++ ##1 ++ j r2 # to next page mapped or unmapped ++ div r8,r9 # this bug would be there as long ++ # as there is an ICache miss and ++ nop # the "data pattern" is present ++ ++ ##2 ++ beq r0, r0, NextPage # to Next page ++ div r8,r9 ++ nop ++ ++ This bug is present for div, divu, ddiv, and ddivu ++ instructions. 
++ ++ Workaround: For item 1), OS could make sure that the next page ++ after the divide instruction is also mapped. For item 2), the ++ compiler could make sure that the divide instruction is not in ++ the branch delay slot." ++ ++ These processors have PRId values of 0x00004220 and 0x00004300 for ++ the R4000 and 0x00004400, 0x00004500 and 0x00004600 for the R4400. */ ++ ++const char * ++loongarch_output_division (const char *division, rtx *operands) ++{ ++ const char *s; ++ ++ s = division; ++ if (TARGET_CHECK_ZERO_DIV) ++ { ++ output_asm_insn (s, operands); ++ s = "bne\t%2,%.,1f\n\tbreak\t7\n1:"; ++ } ++ return s; ++} ++ ++ ++/* Return true if destination of IN_INSN is used as add source in ++ OUT_INSN. Both IN_INSN and OUT_INSN are of type fmadd. Example: ++ madd.s dst, x, y, z ++ madd.s a, dst, b, c */ ++ ++bool ++loongarch_fmadd_bypass (rtx_insn *out_insn, rtx_insn *in_insn) ++{ ++ rtx dst_op, src_op; ++ ++ gcc_assert (get_attr_type (in_insn) == TYPE_FMADD); ++ gcc_assert (get_attr_type (out_insn) == TYPE_FMADD); ++ ++ extract_insn (in_insn); ++ dst_op = recog_data.operand[0]; ++ ++ extract_insn (out_insn); ++ src_op = recog_data.operand[1]; ++ /* Compare register numbers; REG_P alone only says "is a register", and non-register operands never match. */ ++ if (REG_P (dst_op) && REG_P (src_op) && REGNO (dst_op) == REGNO (src_op)) ++ return true; ++ ++ return false; ++} ++ ++/* Return true if IN_INSN is a multiply-add or multiply-subtract ++ instruction and if OUT_INSN assigns to the accumulator operand. 
*/ ++ ++bool ++loongarch_linked_madd_p (rtx_insn *out_insn, rtx_insn *in_insn) ++{ ++ enum attr_accum_in accum_in; ++ int accum_in_opnum; ++ rtx accum_in_op; ++ ++ if (recog_memoized (in_insn) < 0) ++ return false; ++ ++ accum_in = get_attr_accum_in (in_insn); ++ if (accum_in == ACCUM_IN_NONE) ++ return false; ++ ++ accum_in_opnum = accum_in - ACCUM_IN_0; ++ ++ extract_insn (in_insn); ++ gcc_assert (accum_in_opnum < recog_data.n_operands); ++ accum_in_op = recog_data.operand[accum_in_opnum]; ++ ++ return reg_set_p (accum_in_op, out_insn); ++} ++ ++/* True if the dependency between OUT_INSN and IN_INSN is on the store ++ data rather than the address. We need this because the cprestore ++ pattern is type "store", but is defined using an UNSPEC_VOLATILE, ++ which causes the default routine to abort. We just return false ++ for that case. */ ++ ++bool ++loongarch_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn) ++{ ++ if (GET_CODE (PATTERN (in_insn)) == UNSPEC_VOLATILE) ++ return false; ++ ++ return store_data_bypass_p (out_insn, in_insn); ++} ++ ++ ++/* Implement TARGET_SCHED_ADJUST_COST. As written, only dependencies that are ++ neither dep_type 0 (presumably a true dependence) nor REG_DEP_OUTPUT get zero cost. ++ NOTE(review): the MIPS-era 20Kc special case is gone; confirm that output dependencies should keep COST. */ ++ ++static int ++loongarch_adjust_cost (rtx_insn *, int dep_type, rtx_insn *, int cost, unsigned int) ++{ ++ if (dep_type != 0 && (dep_type != REG_DEP_OUTPUT)) ++ return 0; ++ return cost; ++} ++ ++/* Return the number of instructions that can be issued per cycle. */ ++ ++static int ++loongarch_issue_rate (void) ++{ ++ switch (loongarch_tune) ++ { ++ case PROCESSOR_LOONGARCH64: ++ case PROCESSOR_GS464V: ++ return 4; ++ ++ default: ++ return 1; ++ } ++} ++ ++/* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD. This should ++ be as wide as the scheduling freedom in the DFA. 
*/ ++ ++static int ++loongarch_multipass_dfa_lookahead (void) ++{ ++ if (TUNE_LOONGARCH64 || TUNE_GS464V) ++ return 4; ++ ++ return 0; ++} ++ ++/* Presumably the TARGET_SCHED_INIT hook (its registration is outside this chunk); intentionally does nothing. */ ++static void ++loongarch_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, ++ int max_ready ATTRIBUTE_UNUSED) ++{ ++} ++ ++/* Implement TARGET_SCHED_REORDER. The ready list is not examined; the issue rate is returned. */ ++ ++static int ++loongarch_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, ++ rtx_insn **ready ATTRIBUTE_UNUSED, int *nreadyp ATTRIBUTE_UNUSED, int cycle ATTRIBUTE_UNUSED) ++{ ++ return loongarch_issue_rate (); ++} ++ ++/* Implement TARGET_SCHED_REORDER2. Returns the value last cached by loongarch_variable_issue. */ ++ ++static int ++loongarch_sched_reorder2 (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, ++ rtx_insn **ready ATTRIBUTE_UNUSED, int *nreadyp ATTRIBUTE_UNUSED, int cycle ATTRIBUTE_UNUSED) ++{ ++ return cached_can_issue_more; ++} ++ ++/* Implement TARGET_SCHED_VARIABLE_ISSUE. */ ++ ++static int ++loongarch_variable_issue (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, ++ rtx_insn *insn, int more) ++{ ++ /* Ignore USEs and CLOBBERs; don't count them against the issue rate. */ ++ if (USEFUL_INSN_P (insn)) ++ { ++ if (get_attr_type (insn) != TYPE_GHOST) ++ more--; ++ } ++ ++ /* Instructions of type 'multi' should all be split before ++ the second scheduling pass. */ ++ gcc_assert (!reload_completed ++ || recog_memoized (insn) < 0 ++ || get_attr_type (insn) != TYPE_MULTI); ++ ++ cached_can_issue_more = more; ++ return more; ++} ++ ++/* Given that we have an rtx of the form (prefetch ... WRITE LOCALITY), ++ return the first operand of the associated PREF or PREFX insn. */ ++ ++rtx ++loongarch_prefetch_cookie (rtx write, rtx locality) ++{ ++ /* store_streamed / load_streamed. */ ++ if (INTVAL (locality) <= 0) ++ return GEN_INT (INTVAL (write) + 4); ++ ++ /* store / load. */ ++ if (INTVAL (locality) <= 2) ++ return write; ++ ++ /* store_retained / load_retained. */ ++ return GEN_INT (INTVAL (write) + 6); ++} ++ ++ ++ ++/* Return whether CFG is used in loongarch_reorg. 
*/ ++ ++static bool ++loongarch_cfg_in_reorg (void) ++{ ++ return (TARGET_RELAX_PIC_CALLS); ++} ++ ++/* If INSN is a call, return the underlying CALL expr. Return NULL_RTX ++ otherwise. If INSN has two call rtx, then store the second one in ++ SECOND_CALL; *SECOND_CALL is left untouched otherwise, so the caller must initialize it. */ ++ ++static rtx ++loongarch_call_expr_from_insn (rtx_insn *insn, rtx *second_call) ++{ ++ rtx x; ++ rtx x2; ++ ++ if (!CALL_P (insn)) ++ return NULL_RTX; ++ ++ x = PATTERN (insn); ++ if (GET_CODE (x) == PARALLEL) ++ { ++ /* Calls returning complex values have two CALL rtx. Look for the second ++ one here, and return it via the SECOND_CALL arg. */ ++ x2 = XVECEXP (x, 0, 1); ++ if (GET_CODE (x2) == SET) ++ x2 = XEXP (x2, 1); ++ if (GET_CODE (x2) == CALL) ++ *second_call = x2; ++ ++ x = XVECEXP (x, 0, 0); ++ } ++ if (GET_CODE (x) == SET) ++ x = XEXP (x, 1); ++ gcc_assert (GET_CODE (x) == CALL); ++ ++ return x; ++} ++ ++/* REG is set in DEF. See if the definition is one of the ways we load a ++ register with a symbol address for a loongarch_use_pic_fn_addr_reg_p call. ++ If it is, return the symbol reference of the function, otherwise return ++ NULL_RTX. ++ ++ If RECURSE_P is true, use loongarch_find_pic_call_symbol to interpret ++ the values of source registers, otherwise treat such registers as ++ having an unknown value. */ ++ ++static rtx ++loongarch_pic_call_symbol_from_set (df_ref def, rtx reg, bool recurse_p) ++{ ++ rtx_insn *def_insn; ++ rtx set; ++ ++ if (DF_REF_IS_ARTIFICIAL (def)) ++ return NULL_RTX; ++ ++ def_insn = DF_REF_INSN (def); ++ set = single_set (def_insn); ++ if (set && rtx_equal_p (SET_DEST (set), reg)) ++ { ++ rtx note, src, symbol; ++ ++ /* First see whether the source is a plain symbol. This is used ++ when calling symbols that are not lazily bound. */ ++ src = SET_SRC (set); ++ if (GET_CODE (src) == SYMBOL_REF) ++ return src; ++ ++ /* Handle %call16 references. 
*/ ++ symbol = loongarch_strip_unspec_call (src); ++ if (symbol) ++ { ++ gcc_assert (GET_CODE (symbol) == SYMBOL_REF); ++ return symbol; ++ } ++ ++ /* If we have something more complicated, look for a ++ REG_EQUAL or REG_EQUIV note. */ ++ note = find_reg_equal_equiv_note (def_insn); ++ if (note && GET_CODE (XEXP (note, 0)) == SYMBOL_REF) ++ return XEXP (note, 0); ++ ++ /* Follow at most one simple register copy. Such copies are ++ interesting in cases like: ++ ++ for (...) ++ { ++ locally_binding_fn (...); ++ } ++ ++ and: ++ ++ locally_binding_fn (...); ++ ... ++ locally_binding_fn (...); ++ ++ where the load of locally_binding_fn can legitimately be ++ hoisted or shared. However, we do not expect to see complex ++ chains of copies, so a full worklist solution to the problem ++ would probably be overkill. */ ++ if (recurse_p && REG_P (src)) ++ return loongarch_find_pic_call_symbol (def_insn, src, false); ++ } ++ ++ return NULL_RTX; ++} ++ ++/* Find the definition of the use of REG in INSN. See if the definition ++ is one of the ways we load a register with a symbol address for a ++ loongarch_use_pic_fn_addr_reg_p call. If it is, return the symbol reference ++ of the function, otherwise return NULL_RTX. RECURSE_P is as for ++ loongarch_pic_call_symbol_from_set. */ ++ ++static rtx ++loongarch_find_pic_call_symbol (rtx_insn *insn, rtx reg, bool recurse_p) ++{ ++ df_ref use; ++ struct df_link *defs; ++ rtx symbol; ++ ++ use = df_find_use (insn, regno_reg_rtx[REGNO (reg)]); ++ if (!use) ++ return NULL_RTX; ++ defs = DF_REF_CHAIN (use); ++ if (!defs) ++ return NULL_RTX; ++ symbol = loongarch_pic_call_symbol_from_set (defs->ref, reg, recurse_p); ++ if (!symbol) ++ return NULL_RTX; ++ ++ /* If we have more than one definition, they need to be identical. 
*/ ++ for (defs = defs->next; defs; defs = defs->next) ++ { ++ rtx other; ++ ++ other = loongarch_pic_call_symbol_from_set (defs->ref, reg, recurse_p); ++ if (!rtx_equal_p (symbol, other)) ++ return NULL_RTX; ++ } ++ ++ return symbol; ++} ++ ++/* Replace the args_size operand of the call expression CALL with the ++ call-attribute UNSPEC and fill in SYMBOL as the function symbol. */ ++ ++static void ++loongarch_annotate_pic_call_expr (rtx call, rtx symbol) ++{ ++ rtx args_size; ++ ++ args_size = XEXP (call, 1); ++ XEXP (call, 1) = gen_rtx_UNSPEC (GET_MODE (args_size), ++ gen_rtvec (2, args_size, symbol), ++ UNSPEC_CALL_ATTR); ++} ++ ++/* OPERANDS[ARGS_SIZE_OPNO] is the arg_size operand of a CALL expression. See ++ if instead of the arg_size argument it contains the call attributes. If ++ yes, return true along with setting OPERANDS[ARGS_SIZE_OPNO] to the function ++ symbol from the call attributes. Also return false if ARGS_SIZE_OPNO is ++ -1 or if TARGET_RELAX_PIC_CALLS is not enabled. */ ++ ++bool ++loongarch_get_pic_call_symbol (rtx *operands, int args_size_opno) ++{ ++ rtx args_size, symbol; ++ ++ if (!TARGET_RELAX_PIC_CALLS || args_size_opno == -1) ++ return false; ++ ++ args_size = operands[args_size_opno]; ++ if (GET_CODE (args_size) != UNSPEC) ++ return false; ++ gcc_assert (XINT (args_size, 1) == UNSPEC_CALL_ATTR); ++ ++ symbol = XVECEXP (args_size, 0, 1); ++ gcc_assert (GET_CODE (symbol) == SYMBOL_REF); ++ ++ operands[args_size_opno] = symbol; ++ return true; ++} ++ ++/* Use DF to annotate PIC indirect calls with the function symbol they ++ dispatch to. 
*/ ++ ++static void ++loongarch_annotate_pic_calls (void) ++{ ++ basic_block bb; ++ rtx_insn *insn; ++ ++ FOR_EACH_BB_FN (bb, cfun) ++ FOR_BB_INSNS (bb, insn) ++ { ++ rtx call, reg, symbol, second_call; ++ ++ second_call = 0; ++ call = loongarch_call_expr_from_insn (insn, &second_call); ++ if (!call) ++ continue; ++ gcc_assert (MEM_P (XEXP (call, 0))); ++ reg = XEXP (XEXP (call, 0), 0); ++ if (!REG_P (reg)) ++ continue; ++ ++ symbol = loongarch_find_pic_call_symbol (insn, reg, true); ++ if (symbol) ++ { ++ loongarch_annotate_pic_call_expr (call, symbol); ++ if (second_call) ++ loongarch_annotate_pic_call_expr (second_call, symbol); ++ } ++ } ++} ++ ++ ++/* A structure representing the state of the processor pipeline. ++ Used by the loongarch_sim_* family of functions. */ ++struct loongarch_sim { ++ /* The maximum number of instructions that can be issued in a cycle. ++ (Caches loongarch_issue_rate.) */ ++ unsigned int issue_rate; ++ ++ /* The current simulation time. */ ++ unsigned int time; ++ ++ /* How many more instructions can be issued in the current cycle. */ ++ unsigned int insns_left; ++ ++ /* LAST_SET[X].INSN is the last instruction to set register X. ++ LAST_SET[X].TIME is the time at which that instruction was issued. ++ INSN is null if no instruction has yet set register X. */ ++ struct { ++ rtx_insn *insn; ++ unsigned int time; ++ } last_set[FIRST_PSEUDO_REGISTER]; ++ ++ /* The pipeline's current DFA state. */ ++ state_t dfa_state; ++}; ++ ++/* Reset STATE to the initial simulation state. As a side effect, the non-local curr_state is pointed at STATE's DFA state. */ ++ ++static void ++loongarch_sim_reset (struct loongarch_sim *state) ++{ ++ curr_state = state->dfa_state; ++ ++ state->time = 0; ++ state->insns_left = state->issue_rate; ++ memset (&state->last_set, 0, sizeof (state->last_set)); ++ state_reset (curr_state); ++ ++ targetm.sched.init (0, false, 0); ++ advance_state (curr_state); ++} ++ ++/* Initialize STATE before its first use. DFA_STATE points to an ++ allocated but uninitialized DFA state. 
*/ ++ ++static void ++loongarch_sim_init (struct loongarch_sim *state, state_t dfa_state) ++{ ++ if (targetm.sched.init_dfa_pre_cycle_insn) ++ targetm.sched.init_dfa_pre_cycle_insn (); ++ ++ if (targetm.sched.init_dfa_post_cycle_insn) ++ targetm.sched.init_dfa_post_cycle_insn (); ++ ++ state->issue_rate = loongarch_issue_rate (); ++ state->dfa_state = dfa_state; ++ loongarch_sim_reset (state); ++} ++ ++ ++ ++/* Record the current architecture and tuning settings in loongarch_tuning_info. NOTE(review): the simulator state built here is discarded and no costs are computed from it. */ ++ ++static void ++loongarch_set_tuning_info (void) ++{ ++ ++ loongarch_tuning_info.arch = loongarch_arch; ++ loongarch_tuning_info.tune = loongarch_tune; ++ loongarch_tuning_info.initialized_p = true; ++ ++ dfa_start (); ++ ++ struct loongarch_sim state; ++ loongarch_sim_init (&state, alloca (state_size ())); ++ ++ dfa_finish (); ++} ++ ++/* Implement TARGET_EXPAND_TO_RTL_HOOK. */ ++ ++static void ++loongarch_expand_to_rtl_hook (void) ++{ ++ /* We need to call this at a point where we can safely create sequences ++ of instructions, so TARGET_OVERRIDE_OPTIONS is too early. We also ++ need to call it at a point where the DFA infrastructure is not ++ already in use, so we can't just call it lazily on demand. ++ ++ At present, loongarch_tuning_info is only needed during post-expand ++ RTL passes such as split_insns, so this hook should be early enough. ++ We may need to move the call elsewhere if loongarch_tuning_info starts ++ to be used for other things (such as rtx_costs, or expanders that ++ could be called during gimple optimization). */ ++ loongarch_set_tuning_info (); ++} ++ ++/* This structure records that the current function has a LO_SUM ++ involving SYMBOL_REF or LABEL_REF BASE and that OFFSET is ++ the largest offset applied to BASE by all such LO_SUMs. */ ++struct loongarch_lo_sum_offset { ++ rtx base; ++ HOST_WIDE_INT offset; ++}; ++ ++/* Return a hash value for SYMBOL_REF or LABEL_REF BASE. 
*/ ++ ++static hashval_t ++loongarch_hash_base (rtx base) ++{ ++ int do_not_record_p; ++ ++ return hash_rtx (base, GET_MODE (base), &do_not_record_p, NULL, false); ++} ++ ++/* Hashtable helpers: entries are loongarch_lo_sum_offset records, looked up by their BASE rtx. */ ++ ++struct loongarch_lo_sum_offset_hasher : free_ptr_hash <loongarch_lo_sum_offset> ++{ ++ typedef rtx_def *compare_type; ++ static inline hashval_t hash (const loongarch_lo_sum_offset *); ++ static inline bool equal (const loongarch_lo_sum_offset *, const rtx_def *); ++}; ++ ++/* Hash-table callbacks for loongarch_lo_sum_offsets. */ ++ ++inline hashval_t ++loongarch_lo_sum_offset_hasher::hash (const loongarch_lo_sum_offset *entry) ++{ ++ return loongarch_hash_base (entry->base); ++} ++ ++inline bool ++loongarch_lo_sum_offset_hasher::equal (const loongarch_lo_sum_offset *entry, ++ const rtx_def *value) ++{ ++ return rtx_equal_p (entry->base, value); ++} ++ ++typedef hash_table<loongarch_lo_sum_offset_hasher> loongarch_offset_table; ++ ++ ++/* Subroutine of loongarch_reorg to manage passes that require DF. */ ++ ++static void ++loongarch_df_reorg (void) ++{ ++ /* Create def-use chains. */ ++ df_set_flags (DF_EQ_NOTES); ++ df_chain_add_problem (DF_UD_CHAIN); ++ df_analyze (); ++ ++ if (TARGET_RELAX_PIC_CALLS) ++ loongarch_annotate_pic_calls (); ++ ++ df_finish_pass (false); ++} ++ ++/* Implement TARGET_MACHINE_DEPENDENT_REORG. */ ++ ++static void ++loongarch_reorg (void) ++{ ++ /* Restore the BLOCK_FOR_INSN pointers, which are needed by DF. DF insn info is only kept up ++ to date if the CFG is available. */ ++ if (loongarch_cfg_in_reorg ()) ++ compute_bb_for_insn (); ++ if (loongarch_cfg_in_reorg ()) ++ { ++ loongarch_df_reorg (); ++ free_bb_for_insn (); ++ } ++} ++ ++/* We use a machine specific pass to do a second machine dependent reorg ++ pass after delay branch scheduling. 
*/ ++ ++static unsigned int ++loongarch_machine_reorg2 (void) ++{ ++// loongarch_insert_insn_pseudos (); ++ return 0; ++} ++ ++namespace { ++ ++const pass_data pass_data_loongarch_machine_reorg2 = ++{ ++ RTL_PASS, /* type */ ++ "mach2", /* name */ ++ OPTGROUP_NONE, /* optinfo_flags */ ++ TV_MACH_DEP, /* tv_id */ ++ 0, /* properties_required */ ++ 0, /* properties_provided */ ++ 0, /* properties_destroyed */ ++ 0, /* todo_flags_start */ ++ 0, /* todo_flags_finish */ ++}; ++ ++class pass_loongarch_machine_reorg2 : public rtl_opt_pass ++{ ++public: ++ pass_loongarch_machine_reorg2(gcc::context *ctxt) ++ : rtl_opt_pass(pass_data_loongarch_machine_reorg2, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ virtual unsigned int execute (function *) { return loongarch_machine_reorg2 (); } ++ ++}; // class pass_loongarch_machine_reorg2 ++ ++} // anon namespace ++ ++rtl_opt_pass * ++make_pass_loongarch_machine_reorg2 (gcc::context *ctxt) ++{ ++ return new pass_loongarch_machine_reorg2 (ctxt); ++} ++ ++ ++/* Implement TARGET_ASM_OUTPUT_MI_THUNK. Generate rtl rather than asm text ++ in order to avoid duplicating too much logic from elsewhere. Note that THUNK_FNDECL is in fact used below despite its ATTRIBUTE_UNUSED marker. */ ++ ++static void ++loongarch_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, ++ HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, ++ tree function) ++{ ++ const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl)); ++ rtx this_rtx, temp1, temp2, fnaddr; ++ rtx_insn *insn; ++ bool use_sibcall_p; ++ ++ /* Pretend to be a post-reload pass while generating rtl. */ ++ reload_completed = 1; ++ ++ /* Mark the end of the (empty) prologue. */ ++ emit_note (NOTE_INSN_PROLOGUE_END); ++ ++ /* Determine if we can use a sibcall to call FUNCTION directly. */ ++ fnaddr = XEXP (DECL_RTL (function), 0); ++ use_sibcall_p = (loongarch_function_ok_for_sibcall (function, NULL) ++ && const_call_insn_operand (fnaddr, Pmode)); ++ ++// /* Determine if we need to load FNADDR from the GOT. 
*/ ++// if (!use_sibcall_p ++// && (loongarch_got_symbol_type_p ++// (loongarch_classify_symbol (fnaddr, SYMBOL_CONTEXT_LEA)))) ++// { ++// /* Pick a global pointer. Use a call-clobbered register if ++// TARGET_CALL_SAVED_GP. */ ++// cfun->machine->global_pointer ++// = GLOBAL_POINTER_REGNUM; ++// cfun->machine->must_initialize_gp_p = true; ++// SET_REGNO (pic_offset_table_rtx, cfun->machine->global_pointer); ++// ++// /* Set up the global pointer for n32 or n64 abicalls. */ ++// loongarch_emit_loadgp (); ++// } ++ ++ /* We need two temporary registers in some cases; hard registers 12 and 13 are used. */ ++ temp1 = gen_rtx_REG (Pmode, 12); ++ temp2 = gen_rtx_REG (Pmode, 13); ++ ++ /* Find out which register contains the "this" pointer: GP_ARG_FIRST + 1 when aggregate_value_p holds for FUNCTION's return type, else GP_ARG_FIRST. */ ++ if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) ++ this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST + 1); ++ else ++ this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST); ++ ++ /* Add DELTA to THIS_RTX. */ ++ if (delta != 0) ++ { ++ rtx offset = GEN_INT (delta); ++ if (!SMALL_OPERAND (delta)) ++ { ++ loongarch_emit_move (temp1, offset); ++ offset = temp1; ++ } ++ emit_insn (gen_add3_insn (this_rtx, this_rtx, offset)); ++ } ++ ++ /* If needed, add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */ ++ if (vcall_offset != 0) ++ { ++ rtx addr; ++ ++ /* Set TEMP1 to *THIS_RTX. */ ++ loongarch_emit_move (temp1, gen_rtx_MEM (Pmode, this_rtx)); ++ ++ /* Set ADDR to a legitimate address for *THIS_RTX + VCALL_OFFSET. */ ++ addr = loongarch_add_offset (temp2, temp1, vcall_offset); ++ ++ /* Load the offset and add it to THIS_RTX. */ ++ loongarch_emit_move (temp1, gen_rtx_MEM (Pmode, addr)); ++ emit_insn (gen_add3_insn (this_rtx, this_rtx, temp1)); ++ } ++ ++ /* Jump to the target function. Use a sibcall if direct jumps are ++ allowed, otherwise load the address into a register first. 
*/ ++ if (use_sibcall_p) ++ { ++ insn = emit_call_insn (gen_sibcall_internal (fnaddr, const0_rtx)); ++ SIBLING_CALL_P (insn) = 1; ++ } ++ else ++ { ++ loongarch_emit_move (temp1, fnaddr); ++ emit_jump_insn (gen_indirect_jump (temp1)); ++ } ++ ++ /* Run just enough of rest_of_compilation. This sequence was ++ "borrowed" from alpha.c. */ ++ insn = get_insns (); ++ split_all_insns_noflow (); ++ shorten_branches (insn); ++ assemble_start_function (thunk_fndecl, fnname); ++ final_start_function (insn, file, 1); ++ final (insn, file, 1); ++ final_end_function (); ++ assemble_end_function (thunk_fndecl, fnname); ++ ++ /* Clean up the vars set above. Note that final_end_function resets ++ the global pointer for us. */ ++ reload_completed = 0; ++} ++ ++ ++/* The last argument passed to loongarch_set_compression_mode, ++ or (unsigned int) -1 if the function hasn't been called yet. */ ++static unsigned int old_compression_mode = -1; ++ ++/* Set up the target-dependent global state for ISA mode COMPRESSION_MODE. */ ++ ++static void ++loongarch_set_compression_mode (unsigned int compression_mode) ++{ ++ ++ if (compression_mode == old_compression_mode) ++ return; ++ ++ /* Restore base settings of various flags. */ ++ target_flags = loongarch_base_target_flags; ++ flag_schedule_insns = loongarch_base_schedule_insns; ++ flag_reorder_blocks_and_partition = loongarch_base_reorder_blocks_and_partition; ++ flag_move_loop_invariants = loongarch_base_move_loop_invariants; ++ str_align_loops = loongarch_base_align_loops; ++ str_align_jumps = loongarch_base_align_jumps; ++ str_align_functions = loongarch_base_align_functions; ++ target_flags |= compression_mode; ++ ++ /* Provide default values for align_* for 64-bit targets. 
*/ ++ if (TARGET_64BIT) ++ { ++ if (flag_align_loops && !str_align_loops) ++ str_align_loops = "8"; ++ if (flag_align_jumps && !str_align_jumps) ++ str_align_jumps = "8"; ++ if (flag_align_functions && !str_align_functions) ++ str_align_functions = "8"; ++ } ++ ++ targetm.min_anchor_offset = -32768; ++ targetm.max_anchor_offset = 32767; ++ targetm.const_anchor = 0x8000; ++ restore_target_globals (&default_target_globals); ++ old_compression_mode = compression_mode; ++} ++ ++/* Implement TARGET_SET_CURRENT_FUNCTION. Switch to the compression mode ++ that loongarch_get_compress_mode reports for FNDECL. */ ++ ++static void ++loongarch_set_current_function (tree fndecl) ++{ ++ loongarch_set_compression_mode (loongarch_get_compress_mode (fndecl)); ++} ++ ++/* Allocate a zero-initialized, garbage-collected machine_function for per-function machine-dependent data. */ ++ ++static struct machine_function * ++loongarch_init_machine_status (void) ++{ ++ return ggc_cleared_alloc<machine_function> (); ++} ++ ++/* Return the processor associated with the given ISA level, or null ++ if the ISA isn't valid. */ ++ ++static const struct loongarch_cpu_info * ++loongarch_cpu_info_from_isa (int isa) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < ARRAY_SIZE (loongarch_cpu_info_table); i++) ++ if (loongarch_cpu_info_table[i].isa == isa) ++ return loongarch_cpu_info_table + i; ++ ++ return NULL; ++} ++ ++/* Return a loongarch_cpu_info entry determined by an option valued ++ OPT. */ ++ ++static const struct loongarch_cpu_info * ++loongarch_cpu_info_from_opt (int opt) ++{ ++ switch (opt) ++ { ++ case LARCH_ARCH_OPTION_FROM_ABI: ++ /* 'from-abi' selects the most compatible architecture for the ++ given ABI. */ ++ return loongarch_cpu_info_from_isa (ABI_NEEDS_32BIT_REGS ? 1 ++ : (TARGET_64BIT ? 3 : 1)); ++ ++ case LARCH_ARCH_OPTION_NATIVE: ++ gcc_unreachable (); ++ ++ default: ++ return &loongarch_cpu_info_table[opt]; ++ } ++} ++ ++/* Return a default loongarch_cpu_info entry, given that no -march= option ++ was explicitly specified. 
*/ ++ ++static const struct loongarch_cpu_info * ++loongarch_default_arch (void) ++{ ++#if defined (LARCH_CPU_STRING_DEFAULT) ++ unsigned int i; ++ for (i = 0; i < ARRAY_SIZE (loongarch_cpu_info_table); i++) ++ if (strcmp (loongarch_cpu_info_table[i].name, LARCH_CPU_STRING_DEFAULT) == 0) ++ return loongarch_cpu_info_table + i; ++ gcc_unreachable (); ++#elif defined (LARCH_ISA_DEFAULT) ++ return loongarch_cpu_info_from_isa (LARCH_ISA_DEFAULT); ++#else ++ /* 'from-abi' makes a good default: you get whatever the ABI ++ requires. */ ++ return loongarch_cpu_info_from_opt (LARCH_ARCH_OPTION_FROM_ABI); ++#endif ++} ++ ++/* Set up globals to generate code for the ISA or processor ++ described by INFO. */ ++ ++static void ++loongarch_set_architecture (const struct loongarch_cpu_info *info) ++{ ++ if (info != 0) ++ { ++ loongarch_arch_info = info; ++ loongarch_arch = info->cpu; ++ loongarch_isa = info->isa; ++ if (loongarch_isa < 32) ++ loongarch_isa_rev = 0; ++ else ++ loongarch_isa_rev = (loongarch_isa & 31) + 1; ++ } ++} ++ ++/* Likewise for tuning. */ ++ ++static void ++loongarch_set_tune (const struct loongarch_cpu_info *info) ++{ ++ if (info != 0) ++ { ++ loongarch_tune_info = info; ++ loongarch_tune = info->cpu; ++ } ++} ++ ++/* Implement TARGET_OPTION_OVERRIDE. */ ++ ++static void ++loongarch_option_override (void) ++{ ++ int i, start, regno, mode; ++ ++ if (global_options_set.x_loongarch_isa_option) ++ loongarch_isa_option_info = &loongarch_cpu_info_table[loongarch_isa_option]; ++ ++#ifdef SUBTARGET_OVERRIDE_OPTIONS ++ SUBTARGET_OVERRIDE_OPTIONS; ++#endif ++ ++ ++ /* Save the base compression state and process flags as though we ++ were generating uncompressed code. */ ++ loongarch_base_compression_flags = 0; ++ ++ /* -mno-float overrides -mhard-float and -msoft-float. */ ++ if (TARGET_NO_FLOAT) ++ { ++ target_flags |= MASK_SOFT_FLOAT_ABI; ++ target_flags_explicit |= MASK_SOFT_FLOAT_ABI; ++ } ++ ++ ++ /* Set the small data limit.
*/ ++ loongarch_small_data_threshold = (global_options_set.x_g_switch_value ++ ? g_switch_value ++ : LARCH_DEFAULT_GVALUE); ++ ++ /* The following code determines the architecture and register size. ++ Similar code was added to GAS 2.14 (see tc-loongarch.c:md_after_parse_args()). ++ The GAS and GCC code should be kept in sync as much as possible. */ ++ ++ if (global_options_set.x_loongarch_arch_option) ++ loongarch_set_architecture (loongarch_cpu_info_from_opt (loongarch_arch_option)); ++ ++ if (loongarch_isa_option_info != 0) ++ { ++ if (loongarch_arch_info == 0) ++ loongarch_set_architecture (loongarch_isa_option_info); ++ else if (loongarch_arch_info->isa != loongarch_isa_option_info->isa) ++ error ("%<-%s%> conflicts with the other architecture options, " ++ "which specify a %s processor", ++ loongarch_isa_option_info->name, ++ loongarch_cpu_info_from_isa (loongarch_arch_info->isa)->name); ++ } ++ ++ if (loongarch_arch_info == 0) ++ loongarch_set_architecture (loongarch_default_arch ()); ++ ++ /* Optimize for loongarch_arch, unless -mtune selects a different processor. */ ++ if (global_options_set.x_loongarch_tune_option) ++ loongarch_set_tune (loongarch_cpu_info_from_opt (loongarch_tune_option)); ++ ++ if (loongarch_tune_info == 0) ++ loongarch_set_tune (loongarch_arch_info); ++ ++ if ((target_flags_explicit & MASK_64BIT) != 0) ++ { ++ if (TARGET_64BIT && ABI_NEEDS_32BIT_REGS) ++ error ("%<-mgp64%> used with a 32-bit ABI"); ++ } ++ else ++ { ++ /* Infer the integer register size from the ABI and processor. ++ Restrict ourselves to 32-bit registers if that's all the ++ processor has, or if the ABI cannot handle 64-bit registers.
*/ ++ if (ABI_NEEDS_32BIT_REGS) ++ target_flags &= ~MASK_64BIT; ++ else ++ target_flags |= MASK_64BIT; ++ } ++ ++ if ((target_flags_explicit & MASK_FLOAT64) != 0) ++ { ++ if (loongarch_isa_rev >= 6 && !TARGET_FLOAT64) ++ error ("the %qs architecture does not support %<-mfp32%>", ++ loongarch_arch_info->name); ++ else if (TARGET_SINGLE_FLOAT && TARGET_FLOAT64) ++ error ("unsupported combination: %s", "-mfp64 -msingle-float"); ++ else if (TARGET_64BIT && TARGET_DOUBLE_FLOAT && !TARGET_FLOAT64) ++ error ("unsupported combination: %s", "-mgp64 -mfp32 -mdouble-float"); ++ else if (!TARGET_64BIT && TARGET_FLOAT64) ++ { ++ if (!ISA_HAS_MXFRH) ++ error ("%<-mgp32%> and %<-mfp64%> can only be combined if" ++ " the target supports the mfhc1 and mthc1 instructions"); ++ else if (loongarch_abi != ABILP32) ++ error ("%<-mgp32%> and %<-mfp64%> can only be combined when using" ++ " the o32 ABI"); ++ } ++ } ++ else ++ { ++ /* -msingle-float selects 32-bit float registers. On r6 and later, ++ -mdouble-float selects 64-bit float registers, since the old paired ++ register model is not supported. In other cases the float registers ++ should be the same size as the integer ones. */ ++ if (loongarch_isa_rev >= 6 && TARGET_DOUBLE_FLOAT) ++ target_flags |= MASK_FLOAT64; ++ else if (TARGET_64BIT && TARGET_DOUBLE_FLOAT) ++ target_flags |= MASK_FLOAT64; ++ else ++ target_flags &= ~MASK_FLOAT64; ++ } ++ ++ /* End of code shared with GAS. */ ++ ++ /* If a -mlong* option was given, check that it matches the ABI, ++ otherwise infer the -mlong* setting from the other options.
*/ ++ if ((target_flags_explicit & MASK_LONG64) != 0) ++ { ++ if (TARGET_LONG64) ++ { ++ if (loongarch_abi == ABILPX32) ++ error ("%qs is incompatible with %qs", "-mabi=lpx32", "-mlong64"); ++ else if (loongarch_abi == ABILP32) ++ error ("%qs is incompatible with %qs", "-mabi=lp32", "-mlong64"); ++ } ++ else ++ { ++ if (loongarch_abi == ABILP64) ++ error ("%qs is incompatible with %qs", "-mabi=lp64", "-mlong32"); ++ } ++ } ++ else ++ { ++ if (loongarch_abi == ABILP64) ++ target_flags |= MASK_LONG64; ++ else ++ target_flags &= ~MASK_LONG64; ++ } ++ ++ if (!TARGET_OLDABI) ++ flag_pcc_struct_return = 0; ++ ++ /* Decide which rtx_costs structure to use. */ ++ if (optimize_size) ++ loongarch_cost = &loongarch_rtx_cost_optimize_size; ++ else ++ loongarch_cost = &loongarch_rtx_cost_data[loongarch_tune]; ++ ++ /* If the user hasn't specified a branch cost, use the processor's ++ default. */ ++ if (loongarch_branch_cost == 0) ++ loongarch_branch_cost = loongarch_cost->branch_cost; ++ ++ /* Prefer a call to memcpy over inline code when optimizing for size, ++ though see MOVE_RATIO in loongarch.h. */ ++ if (optimize_size && (target_flags_explicit & MASK_MEMCPY) == 0) ++ target_flags |= MASK_MEMCPY; ++ ++ /* If we have a nonzero small-data limit, check that the -mgpopt ++ setting is consistent with the other target flags. */ ++ if (loongarch_small_data_threshold > 0) ++ { ++ if (!TARGET_GPOPT) ++ { ++ TARGET_LOCAL_SDATA = false; ++ TARGET_EXTERN_SDATA = false; ++ } ++ else ++ { ++ if (TARGET_VXWORKS_RTP) ++ warning (0, "cannot use small-data accesses for %qs", "-mrtp"); ++ } ++ } ++ ++ /* .cfi_* directives generate a read-only section, so fall back on ++ manual .eh_frame creation if we need the section to be writable. */ ++ if (TARGET_WRITABLE_EH_FRAME) ++ flag_dwarf2_cfi_asm = 0; ++ ++ loongarch_init_print_operand_punct (); ++ ++ /* Set up array to map GCC register number to debug register number. ++ Ignore the special purpose register numbers.
*/ ++ ++ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) ++ { ++ loongarch_dbx_regno[i] = IGNORED_DWARF_REGNUM; ++ if (GP_REG_P (i) || FP_REG_P (i)) ++ loongarch_dwarf_regno[i] = i; ++ else ++ loongarch_dwarf_regno[i] = INVALID_REGNUM; ++ } ++ ++ start = GP_DBX_FIRST - GP_REG_FIRST; ++ for (i = GP_REG_FIRST; i <= GP_REG_LAST; i++) ++ loongarch_dbx_regno[i] = i + start; ++ ++ start = FP_DBX_FIRST - FP_REG_FIRST; ++ for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++) ++ loongarch_dbx_regno[i] = i + start; ++ ++ /* Set up loongarch_hard_regno_mode_ok. */ ++ for (mode = 0; mode < MAX_MACHINE_MODE; mode++) ++ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) ++ loongarch_hard_regno_mode_ok_p[mode][regno] ++ = loongarch_hard_regno_mode_ok_uncached (regno, (machine_mode) mode); ++ ++ /* Function to allocate machine-dependent function status. */ ++ init_machine_status = &loongarch_init_machine_status; ++ target_flags &= ~MASK_RELAX_PIC_CALLS; ++ ++ /* Save base state of options. */ ++ loongarch_base_target_flags = target_flags; ++ loongarch_base_schedule_insns = flag_schedule_insns; ++ loongarch_base_reorder_blocks_and_partition = flag_reorder_blocks_and_partition; ++ loongarch_base_move_loop_invariants = flag_move_loop_invariants; ++ loongarch_base_align_loops = str_align_loops; ++ loongarch_base_align_jumps = str_align_jumps; ++ loongarch_base_align_functions = str_align_functions; ++ ++ /* Now select the ISA mode. ++ ++ Do all CPP-sensitive stuff in uncompressed mode; we'll switch modes ++ later if required. */ ++ loongarch_set_compression_mode (0); ++ ++ /* We register a second machine specific reorg pass after delay slot ++ filling. Registering the pass must be done at start up. It's ++ convenient to do it here.
*/ ++ opt_pass *new_pass = make_pass_loongarch_machine_reorg2 (g); ++ struct register_pass_info insert_pass_loongarch_machine_reorg2 = ++ { ++ new_pass, /* pass */ ++ "dbr", /* reference_pass_name */ ++ 1, /* ref_pass_instance_number */ ++ PASS_POS_INSERT_AFTER /* po_op */ ++ }; ++ register_pass (&insert_pass_loongarch_machine_reorg2); ++ ++ loongarch_register_frame_header_opt (); ++} ++ ++ ++/* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */ ++ ++static void ++loongarch_conditional_register_usage (void) ++{ ++ if (!TARGET_HARD_FLOAT) ++ { ++ accessible_reg_set &= ~(reg_class_contents[(int) FP_REGS] | reg_class_contents[(int) ST_REGS]); ++ } ++} ++ ++/* Implement EH_USES. No register is reported as used by exception handling. */ ++ ++bool ++loongarch_eh_uses (unsigned int regno ATTRIBUTE_UNUSED) ++{ ++ return false; ++} ++ ++/* Implement EPILOGUE_USES. */ ++ ++bool ++loongarch_epilogue_uses (unsigned int regno) ++{ ++ /* Say that the epilogue uses the return address register. Note that ++ in the case of sibcalls, the values "used by the epilogue" are ++ considered live at the start of the called function. */ ++ if (regno == RETURN_ADDR_REGNUM) ++ return true; ++ ++ /* An interrupt handler must preserve some registers that are ++ ordinarily call-clobbered. */ ++ if (cfun->machine->interrupt_handler_p ++ && loongarch_interrupt_extra_call_saved_reg_p (regno)) ++ return true; ++ ++ return false; ++} ++ ++/* Return true if MEM1 and MEM2 use the same base register, and the ++ offset of MEM2 equals the offset of MEM1 plus 4. FIRST_REG is the ++ register into (from) which the contents of MEM1 will be loaded ++ (stored), depending on the value of LOAD_P. ++ SWAP_P is true when the 1st and 2nd instructions are swapped.
*/ ++ ++static bool ++loongarch_load_store_pair_p_1 (bool load_p, bool swap_p, ++ rtx first_reg, rtx mem1, rtx mem2) ++{ ++ rtx base1, base2; ++ HOST_WIDE_INT offset1, offset2; ++ ++ if (!MEM_P (mem1) || !MEM_P (mem2)) ++ return false; ++ ++ loongarch_split_plus (XEXP (mem1, 0), &base1, &offset1); ++ loongarch_split_plus (XEXP (mem2, 0), &base2, &offset2); ++ ++ if (!REG_P (base1) || !rtx_equal_p (base1, base2)) ++ return false; ++ ++ /* Avoid invalid load pair instructions. */ ++ if (load_p && REGNO (first_reg) == REGNO (base1)) ++ return false; ++ ++ /* We must avoid this case for anti-dependence. ++ Ex: lw $3, 4($3) ++ lw $2, 0($3) ++ first_reg is $2, but the base is $3. */ ++ if (load_p ++ && swap_p ++ && REGNO (first_reg) + 1 == REGNO (base1)) ++ return false; ++ ++ if (offset2 != offset1 + 4) ++ return false; ++ ++ if (!ULARCH_12BIT_OFFSET_P (offset1)) ++ return false; ++ ++ return true; ++} ++ ++bool ++loongarch_load_store_bonding_p (rtx *operands, machine_mode mode, bool load_p) ++{ ++ rtx reg1, reg2, mem1, mem2, base1, base2; ++ enum reg_class rc1, rc2; ++ HOST_WIDE_INT offset1, offset2; ++ ++ if (load_p) ++ { ++ reg1 = operands[0]; ++ reg2 = operands[2]; ++ mem1 = operands[1]; ++ mem2 = operands[3]; ++ } ++ else ++ { ++ reg1 = operands[1]; ++ reg2 = operands[3]; ++ mem1 = operands[0]; ++ mem2 = operands[2]; ++ } ++ ++ if (loongarch_address_insns (XEXP (mem1, 0), mode, false) == 0 ++ || loongarch_address_insns (XEXP (mem2, 0), mode, false) == 0) ++ return false; ++ ++ loongarch_split_plus (XEXP (mem1, 0), &base1, &offset1); ++ loongarch_split_plus (XEXP (mem2, 0), &base2, &offset2); ++ ++ /* Base regs do not match. */ ++ if (!REG_P (base1) || !rtx_equal_p (base1, base2)) ++ return false; ++ ++ /* Either of the loads is clobbering base register. It is legitimate to bond ++ loads if second load clobbers base register. However, hardware does not ++ support such bonding.
*/ ++ if (load_p ++ && (REGNO (reg1) == REGNO (base1) ++ || (REGNO (reg2) == REGNO (base1)))) ++ return false; ++ ++ /* Loading in same registers. */ ++ if (load_p ++ && REGNO (reg1) == REGNO (reg2)) ++ return false; ++ ++ /* The loads/stores are not of same type. */ ++ rc1 = REGNO_REG_CLASS (REGNO (reg1)); ++ rc2 = REGNO_REG_CLASS (REGNO (reg2)); ++ if (rc1 != rc2 ++ && !reg_class_subset_p (rc1, rc2) ++ && !reg_class_subset_p (rc2, rc1)) ++ return false; ++ ++ /* The two accesses must be exactly one MODE apart; abs_hwi keeps the full HOST_WIDE_INT width. */ ++ if (abs_hwi (offset1 - offset2) != GET_MODE_SIZE (mode)) ++ return false; ++ ++ return true; ++} ++ ++/* OPERANDS describes the operands to a pair of SETs, in the order ++ dest1, src1, dest2, src2. Return true if the operands can be used ++ in an LWP or SWP instruction; LOAD_P says which. */ ++ ++bool ++loongarch_load_store_pair_p (bool load_p, rtx *operands) ++{ ++ rtx reg1, reg2, mem1, mem2; ++ ++ if (load_p) ++ { ++ reg1 = operands[0]; ++ reg2 = operands[2]; ++ mem1 = operands[1]; ++ mem2 = operands[3]; ++ } ++ else ++ { ++ reg1 = operands[1]; ++ reg2 = operands[3]; ++ mem1 = operands[0]; ++ mem2 = operands[2]; ++ } ++ ++ if (REGNO (reg2) == REGNO (reg1) + 1) ++ return loongarch_load_store_pair_p_1 (load_p, false, reg1, mem1, mem2); ++ ++ if (REGNO (reg1) == REGNO (reg2) + 1) ++ return loongarch_load_store_pair_p_1 (load_p, true, reg2, mem2, mem1); ++ ++ return false; ++} ++ ++/* Return true if REG1 and REG2 match the criteria for a movep insn.
*/ ++ ++bool ++loongarch_movep_target_p (rtx reg1, rtx reg2) ++{ ++ int regno1, regno2, pair; ++ unsigned int i; ++ static const int match[8] = { ++ 0x00000060, /* 5, 6 */ ++ 0x000000a0, /* 5, 7 */ ++ 0x000000c0, /* 6, 7 */ ++ 0x00200010, /* 4, 21 */ ++ 0x00400010, /* 4, 22 */ ++ 0x00000030, /* 4, 5 */ ++ 0x00000050, /* 4, 6 */ ++ 0x00000090 /* 4, 7 */ ++ }; ++ ++ if (!REG_P (reg1) || !REG_P (reg2)) ++ return false; ++ ++ regno1 = REGNO (reg1); ++ regno2 = REGNO (reg2); ++ ++ if (!GP_REG_P (regno1) || !GP_REG_P (regno2)) ++ return false; ++ ++ pair = (1 << regno1) | (1 << regno2); ++ ++ for (i = 0; i < ARRAY_SIZE (match); i++) ++ if (pair == match[i]) ++ return true; ++ ++ return false; ++} ++ ++/* Return the size in bytes of the trampoline code, padded to ++ TRAMPOLINE_ALIGNMENT bits. The static chain pointer and target ++ function address immediately follow. The code is four 4-byte words. */ ++ ++int ++loongarch_trampoline_code_size (void) ++{ ++ return 4 * 4; ++} ++ ++/* Implement TARGET_TRAMPOLINE_INIT. */ ++ ++static void ++loongarch_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) ++{ ++ rtx addr, end_addr, mem; ++ rtx trampoline[8]; ++ unsigned int i, j; ++ HOST_WIDE_INT end_addr_offset, static_chain_offset, target_function_offset; ++ ++ /* Work out the offsets of the pointers from the start of the ++ trampoline code. */ ++ end_addr_offset = loongarch_trampoline_code_size (); ++ static_chain_offset = end_addr_offset; ++ target_function_offset = static_chain_offset + GET_MODE_SIZE (ptr_mode); ++ ++ /* Get pointers to the beginning and end of the code block. */ ++ addr = force_reg (Pmode, XEXP (m_tramp, 0)); ++ end_addr = loongarch_force_binary (Pmode, PLUS, addr, GEN_INT (end_addr_offset)); ++ ++#define OP(X) gen_int_mode (X, SImode) ++ ++ /* Build up the code in TRAMPOLINE.
*/ ++ i = 0; ++ /* ++ pcaddi $static_chain,0 ++ ld.[dw] $tmp,$static_chain,target_function_offset ++ ld.[dw] $static_chain,$static_chain,static_chain_offset ++ jirl $r0,$tmp,0 ++ */ ++ trampoline[i++] = OP (0x18000000 | (STATIC_CHAIN_REGNUM - GP_REG_FIRST)); ++ trampoline[i++] = OP ((ptr_mode == DImode ? 0x28c00000 : 0x28800000) ++ | 19 /* $t7 */ ++ | ((STATIC_CHAIN_REGNUM - GP_REG_FIRST) << 5) ++ | ((target_function_offset & 0xfff) << 10)); ++ trampoline[i++] = OP ((ptr_mode == DImode ? 0x28c00000 : 0x28800000) ++ | (STATIC_CHAIN_REGNUM - GP_REG_FIRST) ++ | ((STATIC_CHAIN_REGNUM - GP_REG_FIRST) << 5) ++ | ((static_chain_offset & 0xfff) << 10)); ++ trampoline[i++] = OP (0x4c000000 | (19 << 5)); ++#undef OP ++ ++ for (j = 0; j < i; j++) ++ { ++ mem = adjust_address (m_tramp, SImode, j * GET_MODE_SIZE (SImode)); ++ loongarch_emit_move (mem, trampoline[j]); ++ } ++ ++ /* Set up the static chain pointer field. */ ++ mem = adjust_address (m_tramp, ptr_mode, static_chain_offset); ++ loongarch_emit_move (mem, chain_value); ++ ++ /* Set up the target function field. */ ++ mem = adjust_address (m_tramp, ptr_mode, target_function_offset); ++ loongarch_emit_move (mem, XEXP (DECL_RTL (fndecl), 0)); ++ ++ /* Flush the code part of the trampoline. */ ++ emit_insn (gen_add3_insn (end_addr, addr, GEN_INT (TRAMPOLINE_SIZE))); ++ emit_insn (gen_clear_cache (addr, end_addr)); ++} ++ ++ ++/* Implement TARGET_SHIFT_TRUNCATION_MASK. The mask is the bit size of ++ MODE minus one for every mode; there is no mode-specific ++ exception. */ ++ ++static unsigned HOST_WIDE_INT ++loongarch_shift_truncation_mask (machine_mode mode) ++{ ++ return GET_MODE_BITSIZE (mode) - 1; ++} ++ ++/* Implement TARGET_PREPARE_PCH_SAVE. */ ++ ++static void ++loongarch_prepare_pch_save (void) ++{ ++ loongarch_set_compression_mode (0); ++} ++ ++/* Generate or test for an insn that supports a constant permutation.
*/ ++ ++#define MAX_VECT_LEN 32 ++ ++struct expand_vec_perm_d ++{ ++ rtx target, op0, op1; ++ unsigned char perm[MAX_VECT_LEN]; ++ machine_mode vmode; ++ unsigned char nelt; ++ bool one_vector_p; ++ bool testing_p; ++}; ++ ++/* Construct (set target (vec_select op0 (parallel perm))) and ++ return true if that's a valid instruction in the active ISA. */ ++ ++static bool ++loongarch_expand_vselect (rtx target, rtx op0, ++ const unsigned char *perm, unsigned nelt) ++{ ++ rtx rperm[MAX_VECT_LEN], x; ++ rtx_insn *insn; ++ unsigned i; ++ ++ for (i = 0; i < nelt; ++i) ++ rperm[i] = GEN_INT (perm[i]); ++ ++ x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm)); ++ x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x); ++ x = gen_rtx_SET (target, x); ++ ++ insn = emit_insn (x); ++ if (recog_memoized (insn) < 0) ++ { ++ remove_insn (insn); ++ return false; ++ } ++ return true; ++} ++ ++/* Similar, but generate a vec_concat from op0 and op1 as well. */ ++ ++static bool ++loongarch_expand_vselect_vconcat (rtx target, rtx op0, rtx op1, ++ const unsigned char *perm, unsigned nelt) ++{ ++ machine_mode v2mode; ++ rtx x; ++ ++ if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode)) ++ return false; ++ x = gen_rtx_VEC_CONCAT (v2mode, op0, op1); ++ return loongarch_expand_vselect (target, x, perm, nelt); ++} ++ ++static bool ++loongarch_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) ++{ ++ unsigned int i, nelt = d->nelt; ++ unsigned char perm2[MAX_VECT_LEN]; ++ ++ if (d->one_vector_p) ++ { ++ /* Try interleave with alternating operands. */ ++ memcpy (perm2, d->perm, sizeof(perm2)); ++ for (i = 1; i < nelt; i += 2) ++ perm2[i] += nelt; ++ if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, perm2, nelt)) ++ return true; ++ } ++ else ++ { ++ if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, ++ d->perm, nelt)) ++ return true; ++ ++ /* Try again with swapped operands.
*/ ++ for (i = 0; i < nelt; ++i) ++ perm2[i] = (d->perm[i] + nelt) & (2 * nelt - 1); ++ if (loongarch_expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt)) ++ return true; ++ } ++ ++ return false; ++} ++ ++/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */ ++ ++static bool ++loongarch_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, ++ rtx op1, const vec_perm_indices &sel) ++{ ++ struct expand_vec_perm_d d; ++ int i, nelt, which; ++ unsigned char orig_perm[MAX_VECT_LEN]; ++ bool ok; ++ ++ d.target = target; ++ d.op0 = op0; ++ d.op1 = op1; ++ ++ d.vmode = vmode; ++ gcc_assert (VECTOR_MODE_P (vmode)); ++ d.nelt = nelt = GET_MODE_NUNITS (vmode); ++ d.testing_p = !target; ++ ++ /* This is overly conservative, but ensures we don't get an ++ uninitialized warning on ORIG_PERM. */ ++ memset (orig_perm, 0, MAX_VECT_LEN); ++ for (i = which = 0; i < nelt; ++i) ++ { ++ int ei = sel[i] & (2 * nelt - 1); ++ which |= (ei < nelt ? 1 : 2); ++ orig_perm[i] = ei; ++ } ++ memcpy (d.perm, orig_perm, MAX_VECT_LEN); ++ ++ switch (which) ++ { ++ default: ++ gcc_unreachable(); ++ ++ case 3: ++ d.one_vector_p = false; ++ if (d.testing_p || !rtx_equal_p (d.op0, d.op1)) ++ break; ++ /* FALLTHRU */ ++ ++ case 2: ++ for (i = 0; i < nelt; ++i) ++ d.perm[i] &= nelt - 1; ++ d.op0 = d.op1; ++ d.one_vector_p = true; ++ break; ++ ++ case 1: ++ d.op1 = d.op0; ++ d.one_vector_p = true; ++ break; ++ } ++ ++ if (d.testing_p) ++ { ++ d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); ++ d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); ++ if (!d.one_vector_p) ++ d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); ++ ++ start_sequence (); ++ ok = loongarch_expand_vec_perm_const_1 (&d); ++ end_sequence (); ++ return ok; ++ } ++ ++ ok = loongarch_expand_vec_perm_const_1 (&d); ++ ++ /* If we were given a two-vector permutation which just happened to ++ have both input vectors equal, we folded this into a one-vector ++ permutation.
There are several loongson patterns that are matched ++ via direct vec_select+vec_concat expansion, but we do not have ++ support in loongarch_expand_vec_perm_const_1 to guess the adjustment ++ that should be made for a single operand. Just try again with ++ the original permutation. */ ++ if (!ok && which == 3) ++ { ++ d.op0 = op0; ++ d.op1 = op1; ++ d.one_vector_p = false; ++ memcpy (d.perm, orig_perm, MAX_VECT_LEN); ++ ok = loongarch_expand_vec_perm_const_1 (&d); ++ } ++ ++ return ok; ++} ++ ++/* Implement TARGET_SCHED_REASSOCIATION_WIDTH. Always returns 1, whatever OPC and MODE are. */ ++ ++static int ++loongarch_sched_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED, ++ machine_mode mode ATTRIBUTE_UNUSED) ++{ ++ return 1; ++} ++ ++/* A subroutine of loongarch_expand_vec_init, match constant vector elements. */ ++ ++static inline bool ++loongarch_constant_elt_p (rtx x) ++{ ++ return CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE; ++} ++ ++ ++/* Implement HARD_REGNO_CALLER_SAVE_MODE. */ ++ ++machine_mode ++loongarch_hard_regno_caller_save_mode (unsigned int regno, ++ unsigned int nregs, ++ machine_mode mode) ++{ ++ /* For performance, avoid saving/restoring upper parts of a register ++ by returning MODE as save mode when the mode is known. */ ++ if (mode == VOIDmode) ++ return choose_hard_reg_mode (regno, nregs, NULL); ++ else ++ return mode; ++} ++ ++/* Implement TARGET_CASE_VALUES_THRESHOLD. */ ++ ++unsigned int ++loongarch_case_values_threshold (void) ++{ ++ return default_case_values_threshold (); ++} ++ ++ ++/* Implement TARGET_SPILL_CLASS. */ ++ ++static reg_class_t ++loongarch_spill_class (reg_class_t rclass ATTRIBUTE_UNUSED, ++ machine_mode mode ATTRIBUTE_UNUSED) ++{ ++ return NO_REGS; ++} ++ ++/* Implement TARGET_LRA_P. */ ++ ++static bool ++loongarch_lra_p (void) ++{ ++ return loongarch_lra_flag; ++} ++ ++/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
*/ ++ ++static reg_class_t ++loongarch_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class, ++ reg_class_t best_class ATTRIBUTE_UNUSED) ++{ ++ /* LRA will allocate an FPR for an integer mode pseudo instead of spilling ++ to memory if an FPR is present in the allocno class. It is rare that ++ we actually need to place an integer mode value in an FPR so where ++ possible limit the allocation to GR_REGS. This will slightly pessimize ++ code that involves integer to/from float conversions as these will have ++ to reload into FPRs in LRA. Such reloads are sometimes eliminated and ++ sometimes only partially eliminated. We choose to take this penalty ++ in order to eliminate usage of FPRs in code that does not use floating ++ point data. ++ ++ This change has a similar effect to increasing the cost of FPR->GPR ++ register moves for integer modes so that they are higher than the cost ++ of memory but changing the allocno class is more reliable. ++ ++ This is also similar to forbidding integer mode values in FPRs entirely ++ but this would lead to an inconsistency in the integer to/from float ++ instructions that say integer mode values must be placed in FPRs. Only an ALL_REGS class for an integral-mode pseudo is narrowed; anything else is returned unchanged. */ ++ if (INTEGRAL_MODE_P (PSEUDO_REGNO_MODE (regno)) && allocno_class == ALL_REGS) ++ return GR_REGS; ++ return allocno_class; ++} ++ ++/* Implement TARGET_PROMOTE_FUNCTION_MODE. */ ++ ++/* This function is equivalent to default_promote_function_mode_always_promote ++ except that it returns a promoted mode even if type is NULL_TREE. This is ++ needed by libcalls which have no type (only a mode) such as fixed conversion ++ routines that take a signed or unsigned char/short argument and convert it ++ to a fixed type.
*/ ++ ++static machine_mode ++loongarch_promote_function_mode (const_tree type, ++ machine_mode mode, ++ int *punsignedp, ++ const_tree fntype ATTRIBUTE_UNUSED, ++ int for_return ATTRIBUTE_UNUSED) ++{ ++ int unsignedp; ++ ++ if (type != NULL_TREE) ++ return promote_mode (type, mode, punsignedp); ++ ++ unsignedp = *punsignedp; ++ PROMOTE_MODE (mode, unsignedp, type); ++ *punsignedp = unsignedp; ++ return mode; ++} ++ ++/* Implement TARGET_TRULY_NOOP_TRUNCATION. */ ++ ++static bool ++loongarch_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec) ++{ ++ return !TARGET_64BIT || inprec <= 32 || outprec > 32; ++} ++ ++/* Implement TARGET_CONSTANT_ALIGNMENT. */ ++ ++static HOST_WIDE_INT ++loongarch_constant_alignment (const_tree exp, HOST_WIDE_INT align) ++{ ++ if (TREE_CODE (exp) == STRING_CST || TREE_CODE (exp) == CONSTRUCTOR) ++ return MAX (align, BITS_PER_WORD); ++ return align; ++} ++ ++/* Implement TARGET_STARTING_FRAME_OFFSET. See loongarch_compute_frame_info ++ for details about the frame layout. */ ++ ++static HOST_WIDE_INT ++loongarch_starting_frame_offset (void) ++{ ++ if (FRAME_GROWS_DOWNWARD) ++ return 0; ++ return crtl->outgoing_args_size; ++} ++ ++ ++/* Initialize the GCC target structure.
*/ ++#undef TARGET_ASM_ALIGNED_HI_OP ++#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" ++#undef TARGET_ASM_ALIGNED_SI_OP ++#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" ++#undef TARGET_ASM_ALIGNED_DI_OP ++#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t" ++ ++#undef TARGET_OPTION_OVERRIDE ++#define TARGET_OPTION_OVERRIDE loongarch_option_override ++ ++#undef TARGET_LEGITIMIZE_ADDRESS ++#define TARGET_LEGITIMIZE_ADDRESS loongarch_legitimize_address ++ ++#undef TARGET_ASM_FUNCTION_PROLOGUE ++#define TARGET_ASM_FUNCTION_PROLOGUE loongarch_output_function_prologue ++#undef TARGET_ASM_FUNCTION_EPILOGUE ++#define TARGET_ASM_FUNCTION_EPILOGUE loongarch_output_function_epilogue ++#undef TARGET_ASM_SELECT_RTX_SECTION ++#define TARGET_ASM_SELECT_RTX_SECTION loongarch_select_rtx_section ++#undef TARGET_ASM_FUNCTION_RODATA_SECTION ++#define TARGET_ASM_FUNCTION_RODATA_SECTION loongarch_function_rodata_section ++ ++#undef TARGET_SCHED_INIT ++#define TARGET_SCHED_INIT loongarch_sched_init ++#undef TARGET_SCHED_REORDER ++#define TARGET_SCHED_REORDER loongarch_sched_reorder ++#undef TARGET_SCHED_REORDER2 ++#define TARGET_SCHED_REORDER2 loongarch_sched_reorder2 ++#undef TARGET_SCHED_VARIABLE_ISSUE ++#define TARGET_SCHED_VARIABLE_ISSUE loongarch_variable_issue ++#undef TARGET_SCHED_ADJUST_COST ++#define TARGET_SCHED_ADJUST_COST loongarch_adjust_cost ++#undef TARGET_SCHED_ISSUE_RATE ++#define TARGET_SCHED_ISSUE_RATE loongarch_issue_rate ++#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ++#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ ++ loongarch_multipass_dfa_lookahead ++#undef TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P ++#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P \ ++ loongarch_small_register_classes_for_mode_p ++ ++#undef TARGET_FUNCTION_OK_FOR_SIBCALL ++#define TARGET_FUNCTION_OK_FOR_SIBCALL loongarch_function_ok_for_sibcall ++ ++#undef TARGET_MERGE_DECL_ATTRIBUTES ++#define TARGET_MERGE_DECL_ATTRIBUTES loongarch_merge_decl_attributes ++#undef TARGET_CAN_INLINE_P 
++#define TARGET_CAN_INLINE_P loongarch_can_inline_p ++#undef TARGET_SET_CURRENT_FUNCTION ++#define TARGET_SET_CURRENT_FUNCTION loongarch_set_current_function ++ ++#undef TARGET_VALID_POINTER_MODE ++#define TARGET_VALID_POINTER_MODE loongarch_valid_pointer_mode ++#undef TARGET_REGISTER_MOVE_COST ++#define TARGET_REGISTER_MOVE_COST loongarch_register_move_cost ++#undef TARGET_MEMORY_MOVE_COST ++#define TARGET_MEMORY_MOVE_COST loongarch_memory_move_cost ++#undef TARGET_RTX_COSTS ++#define TARGET_RTX_COSTS loongarch_rtx_costs ++#undef TARGET_ADDRESS_COST ++#define TARGET_ADDRESS_COST loongarch_address_cost ++ ++#undef TARGET_IN_SMALL_DATA_P ++#define TARGET_IN_SMALL_DATA_P loongarch_in_small_data_p ++ ++#undef TARGET_MACHINE_DEPENDENT_REORG ++#define TARGET_MACHINE_DEPENDENT_REORG loongarch_reorg ++ ++#undef TARGET_PREFERRED_RELOAD_CLASS ++#define TARGET_PREFERRED_RELOAD_CLASS loongarch_preferred_reload_class ++ ++#undef TARGET_EXPAND_TO_RTL_HOOK ++#define TARGET_EXPAND_TO_RTL_HOOK loongarch_expand_to_rtl_hook ++#undef TARGET_ASM_FILE_START ++#define TARGET_ASM_FILE_START loongarch_file_start ++#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE ++#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true ++#undef TARGET_ASM_CODE_END ++#define TARGET_ASM_CODE_END loongarch_code_end ++ ++#undef TARGET_EXPAND_BUILTIN_VA_START ++#define TARGET_EXPAND_BUILTIN_VA_START loongarch_va_start ++ ++#undef TARGET_PROMOTE_FUNCTION_MODE ++#define TARGET_PROMOTE_FUNCTION_MODE loongarch_promote_function_mode ++#undef TARGET_RETURN_IN_MEMORY ++#define TARGET_RETURN_IN_MEMORY n_loongarch_return_in_memory ++ ++#undef TARGET_ASM_OUTPUT_MI_THUNK ++#define TARGET_ASM_OUTPUT_MI_THUNK loongarch_output_mi_thunk ++#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK ++#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true ++ ++#undef TARGET_PRINT_OPERAND ++#define TARGET_PRINT_OPERAND loongarch_print_operand ++#undef TARGET_PRINT_OPERAND_ADDRESS ++#define TARGET_PRINT_OPERAND_ADDRESS 
loongarch_print_operand_address ++#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P ++#define TARGET_PRINT_OPERAND_PUNCT_VALID_P loongarch_print_operand_punct_valid_p ++ ++#undef TARGET_SETUP_INCOMING_VARARGS ++#define TARGET_SETUP_INCOMING_VARARGS n_loongarch_setup_incoming_varargs ++#undef TARGET_STRICT_ARGUMENT_NAMING ++#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true ++#undef TARGET_MUST_PASS_IN_STACK ++#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size ++#undef TARGET_PASS_BY_REFERENCE ++#define TARGET_PASS_BY_REFERENCE n_loongarch_pass_by_reference ++#undef TARGET_ARG_PARTIAL_BYTES ++#define TARGET_ARG_PARTIAL_BYTES n_loongarch_arg_partial_bytes ++#undef TARGET_FUNCTION_ARG ++#define TARGET_FUNCTION_ARG n_loongarch_function_arg ++#undef TARGET_FUNCTION_ARG_ADVANCE ++#define TARGET_FUNCTION_ARG_ADVANCE n_loongarch_function_arg_advance ++#undef TARGET_FUNCTION_ARG_BOUNDARY ++#define TARGET_FUNCTION_ARG_BOUNDARY n_loongarch_function_arg_boundary ++ ++#undef TARGET_SCALAR_MODE_SUPPORTED_P ++#define TARGET_SCALAR_MODE_SUPPORTED_P loongarch_scalar_mode_supported_p ++ ++#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE ++#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE loongarch_preferred_simd_mode ++ ++#undef TARGET_INIT_BUILTINS ++#define TARGET_INIT_BUILTINS loongarch_init_builtins ++#undef TARGET_BUILTIN_DECL ++#define TARGET_BUILTIN_DECL loongarch_builtin_decl ++#undef TARGET_EXPAND_BUILTIN ++#define TARGET_EXPAND_BUILTIN loongarch_expand_builtin ++ ++#undef TARGET_HAVE_TLS ++#define TARGET_HAVE_TLS HAVE_AS_TLS ++ ++#undef TARGET_CANNOT_FORCE_CONST_MEM ++#define TARGET_CANNOT_FORCE_CONST_MEM loongarch_cannot_force_const_mem ++ ++#undef TARGET_LEGITIMATE_CONSTANT_P ++#define TARGET_LEGITIMATE_CONSTANT_P loongarch_legitimate_constant_p ++ ++#undef TARGET_ENCODE_SECTION_INFO ++#define TARGET_ENCODE_SECTION_INFO loongarch_encode_section_info ++ ++#undef TARGET_ATTRIBUTE_TABLE ++#define TARGET_ATTRIBUTE_TABLE loongarch_attribute_table ++/* All our 
function attributes are related to how out-of-line copies should ++ be compiled or called. They don't in themselves prevent inlining. */ ++#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P ++#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true ++ ++#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P ++#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true ++#undef TARGET_USE_ANCHORS_FOR_SYMBOL_P ++#define TARGET_USE_ANCHORS_FOR_SYMBOL_P loongarch_use_anchors_for_symbol_p ++ ++#undef TARGET_COMP_TYPE_ATTRIBUTES ++#define TARGET_COMP_TYPE_ATTRIBUTES loongarch_comp_type_attributes ++ ++#ifdef HAVE_AS_DTPRELWORD ++#undef TARGET_ASM_OUTPUT_DWARF_DTPREL ++#define TARGET_ASM_OUTPUT_DWARF_DTPREL loongarch_output_dwarf_dtprel ++#endif ++#undef TARGET_DWARF_REGISTER_SPAN ++#define TARGET_DWARF_REGISTER_SPAN loongarch_dwarf_register_span ++#undef TARGET_DWARF_FRAME_REG_MODE ++#define TARGET_DWARF_FRAME_REG_MODE loongarch_dwarf_frame_reg_mode ++ ++#undef TARGET_LEGITIMATE_ADDRESS_P ++#define TARGET_LEGITIMATE_ADDRESS_P loongarch_legitimate_address_p ++ ++#undef TARGET_FRAME_POINTER_REQUIRED ++#define TARGET_FRAME_POINTER_REQUIRED loongarch_frame_pointer_required ++ ++#undef TARGET_CAN_ELIMINATE ++#define TARGET_CAN_ELIMINATE loongarch_can_eliminate ++ ++#undef TARGET_CONDITIONAL_REGISTER_USAGE ++#define TARGET_CONDITIONAL_REGISTER_USAGE loongarch_conditional_register_usage ++ ++#undef TARGET_TRAMPOLINE_INIT ++#define TARGET_TRAMPOLINE_INIT loongarch_trampoline_init ++ ++#undef TARGET_ASM_OUTPUT_SOURCE_FILENAME ++#define TARGET_ASM_OUTPUT_SOURCE_FILENAME loongarch_output_filename ++ ++#undef TARGET_SHIFT_TRUNCATION_MASK ++#define TARGET_SHIFT_TRUNCATION_MASK loongarch_shift_truncation_mask ++ ++#undef TARGET_PREPARE_PCH_SAVE ++#define TARGET_PREPARE_PCH_SAVE loongarch_prepare_pch_save ++ ++#undef TARGET_VECTORIZE_VEC_PERM_CONST ++#define TARGET_VECTORIZE_VEC_PERM_CONST loongarch_vectorize_vec_perm_const ++ ++#undef TARGET_SCHED_REASSOCIATION_WIDTH ++#define 
TARGET_SCHED_REASSOCIATION_WIDTH loongarch_sched_reassociation_width ++ ++#undef TARGET_CASE_VALUES_THRESHOLD ++#define TARGET_CASE_VALUES_THRESHOLD loongarch_case_values_threshold ++ ++#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV ++#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV loongarch_atomic_assign_expand_fenv ++ ++#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS ++#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true ++ ++#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P ++#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \ ++ loongarch_use_by_pieces_infrastructure_p ++ ++#undef TARGET_SPILL_CLASS ++#define TARGET_SPILL_CLASS loongarch_spill_class ++#undef TARGET_LRA_P ++#define TARGET_LRA_P loongarch_lra_p ++#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS ++#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS loongarch_ira_change_pseudo_allocno_class ++ ++#undef TARGET_HARD_REGNO_SCRATCH_OK ++#define TARGET_HARD_REGNO_SCRATCH_OK loongarch_hard_regno_scratch_ok ++ ++#undef TARGET_HARD_REGNO_NREGS ++#define TARGET_HARD_REGNO_NREGS loongarch_hard_regno_nregs ++#undef TARGET_HARD_REGNO_MODE_OK ++#define TARGET_HARD_REGNO_MODE_OK loongarch_hard_regno_mode_ok ++ ++#undef TARGET_MODES_TIEABLE_P ++#define TARGET_MODES_TIEABLE_P loongarch_modes_tieable_p ++ ++#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED ++#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \ ++ loongarch_hard_regno_call_part_clobbered ++ ++#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS ++#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2 ++ ++#undef TARGET_SECONDARY_MEMORY_NEEDED ++#define TARGET_SECONDARY_MEMORY_NEEDED loongarch_secondary_memory_needed ++ ++#undef TARGET_CAN_CHANGE_MODE_CLASS ++#define TARGET_CAN_CHANGE_MODE_CLASS loongarch_can_change_mode_class ++ ++#undef TARGET_TRULY_NOOP_TRUNCATION ++#define TARGET_TRULY_NOOP_TRUNCATION loongarch_truly_noop_truncation ++ ++#undef TARGET_CONSTANT_ALIGNMENT ++#define TARGET_CONSTANT_ALIGNMENT loongarch_constant_alignment ++ ++#undef TARGET_STARTING_FRAME_OFFSET ++#define 
TARGET_STARTING_FRAME_OFFSET loongarch_starting_frame_offset ++ ++struct gcc_target targetm = TARGET_INITIALIZER; ++ ++#include "gt-loongarch.h" +diff -uNr gcc-10.3.0.org/gcc/config/loongarch/loongarch-c.c gcc-10.3.0/gcc/config/loongarch/loongarch-c.c +--- gcc-10.3.0.org/gcc/config/loongarch/loongarch-c.c 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/config/loongarch/loongarch-c.c 2022-03-23 17:40:29.347280120 +0800 +@@ -0,0 +1,117 @@ ++/* LOONGARCH-specific code for C family languages. ++ Copyright (C) 2011-2018 Free Software Foundation, Inc. ++ Contributed by Andrew Waterman (zhouyingkun@mail.loongson.cn). ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ . */ ++ ++#define IN_TARGET_CODE 1 ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++#include "c-family/c-common.h" ++#include "cpplib.h" ++ ++#define preprocessing_asm_p() (cpp_get_options (pfile)->lang == CLK_ASM) ++#define builtin_define(TXT) cpp_define (pfile, TXT) ++#define builtin_assert(TXT) cpp_assert (pfile, TXT) ++ ++/* TODO: what is the pfile technique ??? !!! 
*/ ++ ++void loongarch_cpu_cpp_builtins (cpp_reader *pfile) ++{ ++ builtin_assert ("machine=loongarch"); ++ builtin_assert ("cpu=loongarch"); ++ builtin_define ("__loongarch__"); ++ ++ if (TARGET_FLOAT64) ++ builtin_define ("__loongarch_fpr=64"); ++ else ++ builtin_define ("__loongarch_fpr=32"); ++ ++ LARCH_CPP_SET_PROCESSOR ("_LOONGARCH_ARCH", loongarch_arch_info); ++ LARCH_CPP_SET_PROCESSOR ("_LOONGARCH_TUNE", loongarch_tune_info); ++ ++ ++ switch (loongarch_abi) ++ { ++ case ABILP32: ++ builtin_define ("_ABILP32=1"); ++ builtin_define ("_LOONGARCH_SIM=_ABILP32"); ++ builtin_define ("__loongarch32"); ++ break; ++ ++ case ABILPX32: ++ builtin_define ("_ABILPX32=2"); ++ builtin_define ("_LOONGARCH_SIM=_ABILPX32"); ++ break; ++ ++ case ABILP64: ++ builtin_define ("_ABILP64=3"); ++ builtin_define ("_LOONGARCH_SIM=_ABILP64"); ++ builtin_define ("__loongarch64"); ++ builtin_define ("__loongarch64__"); ++ break; ++ } ++ ++ builtin_define_with_int_value ("_LOONGARCH_SZINT", INT_TYPE_SIZE); ++ builtin_define_with_int_value ("_LOONGARCH_SZLONG", LONG_TYPE_SIZE); ++ builtin_define_with_int_value ("_LOONGARCH_SZPTR", POINTER_SIZE); ++ builtin_define_with_int_value ("_LOONGARCH_FPSET", ++ 32 / MAX_FPRS_PER_FMT); ++ builtin_define_with_int_value ("_LOONGARCH_SPFPSET", ++ 32); ++ ++ /* These defines reflect the ABI in use, not whether the ++ FPU is directly accessible. */ ++ if (TARGET_NO_FLOAT) ++ builtin_define ("__loongarch_no_float"); ++ else if (TARGET_HARD_FLOAT_ABI) ++ builtin_define ("__loongarch_hard_float"); ++ else ++ builtin_define ("__loongarch_soft_float"); ++ ++ if (TARGET_SINGLE_FLOAT) ++ builtin_define ("__loongarch_single_float"); ++ ++ /* Macros dependent on the C dialect. 
*/ ++ if (preprocessing_asm_p ()) ++ { ++ builtin_define_std ("LANGUAGE_ASSEMBLY"); ++ builtin_define ("_LANGUAGE_ASSEMBLY"); ++ } ++ else if (c_dialect_cxx ()) ++ { ++ builtin_define ("_LANGUAGE_C_PLUS_PLUS"); ++ builtin_define ("__LANGUAGE_C_PLUS_PLUS"); ++ builtin_define ("__LANGUAGE_C_PLUS_PLUS__"); ++ } ++ else ++ { ++ builtin_define_std ("LANGUAGE_C"); ++ builtin_define ("_LANGUAGE_C"); ++ } ++ if (c_dialect_objc ()) ++ { ++ builtin_define ("_LANGUAGE_OBJECTIVE_C"); ++ builtin_define ("__LANGUAGE_OBJECTIVE_C"); ++ /* Bizarre, but retained for backwards compatibility. */ ++ builtin_define_std ("LANGUAGE_C"); ++ builtin_define ("_LANGUAGE_C"); ++ } ++} +diff -uNr gcc-10.3.0.org/gcc/config/loongarch/loongarch-cpus.def gcc-10.3.0/gcc/config/loongarch/loongarch-cpus.def +--- gcc-10.3.0.org/gcc/config/loongarch/loongarch-cpus.def 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/config/loongarch/loongarch-cpus.def 2022-03-23 17:40:29.347280120 +0800 +@@ -0,0 +1,39 @@ ++/* LARCH CPU names. ++ Copyright (C) 1989-2018 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++/* A table describing all the processors GCC knows about. The first ++ mention of an ISA level is taken as the canonical name for that ++ ISA. ++ ++ To ease comparison, please keep this table in the same order ++ as GAS's loongarch_cpu_info_table. 
Please also make sure that ++ LARCH_ISA_LEVEL_SPEC and LARCH_ARCH_FLOAT_SPEC handle all -march ++ options correctly. ++ ++ Before including this file, define a macro: ++ ++ LARCH_CPU (NAME, CPU, ISA, FLAGS) ++ ++ where the arguments are the fields of struct loongarch_cpu_info. */ ++ ++/* Entries for generic ISAs. */ ++LARCH_CPU ("loongarch", PROCESSOR_LOONGARCH64, 65, 0) ++LARCH_CPU ("loongarch64", PROCESSOR_LOONGARCH64, 65, 0) ++LARCH_CPU ("gs464v", PROCESSOR_GS464V, 65, 0) ++ +diff -uNr gcc-10.3.0.org/gcc/config/loongarch/loongarch-d.c gcc-10.3.0/gcc/config/loongarch/loongarch-d.c +--- gcc-10.3.0.org/gcc/config/loongarch/loongarch-d.c 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/config/loongarch/loongarch-d.c 2022-03-23 17:40:29.347280120 +0800 +@@ -0,0 +1,31 @@ ++/* Subroutines for the D front end on the LARCH architecture. ++ Copyright (C) 2017 Free Software Foundation, Inc. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++#include "d/d-target.h" ++#include "d/d-target-def.h" ++ ++/* Implement TARGET_D_CPU_VERSIONS for LARCH targets. */ ++ ++void ++loongarch_d_target_versions (void) ++{ ++ // need to be improved !! 
++} +diff -uNr gcc-10.3.0.org/gcc/config/loongarch/loongarch-ftypes.def gcc-10.3.0/gcc/config/loongarch/loongarch-ftypes.def +--- gcc-10.3.0.org/gcc/config/loongarch/loongarch-ftypes.def 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/config/loongarch/loongarch-ftypes.def 2022-03-23 17:40:29.347280120 +0800 +@@ -0,0 +1,93 @@ ++/* Definitions of prototypes for LARCH built-in functions. -*- C -*- ++ Copyright (C) 2007-2018 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++/* Invoke DEF_LARCH_FTYPE (NARGS, LIST) for each prototype used by ++ LARCH built-in functions, where: ++ ++ NARGS is the number of arguments. ++ LIST contains the return-type code followed by the codes for each ++ argument type. ++ ++ Argument- and return-type codes are either modes or one of the following: ++ ++ VOID for void_type_node ++ INT for integer_type_node ++ POINTER for ptr_type_node ++ ++ (we don't use PTR because that's an ANSI-compatibility macro). ++ ++ Please keep this list lexicographically sorted by the LIST argument.
*/ ++ ++DEF_LARCH_FTYPE (1, (DF, DF)) ++DEF_LARCH_FTYPE (2, (DF, DF, DF)) ++DEF_LARCH_FTYPE (1, (DI, DI)) ++DEF_LARCH_FTYPE (1, (DI, SI)) ++DEF_LARCH_FTYPE (1, (DI, UQI)) ++DEF_LARCH_FTYPE (1, (UDI, USI)) ++DEF_LARCH_FTYPE (1, (UQI, USI)) ++DEF_LARCH_FTYPE (1, (USI, UQI)) ++DEF_LARCH_FTYPE (1, (UHI, USI)) ++DEF_LARCH_FTYPE (2, (DI, DI, DI)) ++DEF_LARCH_FTYPE (2, (DI, DI, SI)) ++DEF_LARCH_FTYPE (2, (DI, DI, UQI)) ++DEF_LARCH_FTYPE (2, (VOID, DI, UQI)) ++DEF_LARCH_FTYPE (2, (VOID, SI, UQI)) ++DEF_LARCH_FTYPE (2, (UDI, UDI, USI)) ++DEF_LARCH_FTYPE (3, (DI, DI, SI, SI)) ++DEF_LARCH_FTYPE (3, (DI, DI, USI, USI)) ++DEF_LARCH_FTYPE (3, (DI, DI, DI, QI)) ++DEF_LARCH_FTYPE (3, (UDI, UDI, UDI, USI)) ++DEF_LARCH_FTYPE (2, (DI, POINTER, SI)) ++DEF_LARCH_FTYPE (2, (DI, SI, SI)) ++DEF_LARCH_FTYPE (2, (DI, USI, USI)) ++DEF_LARCH_FTYPE (2, (INT, DF, DF)) ++DEF_LARCH_FTYPE (2, (INT, SF, SF)) ++DEF_LARCH_FTYPE (1, (SF, SF)) ++DEF_LARCH_FTYPE (2, (SF, SF, SF)) ++DEF_LARCH_FTYPE (2, (SI, DI, SI)) ++DEF_LARCH_FTYPE (2, (SI, POINTER, SI)) ++DEF_LARCH_FTYPE (1, (SI, SI)) ++DEF_LARCH_FTYPE (1, (USI, USI)) ++DEF_LARCH_FTYPE (1, (SI, UDI)) ++DEF_LARCH_FTYPE (2, (QI, QI, QI)) ++DEF_LARCH_FTYPE (2, (HI, HI, HI)) ++DEF_LARCH_FTYPE (2, (SI, SI, SI)) ++DEF_LARCH_FTYPE (2, (SI, QI, SI)) ++DEF_LARCH_FTYPE (2, (SI, HI, SI)) ++DEF_LARCH_FTYPE (2, (SI, SI, UQI)) ++DEF_LARCH_FTYPE (2, (USI, USI, USI)) ++DEF_LARCH_FTYPE (3, (SI, SI, SI, SI)) ++DEF_LARCH_FTYPE (3, (SI, SI, SI, QI)) ++DEF_LARCH_FTYPE (3, (USI, USI, USI, USI)) ++DEF_LARCH_FTYPE (1, (SI, UQI)) ++DEF_LARCH_FTYPE (1, (SI, VOID)) ++DEF_LARCH_FTYPE (2, (UDI, UDI, UDI)) ++DEF_LARCH_FTYPE (1, (USI, VOID)) ++DEF_LARCH_FTYPE (2, (VOID, SI, CVPOINTER)) ++DEF_LARCH_FTYPE (2, (VOID, SI, SI)) ++DEF_LARCH_FTYPE (2, (VOID, DI, DI)) ++DEF_LARCH_FTYPE (2, (VOID, UQI, SI)) ++DEF_LARCH_FTYPE (1, (VOID, USI)) ++DEF_LARCH_FTYPE (2, (VOID, USI, UQI)) ++DEF_LARCH_FTYPE (2, (VOID, UQI, USI)) ++DEF_LARCH_FTYPE (2, (VOID, UHI, USI)) ++DEF_LARCH_FTYPE (2, (VOID, 
USI, USI)) ++DEF_LARCH_FTYPE (2, (VOID, UDI, USI)) ++DEF_LARCH_FTYPE (3, (VOID, USI, USI, SI)) ++DEF_LARCH_FTYPE (3, (VOID, USI, UDI, SI)) +diff -uNr gcc-10.3.0.org/gcc/config/loongarch/loongarch.h gcc-10.3.0/gcc/config/loongarch/loongarch.h +--- gcc-10.3.0.org/gcc/config/loongarch/loongarch.h 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/config/loongarch/loongarch.h 2022-03-23 17:40:29.348280112 +0800 +@@ -0,0 +1,2076 @@ ++/* Definitions of target machine for GNU compiler. LARCH version. ++ Copyright (C) 1989-2018 Free Software Foundation, Inc. ++ Contributed by A. Lichnewsky (lich@inria.inria.fr). ++ Changed by Michael Meissner (meissner@osf.org). ++ 64-bit r4000 support by Ian Lance Taylor (ian@cygnus.com) and ++ Brendan Eich (brendan@microunity.com). ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++ ++#include "config/vxworks-dummy.h" ++ ++#ifdef GENERATOR_FILE ++/* This is used in some insn conditions, so needs to be declared, but ++ does not need to be defined. */ ++extern int target_flags_explicit; ++#endif ++ ++/* LARCH external variables defined in loongarch.c. */ ++ ++/* Which ABI to use. ABILP32 (original 32, or o32), ABILPX32 (n32), ++ ABILP64 (n64) are all defined by SGI. */ ++ ++#define ABILP32 0 ++#define ABILPX32 1 ++#define ABILP64 2 ++ ++/* Information about one recognized processor. Defined here for the ++ benefit of TARGET_CPU_CPP_BUILTINS. 
*/ ++struct loongarch_cpu_info { ++ /* The 'canonical' name of the processor as far as GCC is concerned. ++ It's typically a manufacturer's prefix followed by a numerical ++ designation. It should be lowercase. */ ++ const char *name; ++ ++ /* The internal processor number that most closely matches this ++ entry. Several processors can have the same value, if there's no ++ difference between them from GCC's point of view. */ ++ enum processor cpu; ++ ++ /* The ISA level that the processor implements. */ ++ int isa; ++ ++ /* A mask of PTF_* values. */ ++ unsigned int tune_flags; ++}; ++ ++#include "config/loongarch/loongarch-opts.h" ++ ++/* Macros to silence warnings about numbers being signed in traditional ++ C and unsigned in ISO C when compiled on 32-bit hosts. */ ++ ++#define BITMASK_HIGH (((unsigned long)1) << 31) /* 0x80000000 */ ++#define BITMASK_UPPER16 ((unsigned long)0xffff << 16) /* 0xffff0000 */ ++#define BITMASK_LOWER16 ((unsigned long)0xffff) /* 0x0000ffff */ ++ ++ ++/* Run-time compilation parameters selecting different hardware subsets. */ ++ ++/* True if we are generating position-independent VxWorks RTP code. */ ++#define TARGET_RTP_PIC (TARGET_VXWORKS_RTP && flag_pic) ++ ++/* True if we can optimize sibling calls. For simplicity, we only ++ handle cases in which call_insn_operand will reject invalid ++ sibcall addresses. There are two cases in which this isn't true: ++ ++ - TARGET_USE_GOT && !TARGET_EXPLICIT_RELOCS. call_insn_operand ++ accepts global constants, but all sibcalls must be indirect. */ ++#define TARGET_SIBCALLS (1) ++ ++/* True if we can use the J and JAL instructions. */ ++#define TARGET_ABSOLUTE_JUMPS (!flag_pic) ++ ++/* True if the output must have a writable .eh_frame. ++ See ASM_PREFERRED_EH_DATA_FORMAT for details. */ ++#ifdef HAVE_LD_PERSONALITY_RELAXATION ++#define TARGET_WRITABLE_EH_FRAME 0 ++#else ++#define TARGET_WRITABLE_EH_FRAME (flag_pic && TARGET_SHARED) ++#endif ++ ++ ++/* ISA has LSA available. 
*/ ++#define ISA_HAS_LSA (1) ++ ++/* ISA has DLSA available. */ ++#define ISA_HAS_DLSA (TARGET_64BIT) ++ ++/* Architecture target defines. */ ++#define TARGET_LOONGARCH64 (loongarch_arch == PROCESSOR_LOONGARCH64) ++#define TUNE_LOONGARCH64 (loongarch_tune == PROCESSOR_LOONGARCH64) ++#define TARGET_GS464V (loongarch_arch == PROCESSOR_GS464V) ++#define TUNE_GS464V (loongarch_tune == PROCESSOR_GS464V) ++/* True if the pre-reload scheduler should try to create chains of ++ multiply-add or multiply-subtract instructions. For example, ++ suppose we have: ++ ++ t1 = a * b ++ t2 = t1 + c * d ++ t3 = e * f ++ t4 = t3 - g * h ++ ++ t1 will have a higher priority than t2 and t3 will have a higher ++ priority than t4. However, before reload, there is no dependence ++ between t1 and t3, and they can often have similar priorities. ++ The scheduler will then tend to prefer: ++ ++ t1 = a * b ++ t3 = e * f ++ t2 = t1 + c * d ++ t4 = t3 - g * h ++ ++ which stops us from making full use of macc/madd-style instructions. ++ This sort of situation occurs frequently in Fourier transforms and ++ in unrolled loops. ++ ++ To counter this, the TUNE_MACC_CHAINS code will reorder the ready ++ queue so that chained multiply-add and multiply-subtract instructions ++ appear ahead of any other instruction that is likely to clobber lo. ++ In the example above, if t2 and t3 become ready at the same time, ++ the code ensures that t2 is scheduled first. ++ ++ Multiply-accumulate instructions are a bigger win for some targets ++ than others, so this macro is defined on an opt-in basis. */ ++#define TUNE_MACC_CHAINS 0 ++ ++#define TARGET_OLDABI (loongarch_abi == ABILP32) ++#define TARGET_NEWABI (loongarch_abi == ABILPX32 || loongarch_abi == ABILP64) ++ ++/* TARGET_HARD_FLOAT and TARGET_SOFT_FLOAT reflect whether the FPU is ++ directly accessible, while the command-line options select ++ TARGET_HARD_FLOAT_ABI and TARGET_SOFT_FLOAT_ABI to reflect the ABI ++ in use. 
*/ ++#define TARGET_HARD_FLOAT (TARGET_HARD_FLOAT_ABI) ++#define TARGET_SOFT_FLOAT (TARGET_SOFT_FLOAT_ABI) ++ ++/* TARGET_FLOAT64 represents -mfp64; derive TARGET_FLOAT32 from it ++ to represent -mfp32. */ ++#define TARGET_FLOAT32 (!TARGET_FLOAT64) ++ ++/* False if SC acts as a memory barrier with respect to itself, ++ otherwise a SYNC will be emitted after SC for atomic operations ++ that require ordering between the SC and following loads and ++ stores. It does not tell anything about ordering of loads and ++ stores prior to and following the SC, only about the SC itself and ++ those loads and stores that follow it. */ ++#define TARGET_SYNC_AFTER_SC (1) ++ ++/* Define preprocessor macros for the -march and -mtune options. ++ PREFIX is either _LOONGARCH_ARCH or _LOONGARCH_TUNE, INFO is the selected ++ processor. If INFO's canonical name is "foo", define PREFIX to ++ be "foo", and define an additional macro PREFIX_FOO. */ ++#define LARCH_CPP_SET_PROCESSOR(PREFIX, INFO) \ ++ do \ ++ { \ ++ char *macro, *p; \ ++ \ ++ macro = concat ((PREFIX), "_", (INFO)->name, NULL); \ ++ for (p = macro; *p != 0; p++) \ ++ if (*p == '+') \ ++ *p = 'P'; \ ++ else \ ++ *p = TOUPPER (*p); \ ++ \ ++ builtin_define (macro); \ ++ builtin_define_with_value ((PREFIX), (INFO)->name, 1); \ ++ free (macro); \ ++ } \ ++ while (0) ++ ++/* Target CPU builtins. */ ++#define TARGET_CPU_CPP_BUILTINS() loongarch_cpu_cpp_builtins (pfile) ++ ++/* Target CPU versions for D.
*/ ++#define TARGET_D_CPU_VERSIONS loongarch_d_target_versions ++ ++/* Default target_flags if no switches are specified */ ++ ++#ifndef TARGET_DEFAULT ++#define TARGET_DEFAULT 0 ++#endif ++ ++#ifndef TARGET_CPU_DEFAULT ++#define TARGET_CPU_DEFAULT 0 ++#endif ++ ++#ifndef LARCH_CPU_STRING_DEFAULT ++#define LARCH_CPU_STRING_DEFAULT "gs464v" ++#endif ++#ifndef TARGET_ENDIAN_DEFAULT ++#define TARGET_ENDIAN_DEFAULT MASK_BIG_ENDIAN ++#endif ++ ++#ifdef IN_LIBGCC2 ++#undef TARGET_64BIT ++/* Make this compile time constant for libgcc2 */ ++#ifdef __loongarch64 ++#define TARGET_64BIT 1 ++#else ++#define TARGET_64BIT 0 ++#endif ++#endif /* IN_LIBGCC2 */ ++ ++#define TARGET_LIBGCC_SDATA_SECTION ".sdata" ++ ++#ifndef MULTILIB_ENDIAN_DEFAULT ++#if TARGET_ENDIAN_DEFAULT == 0 ++#define MULTILIB_ENDIAN_DEFAULT "EL" ++#else ++#define MULTILIB_ENDIAN_DEFAULT "EB" ++#endif ++#endif ++ ++#ifndef MULTILIB_ISA_DEFAULT ++#define MULTILIB_ISA_DEFAULT "loongarch64" ++#endif ++ ++#ifndef LARCH_ABI_DEFAULT ++#define LARCH_ABI_DEFAULT ABILP32 ++#endif ++ ++/* Use the most portable ABI flag for the ASM specs. */ ++ ++#if LARCH_ABI_DEFAULT == ABILP32 ++#define MULTILIB_ABI_DEFAULT "mabi=lp32" ++#elif LARCH_ABI_DEFAULT == ABILP64 ++#define MULTILIB_ABI_DEFAULT "mabi=lp64" ++#endif ++ ++#ifndef MULTILIB_DEFAULTS ++#define MULTILIB_DEFAULTS \ ++ { MULTILIB_ENDIAN_DEFAULT, MULTILIB_ISA_DEFAULT, MULTILIB_ABI_DEFAULT } ++#endif ++ ++/* We must pass -EL to the linker by default for little endian embedded ++ targets using linker scripts with a OUTPUT_FORMAT line. Otherwise, the ++ linker will default to using big-endian output files. The OUTPUT_FORMAT ++ line must be in the linker script, otherwise -EB/-EL will not work. */ ++ ++// #ifndef ENDIAN_SPEC ++// #if TARGET_ENDIAN_DEFAULT == 0 ++// #define ENDIAN_SPEC "%{!EB:%{!meb:-EL}} %{EB|meb:-EB}" ++// #else ++// #define ENDIAN_SPEC "%{!EL:%{!mel:-EB}} %{EL|mel:-EL}" ++// #endif ++// #endif ++ ++/* A spec condition that matches all -loongarch arguments. 
*/ ++ ++#define LARCH_ISA_LEVEL_OPTION_SPEC \ ++ "loongarch" ++ ++/* A spec condition that matches all architecture arguments. */ ++ ++#define LARCH_ARCH_OPTION_SPEC \ ++ LARCH_ISA_LEVEL_OPTION_SPEC "|march=*" ++ ++/* A spec that infers a -loongarch argument from an -march argument. */ ++ ++#define LARCH_ISA_LEVEL_SPEC \ ++ "%{" LARCH_ISA_LEVEL_OPTION_SPEC ":;:}" ++ ++/* A spec that injects the default multilib ISA if no architecture is ++ specified. */ ++ ++#define LARCH_DEFAULT_ISA_LEVEL_SPEC \ ++ "%{" LARCH_ISA_LEVEL_OPTION_SPEC ":;: \ ++ %{!march=*: -" MULTILIB_ISA_DEFAULT "}}" ++ ++/* A spec that infers a -mhard-float or -msoft-float setting from an ++ -march argument. Note that soft-float and hard-float code are not ++ link-compatible. */ ++ ++#define LARCH_ARCH_FLOAT_SPEC \ ++ "%{mhard-float|msoft-float|mno-float|march=loongarch*:; \ ++ march=vr41*|march=m4k|march=4k*|march=24kc|march=24kec \ ++ |march=34kc|march=34kn|march=74kc|march=1004kc|march=5kc \ ++ |march=m14k*|march=m5101|march=octeon|march=xlr: -msoft-float; \ ++ march=*: -mhard-float}" ++ ++/* A spec condition that matches 32-bit options. It only works if ++ LARCH_ISA_LEVEL_SPEC has been applied. */ ++ ++#define LARCH_32BIT_OPTION_SPEC \ ++ "loongarch1|loongarch2|loongarch32*|mgp32" ++ ++#if (LARCH_ABI_DEFAULT == ABILPX32 \ ++ || LARCH_ABI_DEFAULT == ABILP64) ++#define OPT_ARCH64 "mabi=32|mgp32:;" ++#define OPT_ARCH32 "mabi=32|mgp32" ++#else ++#define OPT_ARCH64 "mabi=o64|mabi=n32|mabi=64|mgp64" ++#define OPT_ARCH32 "mabi=o64|mabi=n32|mabi=64|mgp64:;" ++#endif ++ ++/* Support for a compile-time default CPU, et cetera. The rules are: ++ --with-arch is ignored if -march is specified or a -loongarch is specified ++ ; likewise --with-arch-32 and --with-arch-64. ++ --with-tune is ignored if -mtune is specified; likewise ++ --with-tune-32 and --with-tune-64. ++ --with-abi is ignored if -mabi is specified. ++ --with-float is ignored if -mhard-float or -msoft-float are ++ specified. 
++ --with-fpu is ignored if -msoft-float, -msingle-float or -mdouble-float are ++ specified. ++ --with-fp-32 is ignored if -msoft-float, -msingle-float or -mfp are ++ specified. ++ --with-divide is ignored if -mdivide-traps or -mdivide-breaks are ++ specified. */ ++#define OPTION_DEFAULT_SPECS \ ++ {"arch", "%{" LARCH_ARCH_OPTION_SPEC ":;: -march=%(VALUE)}" }, \ ++ {"arch_32", "%{" OPT_ARCH32 ":%{" LARCH_ARCH_OPTION_SPEC ":;: -march=%(VALUE)}}" }, \ ++ {"arch_64", "%{" OPT_ARCH64 ":%{" LARCH_ARCH_OPTION_SPEC ":;: -march=%(VALUE)}}" }, \ ++ {"tune", "%{!mtune=*:-mtune=%(VALUE)}" }, \ ++ {"tune_32", "%{" OPT_ARCH32 ":%{!mtune=*:-mtune=%(VALUE)}}" }, \ ++ {"tune_64", "%{" OPT_ARCH64 ":%{!mtune=*:-mtune=%(VALUE)}}" }, \ ++ {"abi", "%{!mabi=*:-mabi=%(VALUE)}" }, \ ++ {"float", "%{!msoft-float:%{!mhard-float:-m%(VALUE)-float}}" }, \ ++ {"fpu", "%{!msoft-float:%{!msingle-float:%{!mdouble-float:-m%(VALUE)-float}}}" }, \ ++ {"fp_32", "%{" OPT_ARCH32 \ ++ ":%{!msoft-float:%{!msingle-float:%{!mfp*:-mfp%(VALUE)}}}}" }, \ ++ {"divide", "%{!mdivide-traps:%{!mdivide-breaks:-mdivide-%(VALUE)}}" }, \ ++ {"fix-loongson3-llsc", "%{!mfix-loongson3-llsc: \ ++ %{!mno-fix-loongson3-llsc:-m%(VALUE)}}" } ++ ++ ++/* A spec that infers the: ++ -mlvz setting from a -march=gs464v argument. */ ++#define BASE_DRIVER_SELF_SPECS \ ++ LARCH_ASE_LVZ_SPEC ++ ++#define LARCH_ASE_LVZ_SPEC \ ++ "%{!mno-lvz: \ ++ %{march=gs464v: -mlvz}}" ++ ++#define DRIVER_SELF_SPECS \ ++ BASE_DRIVER_SELF_SPECS ++ ++/* from N_LARCH */ ++#define ABI_SPEC \ ++ "%{mabi=lp32:32}" \ ++ "%{mabi=lp64:64}" \ ++ ++#define STARTFILE_PREFIX_SPEC \ ++ "/lib" ABI_SPEC "/ " \ ++ "/usr/lib" ABI_SPEC "/ " \ ++ "/lib/ " \ ++ "/usr/lib/ " ++ ++/* This definition replaces the formerly used 'm' constraint with a ++ different constraint letter in order to avoid changing semantics of ++ the 'm' constraint when accepting new address formats in ++ TARGET_LEGITIMATE_ADDRESS_P.
The constraint letter defined here ++ must not be used in insn definitions or inline assemblies. */ ++#define TARGET_MEM_CONSTRAINT 'w' ++ ++/* Likewise for 32-bit regs. */ ++#define ABI_NEEDS_32BIT_REGS (loongarch_abi == ABILP32) ++ ++/* True if the file format uses 64-bit symbols. At present, this is ++ only true for n64, which uses 64-bit ELF. */ ++#define FILE_HAS_64BIT_SYMBOLS (loongarch_abi == ABILP64) ++ ++/* True if symbols are 64 bits wide. This is usually determined by ++ the ABI's file format, but it can be overridden by -msym32. Note that ++ overriding the size with -msym32 changes the ABI of relocatable objects, ++ although it doesn't change the ABI of a fully-linked object. */ ++#define ABI_HAS_64BIT_SYMBOLS (FILE_HAS_64BIT_SYMBOLS \ ++ && Pmode == DImode \ ++ && !TARGET_SYM32) ++ ++/* ISA supports instructions DMUL, DMULU, DMUH, DMUHU. */ ++#define ISA_HAS_DMUL (TARGET_64BIT) ++ ++/* ISA has floating-point RECIP.fmt and RSQRT.fmt instructions. The ++ LARCH64 rev. 1 ISA says that RECIP.D and RSQRT.D are unpredictable when ++ doubles are stored in pairs of FPRs, so for safety's sake, we apply ++ this restriction to the LARCH IV ISA too. */ ++#define ISA_HAS_FP_RECIP_RSQRT(MODE) \ ++ ((MODE) == SFmode \ ++ || (TARGET_FLOAT64 \ ++ && (MODE) == DFmode)) ++ ++/* ISA has instructions for accessing top part of 64-bit fp regs. */ ++#define ISA_HAS_MXFRH (!TARGET_FLOAT32) ++ ++/* The LoongArch VZ is available. */ ++#define ISA_HAS_LVZ (TARGET_LVZ) ++ ++/* Tell collect what flags to pass to nm. */ ++#ifndef NM_FLAGS ++#define NM_FLAGS "-Bn" ++#endif ++ ++ ++/* SUBTARGET_ASM_DEBUGGING_SPEC handles passing debugging options to ++ the assembler. It may be overridden by subtargets. ++ ++ Beginning with gas 2.13, -mdebug must be passed to correctly handle ++ COFF debugging info. 
*/ ++ ++#ifndef SUBTARGET_ASM_DEBUGGING_SPEC ++#define SUBTARGET_ASM_DEBUGGING_SPEC "\ ++%{g} %{g0} %{g1} %{g2} %{g3} \ ++%{ggdb:-g} %{ggdb0:-g0} %{ggdb1:-g1} %{ggdb2:-g2} %{ggdb3:-g3} \ ++%{gstabs:-g} %{gstabs0:-g0} %{gstabs1:-g1} %{gstabs2:-g2} %{gstabs3:-g3} \ ++%{gstabs+:-g} %{gstabs+0:-g0} %{gstabs+1:-g1} %{gstabs+2:-g2} %{gstabs+3:-g3}" ++#endif ++ ++/* FP_ASM_SPEC represents the floating-point options that must be passed ++ to the assembler when FPXX support exists. Prior to that point the ++ assembler could accept the options but were not required for ++ correctness. We only add the options when absolutely necessary ++ because passing -msoft-float to the assembler will cause it to reject ++ all hard-float instructions which may require some user code to be ++ updated. */ ++ ++#ifdef HAVE_AS_DOT_MODULE ++#define FP_ASM_SPEC "\ ++%{mhard-float} %{msoft-float} \ ++%{msingle-float} %{mdouble-float}" ++#else ++#define FP_ASM_SPEC ++#endif ++ ++/* SUBTARGET_ASM_SPEC is always passed to the assembler. It may be ++ overridden by subtargets. */ ++ ++#ifndef SUBTARGET_ASM_SPEC ++#define SUBTARGET_ASM_SPEC "" ++#endif ++ ++#undef ASM_SPEC ++#define ASM_SPEC "\ ++%{mabi=*} %{!mabi=*: %(asm_abi_default_spec)} \ ++" ++/* Extra switches sometimes passed to the linker. */ ++ ++#ifndef LINK_SPEC ++#define LINK_SPEC "" ++#endif /* LINK_SPEC defined */ ++ ++ ++/* Specs for the compiler proper */ ++ ++/* SUBTARGET_CC1_SPEC is passed to the compiler proper. It may be ++ overridden by subtargets. */ ++#ifndef SUBTARGET_CC1_SPEC ++#define SUBTARGET_CC1_SPEC "" ++#endif ++ ++/* CC1_SPEC is the set of arguments to pass to the compiler proper. */ ++ ++#undef CC1_SPEC ++#define CC1_SPEC "\ ++%{G*} %{EB:-meb} %{EL:-mel} %{EB:%{EL:%emay not use both -EB and -EL}} \ ++%(subtarget_cc1_spec)" ++ ++/* Preprocessor specs. */ ++ ++/* SUBTARGET_CPP_SPEC is passed to the preprocessor. It may be ++ overridden by subtargets. 
*/ ++#ifndef SUBTARGET_CPP_SPEC ++#define SUBTARGET_CPP_SPEC "" ++#endif ++ ++#define CPP_SPEC "%(subtarget_cpp_spec)" ++ ++/* This macro defines names of additional specifications to put in the specs ++ that can be used in various specifications like CC1_SPEC. Its definition ++ is an initializer with a subgrouping for each command option. ++ ++ Each subgrouping contains a string constant, that defines the ++ specification name, and a string constant that used by the GCC driver ++ program. ++ ++ Do not define this macro if it does not need to do anything. */ ++ ++#define EXTRA_SPECS \ ++ { "subtarget_cc1_spec", SUBTARGET_CC1_SPEC }, \ ++ { "subtarget_cpp_spec", SUBTARGET_CPP_SPEC }, \ ++ { "subtarget_asm_debugging_spec", SUBTARGET_ASM_DEBUGGING_SPEC }, \ ++ { "subtarget_asm_spec", SUBTARGET_ASM_SPEC }, \ ++ { "asm_abi_default_spec", "-" MULTILIB_ABI_DEFAULT }, \ ++ SUBTARGET_EXTRA_SPECS ++ ++#ifndef SUBTARGET_EXTRA_SPECS ++#define SUBTARGET_EXTRA_SPECS ++#endif ++ ++#define DBX_DEBUGGING_INFO 1 /* generate stabs (OSF/rose) */ ++#define DWARF2_DEBUGGING_INFO 1 /* dwarf2 debugging info */ ++ ++#ifndef PREFERRED_DEBUGGING_TYPE ++#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG ++#endif ++ ++/* The size of DWARF addresses should be the same as the size of symbols ++ in the target file format. They shouldn't depend on things like -msym32, ++ because many DWARF consumers do not allow the mixture of address sizes ++ that one would then get from linking -msym32 code with -msym64 code. ++*/ ++#define DWARF2_ADDR_SIZE (FILE_HAS_64BIT_SYMBOLS ? 8 : 4) ++ ++/* By default, turn on GDB extensions. */ ++#define DEFAULT_GDB_EXTENSIONS 1 ++ ++/* Registers may have a prefix which can be ignored when matching ++ user asm and register definitions. */ ++#ifndef REGISTER_PREFIX ++#define REGISTER_PREFIX "$" ++#endif ++ ++/* Local compiler-generated symbols must have a prefix that the assembler ++ understands. 
By default, this is $, although some targets (e.g., ++ NetBSD-ELF) need to override this. */ ++ ++#ifndef LOCAL_LABEL_PREFIX ++#define LOCAL_LABEL_PREFIX "$" ++#endif ++ ++/* By default on the loongarch, external symbols do not have an underscore ++ prepended, but some targets (e.g., NetBSD) require this. */ ++ ++#ifndef USER_LABEL_PREFIX ++#define USER_LABEL_PREFIX "" ++#endif ++ ++/* On Sun 4, this limit is 2048. We use 1500 to be safe, ++ since the length can run past this up to a continuation point. */ ++#undef DBX_CONTIN_LENGTH ++#define DBX_CONTIN_LENGTH 1500 ++ ++/* How to renumber registers for dbx and gdb. */ ++#define DBX_REGISTER_NUMBER(REGNO) loongarch_dbx_regno[REGNO] ++ ++/* The mapping from gcc register number to DWARF 2 CFA column number. */ ++#define DWARF_FRAME_REGNUM(REGNO) loongarch_dwarf_regno[REGNO] ++ ++/* The DWARF 2 CFA column which tracks the return address. */ ++#define DWARF_FRAME_RETURN_COLUMN RETURN_ADDR_REGNUM ++ ++/* Before the prologue, RA lives in r1. */ ++#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM) ++ ++/* Describe how we implement __builtin_eh_return. EH data registers 0..3 map to GP_ARG_FIRST .. GP_ARG_FIRST + 3 (any other N yields INVALID_REGNUM), and the stack adjustment is kept in the next register, GP_ARG_FIRST + 4. */ ++#define EH_RETURN_DATA_REGNO(N) \ ++ ((N) < (4) ? (N) + GP_ARG_FIRST : INVALID_REGNUM) ++ ++#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, GP_ARG_FIRST + 4) ++ ++#define EH_USES(N) loongarch_eh_uses (N) ++ ++/* Offsets recorded in opcodes are a multiple of this alignment factor. ++ The default for this in 64-bit mode is 8, which causes problems with ++ SFmode register saves. */ ++#define DWARF_CIE_DATA_ALIGNMENT -4 ++ ++/* Correct the offset of automatic variables and arguments. Note that ++ the LARCH debug format wants all automatic variables and arguments ++ to be in terms of the virtual frame pointer (stack pointer before ++ any adjustment in the function), while the LARCH 3.0 linker wants ++ the frame pointer to be the stack pointer after the initial ++ adjustment. 
*/ ++ ++#define DEBUGGER_AUTO_OFFSET(X) \ ++ loongarch_debugger_offset (X, (HOST_WIDE_INT) 0) ++#define DEBUGGER_ARG_OFFSET(OFFSET, X) \ ++ loongarch_debugger_offset (X, (HOST_WIDE_INT) OFFSET) ++ ++/* Target machine storage layout. Bit, byte and word numbering are all little-endian: BITS_BIG_ENDIAN, BYTES_BIG_ENDIAN and WORDS_BIG_ENDIAN are each 0. */ ++ ++#define BITS_BIG_ENDIAN 0 ++#define BYTES_BIG_ENDIAN 0 ++#define WORDS_BIG_ENDIAN 0 ++ ++#define MAX_BITS_PER_WORD 64 ++ ++/* Width of a word, in units (bytes). */ ++#define UNITS_PER_WORD (TARGET_64BIT ? 8 : 4) ++#ifndef IN_LIBGCC2 ++#define MIN_UNITS_PER_WORD 4 ++#endif ++ ++/* For LARCH, width of a floating point register. */ ++#define UNITS_PER_FPREG (TARGET_FLOAT64 ? 8 : 4) ++ ++/* The number of consecutive floating-point registers needed to store the ++ largest format supported by the FPU. */ ++#define MAX_FPRS_PER_FMT (TARGET_FLOAT64 || TARGET_SINGLE_FLOAT ? 1 : 2) ++ ++/* The number of consecutive floating-point registers needed to store the ++ smallest format supported by the FPU. */ ++#define MIN_FPRS_PER_FMT 1 ++ ++/* The largest size of value that can be held in floating-point ++ registers and moved with a single instruction. */ ++#define UNITS_PER_HWFPVALUE \ ++ (TARGET_SOFT_FLOAT_ABI ? 0 : MAX_FPRS_PER_FMT * UNITS_PER_FPREG) ++ ++/* The largest size of value that can be held in floating-point ++ registers. */ ++#define UNITS_PER_FPVALUE \ ++ (TARGET_SOFT_FLOAT_ABI ? 0 \ ++ : TARGET_SINGLE_FLOAT ? UNITS_PER_FPREG \ ++ : LONG_DOUBLE_TYPE_SIZE / BITS_PER_UNIT) ++ ++/* The number of bytes in a double. */ ++#define UNITS_PER_DOUBLE (TYPE_PRECISION (double_type_node) / BITS_PER_UNIT) ++ ++/* Set the sizes of the core types. long is 64 bits only under TARGET_LONG64, and long double is 128 bits only under TARGET_NEWABI. */ ++#define SHORT_TYPE_SIZE 16 ++#define INT_TYPE_SIZE 32 ++#define LONG_TYPE_SIZE (TARGET_LONG64 ? 64 : 32) ++#define LONG_LONG_TYPE_SIZE 64 ++ ++#define FLOAT_TYPE_SIZE 32 ++#define DOUBLE_TYPE_SIZE 64 ++#define LONG_DOUBLE_TYPE_SIZE (TARGET_NEWABI ? 128 : 64) ++ ++/* Define the sizes of fixed-point types. 
*/ ++#define SHORT_FRACT_TYPE_SIZE 8 ++#define FRACT_TYPE_SIZE 16 ++#define LONG_FRACT_TYPE_SIZE 32 ++#define LONG_LONG_FRACT_TYPE_SIZE 64 ++ ++#define SHORT_ACCUM_TYPE_SIZE 16 ++#define ACCUM_TYPE_SIZE 32 ++#define LONG_ACCUM_TYPE_SIZE 64 ++/* FIXME. LONG_LONG_ACCUM_TYPE_SIZE should be 128 bits, but GCC ++ doesn't support 128-bit integers for LARCH32 currently. */ ++#define LONG_LONG_ACCUM_TYPE_SIZE (TARGET_64BIT ? 128 : 64) ++ ++/* long double is not a fixed mode, but the idea is that, if we ++ support long double, we also want a 128-bit integer type. */ ++#define MAX_FIXED_MODE_SIZE LONG_DOUBLE_TYPE_SIZE ++ ++/* Width in bits of a pointer: 64 only when both TARGET_LONG64 and TARGET_64BIT hold, otherwise 32. */ ++#ifndef POINTER_SIZE ++#define POINTER_SIZE ((TARGET_LONG64 && TARGET_64BIT) ? 64 : 32) ++#endif ++ ++/* Allocation boundary (in *bits*) for storing arguments in argument list. */ ++#define PARM_BOUNDARY BITS_PER_WORD ++ ++/* Allocation boundary (in *bits*) for the code of a function. */ ++#define FUNCTION_BOUNDARY 32 ++ ++/* Alignment of field after `int : 0' in a structure. */ ++#define EMPTY_FIELD_BOUNDARY 32 ++ ++/* Every structure's size must be a multiple of this. */ ++/* 8 is observed right on a DECstation and on riscos 4.02. */ ++#define STRUCTURE_SIZE_BOUNDARY 8 ++ ++/* There is no point aligning anything to a rounder boundary than ++ LONG_DOUBLE_TYPE_SIZE. ++*/ ++#define BIGGEST_ALIGNMENT (LONG_DOUBLE_TYPE_SIZE) ++ ++/* All accesses must be aligned. */ ++#define STRICT_ALIGNMENT 1 ++ ++/* Define this if you wish to imitate the way many other C compilers ++ handle alignment of bitfields and the structures that contain ++ them. ++ ++ The behavior is that the type written for a bit-field (`int', ++ `short', or other integer type) imposes an alignment for the ++ entire structure, as if the structure really did contain an ++ ordinary field of that type. In addition, the bit-field is placed ++ within the structure so that it would fit within such a field, ++ not crossing a boundary for it. 
++ ++ Thus, on most machines, a bit-field whose type is written as `int' ++ would not cross a four-byte boundary, and would force four-byte ++ alignment for the whole structure. (The alignment used may not ++ be four bytes; it is controlled by the other alignment ++ parameters.) ++ ++ If the macro is defined, its definition should be a C expression; ++ a nonzero value for the expression enables this behavior. */ ++ ++#define PCC_BITFIELD_TYPE_MATTERS 1 ++ ++/* If defined, a C expression to compute the alignment for a static ++ variable. TYPE is the data type, and ALIGN is the alignment that ++ the object would ordinarily have. The value of this macro is used ++ instead of that alignment to align the object. ++ ++ If this macro is not defined, then ALIGN is used. ++ ++ One use of this macro is to increase alignment of medium-size ++ data to make it all fit in fewer cache lines. Another is to ++ cause character arrays to be word-aligned so that `strcpy' calls ++ that copy constants to character arrays can be done inline. In this definition, arrays, unions and records whose alignment is below BITS_PER_WORD are raised to BITS_PER_WORD; every other object keeps ALIGN. */ ++ ++#undef DATA_ALIGNMENT ++#define DATA_ALIGNMENT(TYPE, ALIGN) \ ++ ((((ALIGN) < BITS_PER_WORD) \ ++ && (TREE_CODE (TYPE) == ARRAY_TYPE \ ++ || TREE_CODE (TYPE) == UNION_TYPE \ ++ || TREE_CODE (TYPE) == RECORD_TYPE)) ? BITS_PER_WORD : (ALIGN)) ++ ++/* We need this for the same reason as DATA_ALIGNMENT, namely to cause ++ character arrays to be word-aligned so that `strcpy' calls that copy ++ constants to character arrays can be done inline, and 'strcmp' can be ++ optimised to use word loads. */ ++#define LOCAL_ALIGNMENT(TYPE, ALIGN) \ ++ DATA_ALIGNMENT (TYPE, ALIGN) ++ ++#define PAD_VARARGS_DOWN \ ++ (targetm.calls.function_arg_padding (TYPE_MODE (type), type) == PAD_DOWNWARD) ++ ++/* Define if operations between registers always perform the operation ++ on the full register even if a narrower mode is specified. */ ++#define WORD_REGISTER_OPERATIONS 1 ++ ++/* When in 64-bit mode, move insns will sign extend SImode and CCmode ++ moves. 
All other references are zero extended. */ ++#define LOAD_EXTEND_OP(MODE) \ ++ (TARGET_64BIT && ((MODE) == SImode || (MODE) == CCmode) \ ++ ? SIGN_EXTEND : ZERO_EXTEND) ++ ++/* Define this macro if it is advisable to hold scalars in registers ++ in a wider mode than that declared by the program. In such cases, ++ the value is constrained to be within the bounds of the declared ++ type, but kept valid in the wider mode. The signedness of the ++ extension may differ from that of the type. Here any MODE_INT mode narrower than a word is promoted to Pmode, and an SImode value is additionally treated as signed (UNSIGNEDP is cleared). */ ++ ++#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \ ++ if (GET_MODE_CLASS (MODE) == MODE_INT \ ++ && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \ ++ { \ ++ if ((MODE) == SImode) \ ++ (UNSIGNEDP) = 0; \ ++ (MODE) = Pmode; \ ++ } ++ ++/* Pmode is always the same as ptr_mode, but not always the same as word_mode. ++ Extensions of pointers to word_mode must be signed. */ ++#define POINTERS_EXTEND_UNSIGNED false ++ ++/* Define if loading short immediate values into registers sign extends. */ ++#define SHORT_IMMEDIATES_SIGN_EXTEND 1 ++ ++/* The [d]clz instructions have the natural values at 0. */ ++ ++#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ ++ ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2) ++ ++/* Standard register usage. */ ++ ++/* Number of hardware registers. We have: ++ ++ - 32 integer registers ++ - 32 floating point registers ++ - 8 condition code registers ++ - 2 fake registers: ++ - ARG_POINTER_REGNUM ++ - FRAME_POINTER_REGNUM ++*/ ++ ++#define FIRST_PSEUDO_REGISTER 74 ++ ++/* By default, fix the zero register ($r0), the thread pointer ($r2), ++ the stack pointer ($r3) and $r21 among the GPRs. This can change ++ depending on the command-line options. ++ ++ Regarding coprocessor registers: without evidence to the contrary, ++ it's best to assume that each coprocessor register has a unique ++ use. This can be overridden, in, e.g., loongarch_option_override or ++ TARGET_CONDITIONAL_REGISTER_USAGE should the assumption be ++ inappropriate for a particular target. 
*/ ++ ++#define FIXED_REGISTERS \ ++{ \ ++ 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ ++ 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ ++ 0, 0, 0, 0, 0, 0, 0, 1, 1, 1} /* NOTE(review): the last row marks register 71 ($fcc7) as fixed in addition to the fake registers 72 ($arg) and 73 ($frame); confirm that fixing $fcc7 is intended. */ ++ ++ ++/* Set up this array for o32 by default. ++ ++ Note that we don't mark $r1 (ra) as a call-clobbered register. The idea is ++ that it's really the call instructions themselves which clobber $r1. ++ We don't care what the called function does with it afterwards. ++ ++ This approach makes it easier to implement sibcalls. Unlike normal ++ calls, sibcalls don't clobber $r1, so the register reaches the ++ called function intact. EPILOGUE_USES says that $r1 is useful ++ to the called function. */ ++ ++#define CALL_USED_REGISTERS \ ++{ \ ++ 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ ++ 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ ++ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ ++ 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, \ ++ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1} ++ ++/* Internal macros to classify a register number as to whether it's a ++ general purpose register, a floating point register, or ++ a status (condition code) register. */ ++ ++#define GP_REG_FIRST 0 ++#define GP_REG_LAST 31 ++#define GP_REG_NUM (GP_REG_LAST - GP_REG_FIRST + 1) ++#define GP_DBX_FIRST 0 ++ ++#define FP_REG_FIRST 32 ++#define FP_REG_LAST 63 ++#define FP_REG_NUM (FP_REG_LAST - FP_REG_FIRST + 1) ++#define FP_DBX_FIRST ((write_symbols == DBX_DEBUG) ? 38 : 32) ++ ++/* The DWARF 2 CFA column which tracks the return address from a ++ signal handler context. This means that to maintain backwards ++ compatibility, no hard register can be assigned this column if it ++ would need to be handled by the DWARF unwinder. 
*/ ++#define DWARF_ALT_FRAME_RETURN_COLUMN 72 ++ ++#define ST_REG_FIRST 64 ++#define ST_REG_LAST 71 ++#define ST_REG_NUM (ST_REG_LAST - ST_REG_FIRST + 1) ++ ++#define GP_REG_P(REGNO) \ ++ ((unsigned int) ((int) (REGNO) - GP_REG_FIRST) < GP_REG_NUM) ++#define M16_REG_P(REGNO) \ ++ (((REGNO) >= 2 && (REGNO) <= 7) || (REGNO) == 16 || (REGNO) == 17) ++#define M16STORE_REG_P(REGNO) \ ++ (((REGNO) >= 2 && (REGNO) <= 7) || (REGNO) == 0 || (REGNO) == 17) ++#define FP_REG_P(REGNO) \ ++ ((unsigned int) ((int) (REGNO) - FP_REG_FIRST) < FP_REG_NUM) ++#define ST_REG_P(REGNO) \ ++ ((unsigned int) ((int) (REGNO) - ST_REG_FIRST) < ST_REG_NUM) ++ ++#define FP_REG_RTX_P(X) (REG_P (X) && FP_REG_P (REGNO (X))) ++ ++ ++#define HARD_REGNO_RENAME_OK(OLD_REG, NEW_REG) \ ++ loongarch_hard_regno_rename_ok (OLD_REG, NEW_REG) ++ ++/* Select a register mode required for caller save of hard regno REGNO. */ ++#define HARD_REGNO_CALLER_SAVE_MODE(REGNO, NREGS, MODE) \ ++ loongarch_hard_regno_caller_save_mode (REGNO, NREGS, MODE) ++ ++/* Register to use for pushing function arguments: GP_REG_FIRST + 3, the stack pointer. */ ++#define STACK_POINTER_REGNUM (GP_REG_FIRST + 3) ++ ++/* These two registers don't really exist: they get eliminated to either ++ the stack or hard frame pointer. */ ++#define ARG_POINTER_REGNUM 72 ++#define FRAME_POINTER_REGNUM 73 ++ ++#define HARD_FRAME_POINTER_REGNUM \ ++ (GP_REG_FIRST + 22) ++ ++/* FIXME: the computed forms are commented out; both macros are hard-wired to 0 below. */ ++/* #define HARD_FRAME_POINTER_IS_FRAME_POINTER (HARD_FRAME_POINTER_REGNUM == FRAME_POINTER_REGNUM) */ ++/* #define HARD_FRAME_POINTER_IS_ARG_POINTER (HARD_FRAME_POINTER_REGNUM == ARG_POINTER_REGNUM) */ ++ ++#define HARD_FRAME_POINTER_IS_FRAME_POINTER 0 ++#define HARD_FRAME_POINTER_IS_ARG_POINTER 0 ++ ++/* FIXME: */ ++/* Register in which static-chain is passed to a function. 
*/ ++#define STATIC_CHAIN_REGNUM (GP_REG_FIRST + 20) /* $t8 */ ++ ++#define LARCH_PROLOGUE_TEMP_REGNUM \ ++ (GP_REG_FIRST + 13) /* $t1 */ ++#define LARCH_PROLOGUE_TEMP2_REGNUM \ ++ (GP_REG_FIRST + 12) /* $t0 */ ++#define LARCH_EPILOGUE_TEMP_REGNUM \ ++ (GP_REG_FIRST + (12)) /* $t0, the same register as LARCH_PROLOGUE_TEMP2_REGNUM */ ++ ++#define LARCH_PROLOGUE_TEMP(MODE) gen_rtx_REG (MODE, LARCH_PROLOGUE_TEMP_REGNUM) ++#define LARCH_PROLOGUE_TEMP2(MODE) \ ++ gen_rtx_REG (MODE, LARCH_PROLOGUE_TEMP2_REGNUM) ++#define LARCH_EPILOGUE_TEMP(MODE) gen_rtx_REG (MODE, LARCH_EPILOGUE_TEMP_REGNUM) ++ ++/* Define this macro if it is as good or better to call a constant ++ function address than to call an address kept in a register. */ ++#define NO_FUNCTION_CSE 1 ++ ++#define THREAD_POINTER_REGNUM (GP_REG_FIRST + 2) /* $tp */ ++ ++ ++/* Define the classes of registers for register constraints in the ++ machine description. Also define ranges of constants. ++ ++ One of the classes must always be named ALL_REGS and include all hard regs. ++ If there is more than one class, another class must be named NO_REGS ++ and contain no registers. ++ ++ The name GENERAL_REGS must be the name of a class (or an alias for ++ another name such as ALL_REGS). This is the class of registers ++ that is allowed by "g" or "r" in a register constraint. ++ Also, registers outside this class are allocated only when ++ instructions express preferences for them. ++ ++ The classes must be numbered in nondecreasing order; that is, ++ a larger-numbered class must never be contained completely ++ in a smaller-numbered class. ++ ++ For any two classes, it is very desirable that there be another ++ class that represents their union. */ ++ ++enum reg_class ++{ ++ NO_REGS, /* no registers in set */ ++ SIBCALL_REGS, /* SIBCALL_REGS */ ++ JALR_REGS, /* JALR_REGS */ ++ GR_REGS, /* integer registers */ ++ LVZ_REGS, /* integer registers except for $r0 and $r1 for lvz. 
*/ ++ FP_REGS, /* floating point registers */ ++ ST_REGS, /* status registers (fp status) */ ++ FRAME_REGS, /* $arg and $frame */ ++ ALL_REGS, /* all registers */ ++ LIM_REG_CLASSES /* max value + 1 */ ++}; ++ ++#define N_REG_CLASSES (int) LIM_REG_CLASSES ++ ++#define GENERAL_REGS GR_REGS ++ ++/* An initializer containing the names of the register classes as C ++ string constants. These names are used in writing some of the ++ debugging dumps. */ ++ ++#define REG_CLASS_NAMES \ ++{ \ ++ "NO_REGS", \ ++ "SIBCALL_REGS", \ ++ "JALR_REGS", \ ++ "GR_REGS", \ ++ "LVZ_REGS", \ ++ "FP_REGS", \ ++ "ST_REGS", \ ++ "FRAME_REGS", \ ++ "ALL_REGS" \ ++} ++ ++/* An initializer containing the contents of the register classes, ++ as integers which are bit masks. The Nth integer specifies the ++ contents of class N. The way the integer MASK is interpreted is ++ that register R is in the class if `MASK & (1 << R)' is nonzero. ++ ++ When the machine has more than 32 registers, an integer does not ++ suffice. Then the integers are replaced by sub-initializers, ++ braced groupings containing several integers. Each ++ sub-initializer must be suitable as an initializer for the type ++ `HARD_REG_SET' which is defined in `hard-reg-set.h'. NOTE(review): the FRAME_REGS row below sets bit 22 of the first word (register 22, the hard frame pointer) and bit 9 of the third word (register 73, $frame) but not bit 8 (register 72, $arg), which does not match the $arg and $frame description given for FRAME_REGS in enum reg_class; confirm which is intended. */ ++ ++#define REG_CLASS_CONTENTS \ ++{ \ ++ { 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \ ++ { 0x001ff000, 0x00000000, 0x00000000 }, /* SIBCALL_REGS */ \ ++ { 0xff9ffff0, 0x00000000, 0x00000000 }, /* JALR_REGS */ \ ++ { 0xffffffff, 0x00000000, 0x00000000 }, /* GR_REGS */ \ ++ { 0xfffffffc, 0x00000000, 0x00000000 }, /* LVZ_REGS */ \ ++ { 0x00000000, 0xffffffff, 0x00000000 }, /* FP_REGS */ \ ++ { 0x00000000, 0x00000000, 0x000000ff }, /* ST_REGS */ \ ++ { 0x00400000, 0x00000000, 0x00000200 }, /* FRAME_REGS */ \ ++ { 0xffffffff, 0xffffffff, 0x000003ff } /* ALL_REGS */ \ ++} ++ ++ ++/* A C expression whose value is a register class containing hard ++ register REGNO. 
In general there is more than one such class; ++ choose a class which is "minimal", meaning that no smaller class ++ also contains the register. */ ++ ++#define REGNO_REG_CLASS(REGNO) loongarch_regno_to_class[ (REGNO) ] ++ ++/* A macro whose definition is the name of the class to which a ++ valid base register must belong. A base register is one used in ++ an address which is the register value plus a displacement. */ ++ ++#define BASE_REG_CLASS (GR_REGS) ++ ++/* A macro whose definition is the name of the class to which a ++ valid index register must belong. An index register is one used ++ in an address where its value is either multiplied by a scale ++ factor or added to another register (as well as added to a ++ displacement). This port uses NO_REGS, i.e. no register may serve as an index. */ ++ ++#define INDEX_REG_CLASS NO_REGS ++ ++/* We generally want to put call-clobbered registers ahead of ++ call-saved ones. (IRA expects this.) */ ++ ++#define REG_ALLOC_ORDER \ ++{ /* Call-clobbered GPRs. */ \ ++ 12, 13, 14, 15, 16, 17, 18, 19, 20, 4, 5, 6, 7, 8, 9, 10, 11, 1, \ ++ /* The global pointer. This is call-clobbered for o32 and o64 \ ++ abicalls, call-saved for n32 and n64 abicalls, and a program \ ++ invariant otherwise. Putting it between the call-clobbered \ ++ and call-saved registers should cope with all eventualities. */ \ ++ /* Call-saved GPRs. */ \ ++ 23, 24, 25, 26, 27, 28, 29, 30, 31, \ ++ /* GPRs that can never be exposed to the register allocator. */ \ ++ 0, 2, 3, 21, 22, \ ++ /* Call-clobbered FPRs. */ \ ++ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, \ ++ 48, 49, 50, 51,52, 53, 54, 55, \ ++ /* FPRs that are usually call-saved. The odd ones are actually \ ++ call-clobbered for n32, but listing them ahead of the even \ ++ registers might encourage the register allocator to fragment \ ++ the available FPR pairs. We need paired FPRs to store long \ ++ doubles, so it isn't clear that using a different order \ ++ for n32 would be a win. 
*/ \ ++ 56, 57, 58, 59, 60, 61, 62, 63, \ ++ /* None of the remaining classes have defined call-saved \ ++ registers. */ \ ++ 64, 65, 66, 67, 68, 69, 70, 71, 72, 73} ++ ++/* True if VALUE is an unsigned 6-bit number. */ ++ ++#define UIMM6_OPERAND(VALUE) \ ++ (((VALUE) & ~(unsigned HOST_WIDE_INT) 0x3f) == 0) ++ ++/* True if VALUE is a signed 10-bit number. */ ++ ++#define IMM10_OPERAND(VALUE) \ ++ ((unsigned HOST_WIDE_INT) (VALUE) + 0x200 < 0x400) ++ ++/* True if VALUE is a signed 12-bit number. */ ++ ++#define IMM12_OPERAND(VALUE) \ ++ ((unsigned HOST_WIDE_INT) (VALUE) + 0x800 < 0x1000) ++ ++/* True if VALUE is a signed 16-bit number. */ ++ ++#define IMM16_OPERAND(VALUE) \ ++ ((unsigned HOST_WIDE_INT) (VALUE) + 0x8000 < 0x10000) ++ ++ ++/* True if VALUE is a signed 12-bit number (the same test as IMM12_OPERAND). */ ++ ++#define SMALL_OPERAND(VALUE) \ ++ ((unsigned HOST_WIDE_INT) (VALUE) + 0x800 < 0x1000) ++ ++/* True if VALUE is an unsigned 12-bit number. */ ++ ++#define SMALL_OPERAND_UNSIGNED(VALUE) \ ++ (((VALUE) & ~(unsigned HOST_WIDE_INT) 0xfff) == 0) ++ ++/* True if VALUE can be loaded into a register using LUI: the low 12 bits are clear and bits 31 and above are either all clear or all set. */ ++ ++#define LUI_OPERAND(VALUE) \ ++ (((VALUE) | 0x7ffff000) == 0x7ffff000 \ ++ || ((VALUE) | 0x7ffff000) + 0x1000 == 0) ++ ++/* True if VALUE can be loaded into a register using LU32I: the low 32 bits are clear and bits 51 and above are either all clear or all set. */ ++ ++#define LU32I_OPERAND(VALUE) \ ++ ((((VALUE) | 0x7ffff00000000) == 0x7ffff00000000) \ ++ || ((VALUE) | 0x7ffff00000000) + 0x100000000 == 0) ++ ++/* True if VALUE can be loaded into a register using LU52I: only bits 52..63 may be set. */ ++ ++#define LU52I_OPERAND(VALUE) \ ++ ((((VALUE) | 0xfff0000000000000) == 0xfff0000000000000)) ++ ++/* Return a value X with the low 12 bits clear, and such that ++ VALUE - X is a signed 12-bit value. 
*/ ++ ++#define CONST_HIGH_PART(VALUE) \ ++ (((VALUE) + 0x800) & ~(unsigned HOST_WIDE_INT) 0xfff) ++ ++#define CONST_LOW_PART(VALUE) \ ++ ((VALUE) - CONST_HIGH_PART (VALUE)) /* the remainder VALUE - CONST_HIGH_PART (VALUE) */ ++ ++#define SMALL_INT(X) SMALL_OPERAND (INTVAL (X)) ++#define SMALL_INT_UNSIGNED(X) SMALL_OPERAND_UNSIGNED (INTVAL (X)) ++#define LUI_INT(X) LUI_OPERAND (INTVAL (X)) ++#define LU32I_INT(X) LU32I_OPERAND (INTVAL (X)) ++#define LU52I_INT(X) LU52I_OPERAND (INTVAL (X)) ++#define ULARCH_12BIT_OFFSET_P(OFFSET) (IN_RANGE (OFFSET, -2048, 2047)) /* signed 12-bit range, despite the U prefix */ ++#define LARCH_9BIT_OFFSET_P(OFFSET) (IN_RANGE (OFFSET, -256, 255)) ++#define LISA_16BIT_OFFSET_P(OFFSET) (IN_RANGE (OFFSET, -32768, 32767)) ++#define LISA_SHIFT_2_OFFSET_P(OFFSET) (((OFFSET) & 0x3) == 0) ++ ++/* The HI and LO registers can only be reloaded via the general ++ registers. Condition code registers can only be loaded to the ++ general registers, and from the floating point registers. NOTE(review): the HI/LO wording looks inherited; no HI or LO register appears among the register classes of this port. Both macros below defer to loongarch_secondary_reload_class, passing true for input reloads and false for output reloads. */ ++ ++#define SECONDARY_INPUT_RELOAD_CLASS(CLASS, MODE, X) \ ++ loongarch_secondary_reload_class (CLASS, MODE, X, true) ++#define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, X) \ ++ loongarch_secondary_reload_class (CLASS, MODE, X, false) ++ ++/* Return the maximum number of consecutive registers ++ needed to represent mode MODE in a register of class CLASS. */ ++ ++#define CLASS_MAX_NREGS(CLASS, MODE) loongarch_class_max_nregs (CLASS, MODE) ++ ++/* Stack layout; function entry, exit and calling. */ ++ ++#define STACK_GROWS_DOWNWARD 1 ++ ++#define FRAME_GROWS_DOWNWARD 1 ++ ++#define RETURN_ADDR_RTX loongarch_return_addr ++ ++/* Similarly, don't use the least-significant bit to tell pointers to ++ code from vtable index. 
*/ ++ ++#define TARGET_PTRMEMFUNC_VBIT_LOCATION ptrmemfunc_vbit_in_delta ++ ++#define ELIMINABLE_REGS \ ++{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ ++ { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \ ++ { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ ++ { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM},} ++ ++#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ ++ (OFFSET) = n_loongarch_initial_elimination_offset ((FROM), (TO)) ++ ++/* Allocate stack space for arguments at the beginning of each function. */ ++#define ACCUMULATE_OUTGOING_ARGS 1 ++ ++/* The argument pointer always points to the first argument. */ ++#define FIRST_PARM_OFFSET(FNDECL) 0 ++ ++/* o32 and o64 reserve stack space for all argument registers: under TARGET_OLDABI this is MAX_ARGS_IN_REGISTERS words, otherwise nothing is reserved. */ ++#define REG_PARM_STACK_SPACE(FNDECL) \ ++ (TARGET_OLDABI \ ++ ? (MAX_ARGS_IN_REGISTERS * UNITS_PER_WORD) \ ++ : 0) ++ ++/* Define this if it is the responsibility of the caller to ++ allocate the area reserved for arguments passed in registers. ++ If `ACCUMULATE_OUTGOING_ARGS' is also defined, the only effect ++ of this macro is to determine whether the space is included in ++ `crtl->outgoing_args_size'. */ ++#define OUTGOING_REG_PARM_STACK_SPACE(FNTYPE) 1 ++ ++#define STACK_BOUNDARY (TARGET_NEWABI ? 128 : 64) ++ ++/* Symbolic macros for the registers used to return integer and floating ++ point values. Integer values use GP_REG_FIRST + 4; floating-point values use FP_REG_FIRST + 0, or GP_RETURN when TARGET_SOFT_FLOAT. */ ++ ++#define GP_RETURN (GP_REG_FIRST + 4) ++#define FP_RETURN ((TARGET_SOFT_FLOAT) ? GP_RETURN : (FP_REG_FIRST + 0)) ++ ++#define MAX_ARGS_IN_REGISTERS (TARGET_OLDABI ? 4 : 8) ++ ++/* Symbolic macros for the first/last argument registers. */ ++ ++#define GP_ARG_FIRST (GP_REG_FIRST + 4) ++#define GP_ARG_LAST (GP_ARG_FIRST + MAX_ARGS_IN_REGISTERS - 1) ++#define FP_ARG_FIRST (FP_REG_FIRST + 0) ++#define FP_ARG_LAST (FP_ARG_FIRST + MAX_ARGS_IN_REGISTERS - 1) ++ ++/* 1 if N is a possible register number for function argument passing. ++ We have no FP argument registers when soft-float. 
Special handling ++ is required for O32 where only even numbered registers are used for ++ O32-FPXX and O32-FP64. */ ++ ++#define FUNCTION_ARG_REGNO_P(N) \ ++ ((IN_RANGE((N), GP_ARG_FIRST, GP_ARG_LAST) \ ++ || (IN_RANGE((N), FP_ARG_FIRST, FP_ARG_LAST) \ ++ && (loongarch_abi != ABILP32 \ ++ || TARGET_FLOAT32 \ ++ || ((N) % 2 == 0)))) \ ++ && !fixed_regs[N]) ++ ++/* This structure has to cope with two different argument allocation ++ schemes. Most LARCH ABIs view the arguments as a structure, of which ++ the first N words go in registers and the rest go on the stack. If I ++ < N, the Ith word might go in the Ith integer argument register or in a ++ floating-point register. For these ABIs, we only need to remember ++ the offset of the current argument into the structure. ++ ++ So for the standard ABIs, the first N words are allocated to integer ++ registers, and loongarch_function_arg decides on an argument-by-argument ++ basis whether that argument should really go in an integer register, ++ or in a floating-point one. */ ++ ++typedef struct loongarch_args { ++ /* Always true for varargs functions. Otherwise true if at least ++ one argument has been passed in an integer register. */ ++ int gp_reg_found; ++ ++ /* The number of arguments seen so far. */ ++ unsigned int arg_number; ++ ++ /* The number of integer registers used so far. This is the number ++ of words that have been added to the argument structure, limited ++ to MAX_ARGS_IN_REGISTERS. */ ++ unsigned int num_gprs; ++ ++ unsigned int num_fprs; /* Undocumented in the original; presumably the floating-point counterpart of num_gprs -- TODO confirm. */ ++ ++ /* The number of words passed on the stack. */ ++ unsigned int stack_words; ++ ++ /* On the loongarch16, we need to keep track of which floating point ++ arguments were passed in general registers, but would have been ++ passed in the FP regs if this were a 32-bit function, so that we ++ can move them to the FP regs if we wind up calling a 32-bit ++ function. We record this information in fp_code, encoded in base ++ four. 
A zero digit means no floating point argument, a one digit ++ means an SFmode argument, and a two digit means a DFmode argument, ++ and a three digit is not used. The low order digit is the first ++ argument. Thus 6 == 1 * 4 + 2 means a DFmode argument followed by ++ an SFmode argument. ??? A more sophisticated approach will be ++ needed if LARCH_ABI != ABILP32. */ ++ int fp_code; ++ ++ /* True if the function has a prototype. */ ++ int prototype; ++} CUMULATIVE_ARGS; ++ ++/* Initialize a variable CUM of type CUMULATIVE_ARGS ++ for a call to a function whose data type is FNTYPE. ++ For a library call, FNTYPE is 0. Here the whole structure is simply zeroed; FNTYPE, LIBNAME, INDIRECT and N_NAMED_ARGS are not used. */ ++ ++#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \ ++ memset (&(CUM), 0, sizeof (CUM)) ++ ++ ++#define EPILOGUE_USES(REGNO) loongarch_epilogue_uses (REGNO) ++ ++/* Treat LOC as a byte offset from the stack pointer and round it up ++ to the next fully-aligned offset. */ ++#define LARCH_STACK_ALIGN(LOC) \ ++ (TARGET_NEWABI ? ROUND_UP ((LOC), 16) : ROUND_UP ((LOC), 8)) ++ ++ ++/* Output assembler code to FILE to increment profiler label # LABELNO ++ for profiling a function entry. */ ++ ++#define MCOUNT_NAME "_mcount" ++ ++/* Emit rtl for profiling. Output assembler code to FILE ++ to call "_mcount" for profiling a function entry. The call passes the initial value of RETURN_ADDR_REGNUM as its single Pmode argument; LABEL is not used. */ ++#define PROFILE_HOOK(LABEL) \ ++ { \ ++ rtx fun, ra; \ ++ ra = get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNUM); \ ++ fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME); \ ++ emit_library_call (fun, LCT_NORMAL, VOIDmode, ra, Pmode); \ ++ } ++ ++/* All the work is done in PROFILE_HOOK, but this macro is still required. */ ++#define FUNCTION_PROFILER(STREAM, LABELNO) do { } while (0) ++ ++ ++/* The profiler preserves all interesting registers, including $31. NOTE(review): $31 looks like inherited wording; elsewhere in this file the return address is said to live in r1. */ ++#define LARCH_SAVE_REG_FOR_PROFILING_P(REGNO) false ++ ++/* No loongarch port has ever used the profiler counter word, so don't emit it ++ or the label for it. 
*/ ++ ++#define NO_PROFILE_COUNTERS 1 ++ ++/* Define this macro if the code for function profiling should come ++ before the function prologue. Normally, the profiling code comes ++ after. */ ++ ++/* #define PROFILE_BEFORE_PROLOGUE */ ++ ++/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function, ++ the stack pointer does not matter. The value is tested only in ++ functions that have frame pointers. ++ No definition is equivalent to always zero. */ ++ ++#define EXIT_IGNORE_STACK 1 ++ ++ ++/* Trampolines are a block of code followed by two pointers: the size is loongarch_trampoline_code_size () plus two ptr_mode-sized slots. */ ++ ++#define TRAMPOLINE_SIZE \ ++ (loongarch_trampoline_code_size () + GET_MODE_SIZE (ptr_mode) * 2) ++ ++/* Forcing a 64-bit alignment for 32-bit targets allows us to load two ++ pointers from a single LUI base. */ ++ ++#define TRAMPOLINE_ALIGNMENT 64 ++ ++/* loongarch_trampoline_init calls this library function to flush ++ program and data caches. The default name is _flush_cache; LARCH_ICACHE_SYNC below takes the symbol name from loongarch_cache_flush_func and passes ADDR, SIZE and the constant 3. */ ++ ++#ifndef CACHE_FLUSH_FUNC ++#define CACHE_FLUSH_FUNC "_flush_cache" ++#endif ++ ++#define LARCH_ICACHE_SYNC(ADDR, SIZE) \ ++ /* Flush both caches. We need to flush the data cache in case \ ++ the system has a write-back cache. */ \ ++ emit_library_call (gen_rtx_SYMBOL_REF (Pmode, loongarch_cache_flush_func), \ ++ LCT_NORMAL, VOIDmode, ADDR, Pmode, SIZE, Pmode, \ ++ GEN_INT (3), TYPE_MODE (integer_type_node)) ++ ++ ++/* Addressing modes, and classification of registers for them. */ ++ ++#define REGNO_OK_FOR_INDEX_P(REGNO) 0 ++#define REGNO_MODE_OK_FOR_BASE_P(REGNO, MODE) \ ++ loongarch_regno_mode_ok_for_base_p (REGNO, MODE, 1) ++ ++/* Maximum number of registers that can appear in a valid memory address. */ ++ ++#define MAX_REGS_PER_ADDRESS 1 ++ ++/* Check for constness inline but use loongarch_legitimate_address_p ++ to check whether a constant really is an address. 
*/ ++ ++#define CONSTANT_ADDRESS_P(X) \ ++ (CONSTANT_P (X) && memory_address_p (SImode, X)) ++ ++/* This handles the magic '..CURRENT_FUNCTION' symbol, which means ++ 'the start of the function that this code is output in'. */ ++ ++#define ASM_OUTPUT_LABELREF(FILE,NAME) \ ++ do { \ ++ if (strcmp (NAME, "..CURRENT_FUNCTION") == 0) \ ++ asm_fprintf ((FILE), "%U%s", \ ++ XSTR (XEXP (DECL_RTL (current_function_decl), \ ++ 0), 0)); \ ++ else \ ++ asm_fprintf ((FILE), "%U%s", (NAME)); \ ++ } while (0) ++ ++/* Flag to mark a function decl symbol that requires a long call. */ ++#define SYMBOL_FLAG_LONG_CALL (SYMBOL_FLAG_MACH_DEP << 0) ++#define SYMBOL_REF_LONG_CALL_P(X) \ ++ ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_LONG_CALL) != 0) ++ ++/* This flag marks functions that cannot be lazily bound. */ ++#define SYMBOL_FLAG_BIND_NOW (SYMBOL_FLAG_MACH_DEP << 1) ++#define SYMBOL_REF_BIND_NOW_P(RTX) \ ++ ((SYMBOL_REF_FLAGS (RTX) & SYMBOL_FLAG_BIND_NOW) != 0) ++ ++/* True if we're generating a form of LARCH16 code in which jump tables ++ are stored in the text section and encoded as 16-bit PC-relative ++ offsets. This is only possible when general text loads are allowed, ++ since the table access itself will be an "lh" instruction. If the ++ PC-relative offsets grow too large, 32-bit offsets are used instead. */ ++ ++ ++#define CASE_VECTOR_MODE (ptr_mode) ++ ++/* Only use short offsets if their range will not overflow. NOTE(review): the expression below tests ptr_mode rather than MIN and MAX, so the offset range is never examined; confirm which mode is actually intended here. */ ++#define CASE_VECTOR_SHORTEN_MODE(MIN, MAX, BODY) \ ++ (ptr_mode ? HImode : SImode) ++ ++ ++/* Define this as 1 if `char' should by default be signed; else as 0. */ ++#ifndef DEFAULT_SIGNED_CHAR ++#define DEFAULT_SIGNED_CHAR 1 ++#endif ++ ++/* Although LDC1 and SDC1 provide 64-bit moves on 32-bit targets, ++ we generally don't want to use them for copying arbitrary data. ++ A single N-word move is usually the same cost as N single-word moves. 
*/ ++#define MOVE_MAX UNITS_PER_WORD ++ ++#define MAX_MOVE_MAX 8 ++ ++/* Define this macro as a C expression which is nonzero if ++ accessing less than a word of memory (i.e. a `char' or a ++ `short') is no faster than accessing a word of memory, i.e., if ++ such access requires more than one instruction or if there is no ++ difference in cost between byte and (aligned) word loads. ++ ++ On RISC machines, it tends to generate better code to define ++ this as 1, since it avoids making a QI or HI mode register. ++ ++*/ ++#define SLOW_BYTE_ACCESS (1) ++ ++/* Standard LARCH integer shifts truncate the shift amount to the ++ width of the shifted operand. However, Loongson MMI shifts ++ do not truncate the shift amount at all. */ ++#define SHIFT_COUNT_TRUNCATED (1) ++ ++ ++/* Specify the machine mode that pointers have. ++ After generation of rtl, the compiler makes no further distinction ++ between pointers and any other objects of this machine mode. Unless already defined, this is DImode when both TARGET_64BIT and TARGET_LONG64 hold, and SImode otherwise. */ ++ ++#ifndef Pmode ++#define Pmode (TARGET_64BIT && TARGET_LONG64 ? DImode : SImode) ++#endif ++ ++/* Give call MEMs SImode since it is the "most permissive" mode ++ for both 32-bit and 64-bit targets. */ ++ ++#define FUNCTION_MODE SImode ++ ++ ++/* We allocate $fcc registers by hand and can't cope with moves of ++ CCmode registers to and from pseudos (or memory). */ ++#define AVOID_CCMODE_COPIES ++ ++/* A C expression for the cost of a branch instruction. A value of ++ 1 is the default; other values are interpreted relative to that. Both macro arguments are ignored here; the value is loongarch_branch_cost. */ ++ ++#define BRANCH_COST(speed_p, predictable_p) loongarch_branch_cost ++#define LOGICAL_OP_NON_SHORT_CIRCUIT 0 ++ ++/* The LARCH port has several functions that return an instruction count. ++ Multiplying the count by this value gives the number of bytes that ++ the instructions occupy. */ ++#define BASE_INSN_LENGTH (4) ++ ++/* The length of a NOP in bytes. 
*/ ++#define NOP_INSN_LENGTH (4) ++ ++/* If defined, modifies the length assigned to instruction INSN as a ++ function of the context in which it is used. LENGTH is an lvalue ++ that contains the initially computed length of the insn and should ++ be updated with the correct length of the insn. */ ++#define ADJUST_INSN_LENGTH(INSN, LENGTH) \ ++ ((LENGTH) = loongarch_adjust_insn_length ((INSN), (LENGTH))) ++ ++/* Return the asm template for a conditional branch instruction. ++ OPCODE is the opcode's mnemonic and OPERANDS is the asm template for ++ its operands. */ ++#define LARCH_BRANCH(OPCODE, OPERANDS) \ ++ OPCODE "\t" OPERANDS ++ ++#define LARCH_BRANCH_C(OPCODE, OPERANDS) \ ++ OPCODE "%:\t" OPERANDS ++ ++/* Return an asm string that forces INSN to be treated as an absolute ++ J or JAL instruction instead of an assembler macro. */ ++#define LARCH_ABSOLUTE_JUMP(INSN) INSN ++ ++ ++/* Control the assembler format that we output. */ ++ ++/* Output to assembler file text saying following lines ++ may contain character constants, extra white space, comments, etc. */ ++ ++#ifndef ASM_APP_ON ++#define ASM_APP_ON " #APP\n" ++#endif ++ ++/* Output to assembler file text saying following lines ++ no longer contain unusual constructs. 
*/ ++ ++#ifndef ASM_APP_OFF ++#define ASM_APP_OFF " #NO_APP\n" ++#endif ++/* Assembler names of the hard registers: $r0-$r31, $f0-$f31, $fcc0-$fcc7, then $arg and $frame. */ ++#define REGISTER_NAMES \ ++{ "$r0", "$r1", "$r2", "$r3", "$r4", "$r5", "$r6", "$r7", \ ++ "$r8", "$r9", "$r10", "$r11", "$r12", "$r13", "$r14", "$r15", \ ++ "$r16", "$r17", "$r18", "$r19", "$r20", "$r21", "$r22", "$r23", \ ++ "$r24", "$r25", "$r26", "$r27", "$r28", "$r29", "$r30", "$r31", \ ++ "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", \ ++ "$f8", "$f9", "$f10", "$f11", "$f12", "$f13", "$f14", "$f15", \ ++ "$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22", "$f23", \ ++ "$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30", "$f31", \ ++ "$fcc0","$fcc1","$fcc2","$fcc3","$fcc4","$fcc5","$fcc6","$fcc7", \ ++ "$arg", "$frame"} ++ ++/* List the "software" names for each general register. The entries ++ "v0" and "v1" name the same registers as "a0" and "a1". */ ++ ++#define ADDITIONAL_REGISTER_NAMES \ ++{ \ ++ { "zero", 0 + GP_REG_FIRST }, \ ++ { "ra", 1 + GP_REG_FIRST }, \ ++ { "tp", 2 + GP_REG_FIRST }, \ ++ { "sp", 3 + GP_REG_FIRST }, \ ++ { "a0", 4 + GP_REG_FIRST }, \ ++ { "a1", 5 + GP_REG_FIRST }, \ ++ { "a2", 6 + GP_REG_FIRST }, \ ++ { "a3", 7 + GP_REG_FIRST }, \ ++ { "a4", 8 + GP_REG_FIRST }, \ ++ { "a5", 9 + GP_REG_FIRST }, \ ++ { "a6", 10 + GP_REG_FIRST }, \ ++ { "a7", 11 + GP_REG_FIRST }, \ ++ { "t0", 12 + GP_REG_FIRST }, \ ++ { "t1", 13 + GP_REG_FIRST }, \ ++ { "t2", 14 + GP_REG_FIRST }, \ ++ { "t3", 15 + GP_REG_FIRST }, \ ++ { "t4", 16 + GP_REG_FIRST }, \ ++ { "t5", 17 + GP_REG_FIRST }, \ ++ { "t6", 18 + GP_REG_FIRST }, \ ++ { "t7", 19 + GP_REG_FIRST }, \ ++ { "t8", 20 + GP_REG_FIRST }, \ ++ { "x", 21 + GP_REG_FIRST }, \ ++ { "fp", 22 + GP_REG_FIRST }, \ ++ { "s0", 23 + GP_REG_FIRST }, \ ++ { "s1", 24 + GP_REG_FIRST }, \ ++ { "s2", 25 + GP_REG_FIRST }, \ ++ { "s3", 26 + GP_REG_FIRST }, \ ++ { "s4", 27 + GP_REG_FIRST }, \ ++ { "s5", 28 + GP_REG_FIRST }, \ ++ { "s6", 29 + GP_REG_FIRST }, \ ++ { "s7", 30 + GP_REG_FIRST }, \ ++ { "s8", 31 + GP_REG_FIRST }, \ ++ { "v0", 4 + GP_REG_FIRST }, \ ++ { 
"v1", 5 + GP_REG_FIRST } \ ++} ++ ++#define DBR_OUTPUT_SEQEND(STREAM) \ ++do \ ++ { \ ++ /* Emit a blank line after the delay slot for emphasis. */ \ ++ fputs ("\n", STREAM); \ ++ } \ ++while (0) ++ ++/* The LARCH implementation uses some labels for its own purpose. The ++ following lists what labels are created, and are all formed by the ++ pattern $L[a-z].*. The machine independent portion of GCC creates ++ labels matching: $L[A-Z][0-9]+ and $L[0-9]+. ++ ++ LM[0-9]+ Silicon Graphics/ECOFF stabs label before each stmt. ++ $Lb[0-9]+ Begin blocks for LARCH debug support ++ $Lc[0-9]+ Label for use in s<xx> operation. ++ $Le[0-9]+ End blocks for LARCH debug support */ ++ ++#undef ASM_DECLARE_OBJECT_NAME ++#define ASM_DECLARE_OBJECT_NAME(STREAM, NAME, DECL) \ ++ loongarch_declare_object (STREAM, NAME, "", ":\n") ++ ++/* Globalizing directive for a label. */ ++#define GLOBAL_ASM_OP "\t.globl\t" ++ ++/* This says how to define a global common symbol. */ ++ ++#define ASM_OUTPUT_ALIGNED_DECL_COMMON loongarch_output_aligned_decl_common ++ ++/* This says how to define a local common symbol (i.e., not visible to ++ linker). */ ++ ++#ifndef ASM_OUTPUT_ALIGNED_LOCAL ++#define ASM_OUTPUT_ALIGNED_LOCAL(STREAM, NAME, SIZE, ALIGN) \ ++ loongarch_declare_common_object (STREAM, NAME, "\n\t.lcomm\t", SIZE, ALIGN, false) ++#endif ++ ++/* This says how to output an external. It would be possible not to ++ output anything and let undefined symbols become external. However ++ the assembler uses length information on externals to allocate in ++ data/sdata bss/sbss, thereby saving exec time. */ ++ ++#undef ASM_OUTPUT_EXTERNAL ++#define ASM_OUTPUT_EXTERNAL(STREAM,DECL,NAME) \ ++ loongarch_output_external(STREAM,DECL,NAME) ++ ++/* This is how to declare a function name. The actual work of ++ emitting the label is moved to function_prologue, so that we can ++ get the line number correctly emitted before the .ent directive, ++ and after any .file directives.
Define as empty so that the function ++ is not declared before the .ent directive elsewhere. NOTE(review): the macro below is not empty; it calls loongarch_declare_function_name. */ ++ ++#undef ASM_DECLARE_FUNCTION_NAME ++#define ASM_DECLARE_FUNCTION_NAME(STREAM,NAME,DECL) \ ++ loongarch_declare_function_name(STREAM,NAME,DECL) ++ ++/* This is how to store into the string LABEL ++ the symbol_ref name of an internal numbered label where ++ PREFIX is the class of label and NUM is the number within the class. ++ This is suitable for output with `assemble_name'. */ ++ ++#undef ASM_GENERATE_INTERNAL_LABEL ++#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \ ++ sprintf ((LABEL), "*%s%s%ld", (LOCAL_LABEL_PREFIX), (PREFIX), (long)(NUM)) ++ ++/* Print debug labels as "foo = ." rather than "foo:" because they should ++ represent a byte pointer rather than an ISA-encoded address. This is ++ particularly important for code like: ++ ++ $LFBxxx = . ++ .cfi_startproc ++ ... ++ .section .gcc_except_table,... ++ ... ++ .uleb128 foo-$LFBxxx ++ ++ The .uleb128 requires $LFBxxx to match the FDE start address, which is ++ likewise a byte pointer rather than an ISA-encoded address. ++ ++ At the time of writing, this hook is not used for the function end ++ label: ++ ++ $LFExxx: ++ .end foo ++ ++ */ ++ ++#define ASM_OUTPUT_DEBUG_LABEL(FILE, PREFIX, NUM) \ ++ fprintf (FILE, "%s%s%d = .\n", LOCAL_LABEL_PREFIX, PREFIX, NUM) ++ ++/* This is how to output an element of a case-vector that is absolute. */ ++ ++#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \ ++ fprintf (STREAM, "\t%s\t%sL%d\n", \ ++ ptr_mode == DImode ? ".dword" : ".word", \ ++ LOCAL_LABEL_PREFIX, \ ++ VALUE) ++ ++/* This is how to output an element of a case-vector that is relative: label ++ VALUE minus the function's symbol if TARGET_RTP_PIC, else minus label REL. */ ++ ++#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \ ++do { \ ++ if (TARGET_RTP_PIC) \ ++ { \ ++ /* Make the entry relative to the start of the function. 
*/ \ ++ rtx fnsym = XEXP (DECL_RTL (current_function_decl), 0); \ ++ fprintf (STREAM, "\t%s\t%sL%d-", \ ++ Pmode == DImode ? ".dword" : ".word", \ ++ LOCAL_LABEL_PREFIX, VALUE); \ ++ assemble_name (STREAM, XSTR (fnsym, 0)); \ ++ fprintf (STREAM, "\n"); \ ++ } \ ++ else \ ++ fprintf (STREAM, "\t%s\t%sL%d-%sL%d\n", \ ++ ptr_mode == DImode ? ".dword" : ".word", \ ++ LOCAL_LABEL_PREFIX, VALUE, \ ++ LOCAL_LABEL_PREFIX, REL); \ ++} while (0) ++ ++/* Mark inline jump tables as data for the purpose of disassembly. For ++ simplicity embed the jump table's label number in the local symbol ++ produced so that multiple jump tables within a single function end ++ up marked with unique symbols. Retain the alignment setting from ++ `elfos.h' as we are replacing the definition from there. */ ++ ++#undef ASM_OUTPUT_BEFORE_CASE_LABEL ++#define ASM_OUTPUT_BEFORE_CASE_LABEL(STREAM, PREFIX, NUM, TABLE) \ ++ do \ ++ { \ ++ ASM_OUTPUT_ALIGN ((STREAM), 2); \ ++ if (JUMP_TABLES_IN_TEXT_SECTION) \ ++ loongarch_set_text_contents_type (STREAM, "__jump_", NUM, FALSE); \ ++ } \ ++ while (0) ++ ++/* Reset text marking to code after an inline jump table. Like with ++ the beginning of a jump table use the label number to keep symbols ++ unique. */ ++ ++#define ASM_OUTPUT_CASE_END(STREAM, NUM, TABLE) \ ++ do \ ++ if (JUMP_TABLES_IN_TEXT_SECTION) \ ++ loongarch_set_text_contents_type (STREAM, "__jend_", NUM, TRUE); \ ++ while (0) ++ ++/* This is how to output an assembler line ++ that says to advance the location counter ++ to a multiple of 2**LOG bytes. */ ++ ++#define ASM_OUTPUT_ALIGN(STREAM,LOG) \ ++ fprintf (STREAM, "\t.align\t%d\n", (LOG)) ++/* Like ASM_OUTPUT_ALIGN, but also passes the operands 54525952 (0x03400000; presumably the NOP used as fill) and 4 to .align. */ ++#define ASM_OUTPUT_ALIGN_WITH_NOP(STREAM,LOG) \ ++ fprintf (STREAM, "\t.align\t%d,54525952,4\n", (LOG)) ++ ++ ++/* This is how to output an assembler line to advance the location ++ counter by SIZE bytes.
*/ ++ ++#undef ASM_OUTPUT_SKIP ++#define ASM_OUTPUT_SKIP(STREAM,SIZE) \ ++ fprintf (STREAM, "\t.space\t" HOST_WIDE_INT_PRINT_UNSIGNED"\n", (SIZE)) ++ ++/* This is how to output a string. */ ++#undef ASM_OUTPUT_ASCII ++#define ASM_OUTPUT_ASCII loongarch_output_ascii ++ ++ ++/* Default to -G 8 */ ++#ifndef LARCH_DEFAULT_GVALUE ++#define LARCH_DEFAULT_GVALUE 8 ++#endif ++ ++/* Define the strings to put out for each section in the object file. */ ++#define TEXT_SECTION_ASM_OP "\t.text" /* instructions */ ++#define DATA_SECTION_ASM_OP "\t.data" /* large data */ ++ ++#undef READONLY_DATA_SECTION_ASM_OP ++#define READONLY_DATA_SECTION_ASM_OP "\t.section\t.rodata" /* read-only data */ ++/* Push register REGNO: lower the stack pointer by 8, then store REGNO at offset 0 (addi.d/st.d if TARGET_64BIT, else addi.w/st.w). */ ++#define ASM_OUTPUT_REG_PUSH(STREAM,REGNO) \ ++do \ ++ { \ ++ fprintf (STREAM, "\t%s\t%s,%s,-8\n\t%s\t%s,%s,0\n", \ ++ TARGET_64BIT ? "addi.d" : "addi.w", \ ++ reg_names[STACK_POINTER_REGNUM], \ ++ reg_names[STACK_POINTER_REGNUM], \ ++ TARGET_64BIT ? "st.d" : "st.w", \ ++ reg_names[REGNO], \ ++ reg_names[STACK_POINTER_REGNUM]); \ ++ } \ ++while (0) ++/* Pop register REGNO: load REGNO from offset 0, then raise the stack pointer by 8 (ld.d/addi.d if TARGET_64BIT, else ld.w/addi.w). */ ++#define ASM_OUTPUT_REG_POP(STREAM,REGNO) \ ++do \ ++ { \ ++ loongarch_push_asm_switch (&loongarch_noreorder); \ ++ fprintf (STREAM, "\t%s\t%s,%s,0\n\t%s\t%s,%s,8\n", \ ++ TARGET_64BIT ? "ld.d" : "ld.w", \ ++ reg_names[REGNO], \ ++ reg_names[STACK_POINTER_REGNUM], \ ++ TARGET_64BIT ? "addi.d" : "addi.w", \ ++ reg_names[STACK_POINTER_REGNUM], \ ++ reg_names[STACK_POINTER_REGNUM]); \ ++ loongarch_pop_asm_switch (&loongarch_noreorder); \ ++ } \ ++while (0) ++ ++/* How to start an assembler comment. ++ The leading space is important (the loongarch native assembler requires it). */ ++#ifndef ASM_COMMENT_START ++#define ASM_COMMENT_START " #" ++#endif ++ ++#undef SIZE_TYPE ++#define SIZE_TYPE (POINTER_SIZE == 64 ? "long unsigned int" : "unsigned int") ++ ++#undef PTRDIFF_TYPE ++#define PTRDIFF_TYPE (POINTER_SIZE == 64 ? "long int" : "int") ++ ++/* The minimum alignment of any expanded block move.
*/ ++#define LARCH_MIN_MOVE_MEM_ALIGN 16 ++ ++/* The maximum number of bytes that can be copied by one iteration of ++ a movmemsi loop; see loongarch_block_move_loop. */ ++#define LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER \ ++ (UNITS_PER_WORD * 4) ++ ++/* The maximum number of bytes that can be copied by a straight-line ++ implementation of movmemsi; see loongarch_block_move_straight. We want ++ to make sure that any loop-based implementation will iterate at ++ least twice. */ ++#define LARCH_MAX_MOVE_BYTES_STRAIGHT \ ++ (LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER * 2) ++ ++/* The base cost of a memcpy call, for MOVE_RATIO and friends. These ++ values were determined experimentally by benchmarking with CSiBE. ++*/ ++#define LARCH_CALL_RATIO 8 ++ ++/* Any loop-based implementation of movmemsi will have at least ++ LARCH_MAX_MOVE_BYTES_STRAIGHT / UNITS_PER_WORD memory-to-memory ++ moves, so allow individual copies of fewer elements. ++ ++ When movmemsi is not available, use a value approximating ++ the length of a memcpy call sequence, so that move_by_pieces ++ will generate inline code if it is shorter than a function call. ++ Since move_by_pieces_ninsns counts memory-to-memory moves, but ++ we'll have to generate a load/store pair for each, halve the ++ value of LARCH_CALL_RATIO to take that into account. */ ++/* NOTE(review): GCC 10 names the block-copy pattern cpymemsi; confirm that HAVE_movmemsi is the intended test. */ ++#define MOVE_RATIO(speed) \ ++ (HAVE_movmemsi \ ++ ? LARCH_MAX_MOVE_BYTES_STRAIGHT / MOVE_MAX \ ++ : LARCH_CALL_RATIO / 2) ++ ++/* For CLEAR_RATIO, when optimizing for size, give a better estimate ++ of the length of a memset call, but use the default otherwise. */ ++ ++#define CLEAR_RATIO(speed)\ ++ ((speed) ? 15 : LARCH_CALL_RATIO) ++ ++/* This is similar to CLEAR_RATIO, but for a non-zero constant, so when ++ optimizing for size adjust the ratio to account for the overhead of ++ loading the constant and replicating it across the word. */ ++ ++#define SET_RATIO(speed) \ ++ ((speed) ? 
15 : LARCH_CALL_RATIO - 2) ++ ++/* Since the bits of the _init and _fini functions are spread across ++ many object files, each potentially with its own GP, we must assume ++ we need to load our GP. We don't preserve $gp or $ra, since each ++ init/fini chunk is supposed to initialize $gp, and crti/crtn ++ already take care of preserving $ra and, when appropriate, $gp. */ ++#if (defined _ABI64 && _LARCH_SIM == _ABI64) ++#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ ++ asm (SECTION_OP "\n\ ++ la $t8, " USER_LABEL_PREFIX #FUNC "\n\ ++ jirl $ra, $t8, 0\n\ ++ " TEXT_SECTION_ASM_OP); ++#endif ++#ifndef HAVE_AS_TLS ++#define HAVE_AS_TLS 0 ++#endif ++ ++#ifndef HAVE_AS_NAN ++#define HAVE_AS_NAN 0 ++#endif ++ ++#ifndef USED_FOR_TARGET ++/* Information about ".set noFOO; ...; .set FOO" blocks. */ ++struct loongarch_asm_switch { ++ /* The FOO in the description above. */ ++ const char *name; ++ ++ /* The current block nesting level, or 0 if we aren't in a block. */ ++ int nesting_level; ++}; ++ ++extern const enum reg_class loongarch_regno_to_class[]; ++extern const char *current_function_file; /* filename current function is in */ ++extern int num_source_filenames; /* current .file # */ ++extern int loongarch_dbx_regno[]; ++extern int loongarch_dwarf_regno[]; ++extern bool loongarch_split_p[]; ++extern bool loongarch_use_pcrel_pool_p[]; ++extern enum processor loongarch_arch; /* which cpu to codegen for */ ++extern enum processor loongarch_tune; /* which cpu to schedule for */ ++extern int loongarch_isa; /* architectural level */ ++extern int loongarch_isa_rev; ++extern const struct loongarch_cpu_info *loongarch_arch_info; ++extern const struct loongarch_cpu_info *loongarch_tune_info; ++extern unsigned int loongarch_base_compression_flags; ++ ++/* Information about a function's frame layout. */ ++struct GTY(()) loongarch_frame_info { ++ /* The size of the frame in bytes. */ ++ HOST_WIDE_INT total_size; ++ ++ /* The number of bytes allocated to variables. 
*/ ++ HOST_WIDE_INT var_size; ++ ++ /* The number of bytes allocated to outgoing function arguments. */ ++ HOST_WIDE_INT args_size; ++ ++ /* The number of bytes allocated to the .cprestore slot, or 0 if there ++ is no such slot. */ ++ HOST_WIDE_INT cprestore_size; ++ ++ /* Bit X is set if the function saves or restores GPR X. */ ++ unsigned int mask; ++ ++ /* Likewise FPR X. */ ++ unsigned int fmask; ++ ++ /* Likewise doubleword accumulator X ($acX). */ ++ unsigned int acc_mask; ++ ++ /* The number of GPRs, FPRs, doubleword accumulators and COP0 ++ registers saved. */ ++ unsigned int num_gp; ++ unsigned int num_fp; ++ unsigned int num_acc; ++ unsigned int num_cop0_regs; ++ ++ /* The offset of the topmost GPR, FPR, accumulator and COP0-register ++ save slots from the top of the frame, or zero if no such slots are ++ needed. */ ++ HOST_WIDE_INT gp_save_offset; ++ HOST_WIDE_INT fp_save_offset; ++ HOST_WIDE_INT acc_save_offset; ++ HOST_WIDE_INT cop0_save_offset; ++ ++ /* Likewise, but giving offsets from the bottom of the frame. */ ++ HOST_WIDE_INT gp_sp_offset; ++ HOST_WIDE_INT fp_sp_offset; ++ HOST_WIDE_INT acc_sp_offset; ++ HOST_WIDE_INT cop0_sp_offset; ++ ++ /* Similar, but the value passed to _mcount. */ ++ HOST_WIDE_INT ra_fp_offset; ++ ++ /* The offset of arg_pointer_rtx from the bottom of the frame. */ ++ HOST_WIDE_INT arg_pointer_offset; ++ ++ /* The offset of hard_frame_pointer_rtx from the bottom of the frame. */ ++ HOST_WIDE_INT hard_frame_pointer_offset; ++ ++ /* How much the GPR save/restore routines adjust sp (or 0 if unused). */ ++ unsigned save_libcall_adjustment; ++ ++ /* Offset of virtual frame pointer from stack pointer/frame bottom. */ ++ HOST_WIDE_INT frame_pointer_offset; ++}; ++ ++/* Enumeration for masked vectored (VI) and non-masked (EIC) interrupts. 
*/ ++enum loongarch_int_mask ++{ ++ INT_MASK_EIC = -1, ++ INT_MASK_SW0 = 0, ++ INT_MASK_SW1 = 1, ++ INT_MASK_HW0 = 2, ++ INT_MASK_HW1 = 3, ++ INT_MASK_HW2 = 4, ++ INT_MASK_HW3 = 5, ++ INT_MASK_HW4 = 6, ++ INT_MASK_HW5 = 7 ++}; ++ ++/* Enumeration to mark the existence of the shadow register set. ++ SHADOW_SET_INTSTACK indicates a shadow register set with a valid stack ++ pointer. */ ++enum loongarch_shadow_set ++{ ++ SHADOW_SET_NO, ++ SHADOW_SET_YES, ++ SHADOW_SET_INTSTACK ++}; ++ ++struct GTY(()) machine_function { ++ /* The next floating-point condition-code register to allocate ++ for 8CC targets, relative to ST_REG_FIRST. */ ++ unsigned int next_fcc; ++ ++ /* The number of extra stack bytes taken up by register varargs. ++ This area is allocated by the callee at the very top of the frame. */ ++ int varargs_size; ++ ++ /* The current frame information, calculated by loongarch_compute_frame_info. */ ++ struct loongarch_frame_info frame; ++ ++ /* How many instructions it takes to load a label into $AT, or 0 if ++ this property hasn't yet been calculated. */ ++ unsigned int load_label_num_insns; ++ ++ /* True if loongarch_adjust_insn_length should ignore an instruction's ++ hazard attribute. */ ++ bool ignore_hazard_length_p; ++ ++ /* True if the whole function is suitable for .set noreorder and ++ .set nomacro. */ ++ bool all_noreorder_p; ++ ++ /* True if the function has "inflexible" and "flexible" references ++ to the global pointer. See loongarch_cfun_has_inflexible_gp_ref_p ++ and loongarch_cfun_has_flexible_gp_ref_p for details. */ ++ bool has_inflexible_gp_insn_p; ++ bool has_flexible_gp_insn_p; ++ ++ /* True if the function's prologue must load the global pointer ++ value into pic_offset_table_rtx and store the same value in ++ the function's cprestore slot (if any). Even if this value ++ is currently false, we may decide to set it to true later; ++ see loongarch_must_initialize_gp_p () for details. 
*/ ++ bool must_initialize_gp_p; ++ ++ /* True if the current function must restore $gp after any potential ++ clobber. This value is only meaningful during the first post-epilogue ++ split_insns pass; see loongarch_must_initialize_gp_p () for details. */ ++ bool must_restore_gp_when_clobbered_p; ++ ++ /* True if this is an interrupt handler. */ ++ bool interrupt_handler_p; ++ ++ /* Records the way in which interrupts should be masked. Only used if ++ interrupts are not kept masked. */ ++ enum loongarch_int_mask int_mask; ++ ++ /* Records if this is an interrupt handler that uses shadow registers. */ ++ enum loongarch_shadow_set use_shadow_register_set; ++ ++ /* True if this is an interrupt handler that should keep interrupts ++ masked. */ ++ bool keep_interrupts_masked_p; ++ ++ /* True if this is an interrupt handler that should use DERET ++ instead of ERET. */ ++ bool use_debug_exception_return_p; ++ ++ /* True if at least one of the formal parameters to a function must be ++ written to the frame header (probably so its address can be taken). NOTE(review): the field name suggests the opposite sense; confirm. */ ++ bool does_not_use_frame_header; ++ ++ /* True if none of the functions that are called by this function need ++ stack space allocated for their arguments. */ ++ bool optimize_call_stack; ++ ++ /* True if one of the functions calling this function may not allocate ++ a frame header. */ ++ bool callers_may_not_allocate_frame; ++ ++ /* True if GCC stored callee saved registers in the frame header. */ ++ bool use_frame_header_for_callee_saved_regs; ++}; ++#endif ++ ++/* Whether to enable querying of DFA units; defined as 0 (off) here. */ ++#define CPU_UNITS_QUERY 0 ++ ++/* As on most targets, we want the .eh_frame section to be read-only where ++ possible. And as on most targets, this means two things: ++ ++ (a) Non-locally-binding pointers must have an indirect encoding, ++ so that the addresses in the .eh_frame section itself become ++ locally-binding.
++ ++ (b) A shared library's .eh_frame section must encode locally-binding ++ pointers in a relative (relocation-free) form. ++ ++ However, LARCH has traditionally not allowed directives like: ++ ++ .long x-. ++ ++ in cases where "x" is in a different section, or is not defined in the ++ same assembly file. We are therefore unable to emit the PC-relative ++ form required by (b) at assembly time. ++ ++ Fortunately, the linker is able to convert absolute addresses into ++ PC-relative addresses on our behalf. Unfortunately, only certain ++ versions of the linker know how to do this for indirect pointers, ++ and for personality data. We must fall back on using writable ++ .eh_frame sections for shared libraries if the linker does not ++ support this feature. */ ++#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \ ++ (((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_absptr) ++ ++#define SWITCHABLE_TARGET 1 ++ ++/* Several named LARCH patterns depend on Pmode. These patterns have the ++ form <NAME>_si for Pmode == SImode and <NAME>_di for Pmode == DImode. ++ Add the appropriate suffix to generator function NAME and invoke it ++ with arguments ARGS. */ ++#define PMODE_INSN(NAME, ARGS) \ ++ (Pmode == SImode ? NAME ## _si ARGS : NAME ## _di ARGS) ++ ++/* Load store bonding is not supported by fix_24k. The ++ performance can be degraded for those targets. Hence, do not bond for ++ fix_24k. NOTE(review): as written, this only tests TARGET_LOAD_STORE_PAIRS. */ ++#define ENABLE_LD_ST_PAIRS \ ++ (TARGET_LOAD_STORE_PAIRS) ++ ++ ++/* Do emit .note.GNU-stack by default. */ ++#ifndef NEED_INDICATE_EXEC_STACK ++#define NEED_INDICATE_EXEC_STACK 1 ++#endif ++ ++/***********************/ ++/* N_LARCH-PORT */ ++/***********************/ ++/* The `Q' extension is not yet supported. */ ++/* TODO: according to march */ ++#define UNITS_PER_FP_REG (TARGET_DOUBLE_FLOAT ? 8 : 4) ++ ++/* The largest type that can be passed in floating-point registers. */ ++/* TODO: according to mabi */ ++#define UNITS_PER_FP_ARG 8 ++ ++/* Internal macros to classify an ISA register's type.
*/ ++ ++#define GP_TEMP_FIRST (GP_REG_FIRST + 12) ++/* Map register numbers 22..31 to 0..9; any other REGNO yields -1. */ ++#define CALLEE_SAVED_REG_NUMBER(REGNO) \ ++ ((REGNO) >= 22 && (REGNO) <= 31 ? (REGNO) - 22 : -1) ++ ++#define N_LARCH_PROLOGUE_TEMP_REGNUM (GP_TEMP_FIRST + 1) ++#define N_LARCH_PROLOGUE_TEMP(MODE) gen_rtx_REG (MODE, N_LARCH_PROLOGUE_TEMP_REGNUM) ++ ++#define LIBCALL_VALUE(MODE) \ ++ n_loongarch_function_value (NULL_TREE, NULL_TREE, MODE) ++ ++#define FUNCTION_VALUE(VALTYPE, FUNC) \ ++ n_loongarch_function_value (VALTYPE, FUNC, VOIDmode) ++ ++#define FRAME_GROWS_DOWNWARD 1 ++ ++#define FUNCTION_VALUE_REGNO_P(N) ((N) == GP_RETURN || (N) == FP_RETURN) +diff -uNr gcc-10.3.0.org/gcc/config/loongarch/loongarch.md gcc-10.3.0/gcc/config/loongarch/loongarch.md +--- gcc-10.3.0.org/gcc/config/loongarch/loongarch.md 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/config/loongarch/loongarch.md 2022-03-23 17:40:29.349280103 +0800 +@@ -0,0 +1,4332 @@ ++;; Loongarch.md Machine Description for LARCH based processors ++;; Copyright (C) 1989-2018 Free Software Foundation, Inc. ++;; Contributed by A. Lichnewsky, lich@inria.inria.fr ++;; Changes by Michael Meissner, meissner@osf.org ++ ++;; This file is part of GCC. ++ ++;; GCC is free software; you can redistribute it and/or modify ++;; it under the terms of the GNU General Public License as published by ++;; the Free Software Foundation; either version 3, or (at your option) ++;; any later version. ++ ++;; GCC is distributed in the hope that it will be useful, ++;; but WITHOUT ANY WARRANTY; without even the implied warranty of ++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++;; GNU General Public License for more details. ++ ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; <http://www.gnu.org/licenses/>. ++ ++(define_enum "processor" [ ++ loongarch ++ loongarch64 ++ gs464v ++]) ++ ++(define_c_enum "unspec" [ ++ ;; Integer operations that are too cumbersome to describe directly. 
++ UNSPEC_WSBH ++ UNSPEC_DSBH ++ UNSPEC_DSHD ++ ++ ;; Floating-point moves. ++ UNSPEC_LOAD_LOW ++ UNSPEC_LOAD_HIGH ++ UNSPEC_STORE_WORD ++ UNSPEC_MOVGR2FRH ++ UNSPEC_MOVFRH2GR ++ ++ ;; Floating-point environment. ++ UNSPEC_MOVFCSR2GR ++ UNSPEC_MOVGR2FCSR ++ ++ ;; GP manipulation. ++ UNSPEC_EH_RETURN ++ ++ ;; ++ UNSPEC_FRINT ++ UNSPEC_FCLASS ++ UNSPEC_BYTEPICK_W ++ UNSPEC_BYTEPICK_D ++ UNSPEC_BITREV_4B ++ UNSPEC_BITREV_8B ++ ++ ;; Symbolic accesses. ++ UNSPEC_LOAD_CALL ++ ++ ;; Blockage and synchronisation. ++ UNSPEC_BLOCKAGE ++ UNSPEC_DBAR ++ UNSPEC_IBAR ++ ++ ;; CPUCFG ++ UNSPEC_CPUCFG ++ UNSPEC_ASRTLE_D ++ UNSPEC_ASRTGT_D ++ ++ UNSPEC_CSRRD ++ UNSPEC_CSRWR ++ UNSPEC_CSRXCHG ++ UNSPEC_IOCSRRD ++ UNSPEC_IOCSRWR ++ ++ ;; cacop ++ UNSPEC_CACOP ++ ++ ;; pte ++ UNSPEC_LDDIR ++ UNSPEC_LDPTE ++ ++ ;; Cache manipulation. ++ UNSPEC_LARCH_CACHE ++ ++ ;; Interrupt handling. ++ UNSPEC_ERTN ++ UNSPEC_DI ++ UNSPEC_EHB ++ UNSPEC_RDPGPR ++ ++ ;; Used in a call expression in place of args_size. It's present for PIC ++ ;; indirect calls where it contains args_size and the function symbol. ++ UNSPEC_CALL_ATTR ++ ++ ++ ;; Stack checking. ++ UNSPEC_PROBE_STACK_RANGE ++ ++ ;; The `.insn' pseudo-op. ++ UNSPEC_INSN_PSEUDO ++ ++ ;; TLS ++ UNSPEC_TLS_GD ++ UNSPEC_TLS_LD ++ UNSPEC_TLS_LE ++ UNSPEC_TLS_IE ++ ++ UNSPEC_LU52I_D ++ ++ ;; FIXME: Stack tie ++ UNSPEC_TIE ++ ++ ;; CRC ++ UNSPEC_CRC ++ UNSPEC_CRCC ++]) ++ ++;; FIXME: register numbers; presumably these mirror the register-name table in loongarch.h -- confirm. ++(define_constants ++ [(RETURN_ADDR_REGNUM 1) ++ (T0_REGNUM 12) ++ (T1_REGNUM 13) ++ (S0_REGNUM 23) ++ (S1_REGNUM 24) ++ (S2_REGNUM 25) ++ ++ ;; PIC long branch sequences are never longer than 100 bytes. ++ (MAX_PIC_BRANCH_LENGTH 100) ++]) ++ ++(include "predicates.md") ++(include "constraints.md") ++ ++;; .................... ++;; ++;; Attributes ++;; ++;; .................... 
++ ++(define_attr "got" "unset,load" ++ (const_string "unset")) ++ ++;; For jal instructions, this attribute is DIRECT when the target address ++;; is symbolic and INDIRECT when it is a register.
++(define_attr "jal" "unset,direct,indirect" ++ (const_string "unset")) ++ ++ ++;; Classification of moves, extensions and truncations. Most values ++;; are as for "type" (see below) but there are also the following ++;; move-specific values: ++;; ++;; sll0 "sll DEST,SRC,0", which on 64-bit targets is guaranteed ++;; to produce a sign-extended DEST, even if SRC is not ++;; properly sign-extended ++;; pick_ins BSTRPICK.W, BSTRPICK.D, BSTRINS.W or BSTRINS.D instruction ++;; andi a single ANDI instruction ++;; shift_shift a shift left followed by a shift right ++;; ++;; This attribute is used to determine the instruction's length and ++;; scheduling type. For doubleword moves, the attribute always describes ++;; the split instructions; in some cases, it is more appropriate for the ++;; scheduling type to be "multi" instead. ++(define_attr "move_type" ++ "unknown,load,fpload,store,fpstore,mgtf,mftg,imul,move,fmove, ++ const,signext,pick_ins,logical,arith,sll0,andi,shift_shift" ++ (const_string "unknown")) ++ ++(define_attr "alu_type" "unknown,add,sub,not,nor,and,or,xor" ++ (const_string "unknown")) ++ ++;; Main data type used by the insn. ++(define_attr "mode" "unknown,none,QI,HI,SI,DI,TI,OI,SF,DF,TF,FPSW" ++ (const_string "unknown")) ++ ++;; True if the main data type is twice the size of a word: DI/DF without TARGET_64BIT, TI/TF with it. ++(define_attr "dword_mode" "no,yes" ++ (cond [(and (eq_attr "mode" "DI,DF") ++ (not (match_test "TARGET_64BIT"))) ++ (const_string "yes") ++ ++ (and (eq_attr "mode" "TI,TF") ++ (match_test "TARGET_64BIT")) ++ (const_string "yes")] ++ (const_string "no"))) ++ ++;; True if the main data type is four times the size of a word: TI/TF without TARGET_64BIT. ++(define_attr "qword_mode" "no,yes" ++ (cond [(and (eq_attr "mode" "TI,TF") ++ (not (match_test "TARGET_64BIT"))) ++ (const_string "yes")] ++ (const_string "no"))) ++ ++;; True if the main data type is eight times the size of a word.
++(define_attr "oword_mode" "no,yes" ++ (cond [(and (eq_attr "mode" "OI") ++ (not (match_test "TARGET_64BIT"))) ++ (const_string "yes")] ++ (const_string "no"))) ++ ++;; Attributes describing a sync loop. These loops have the form: ++;; ++;; if (RELEASE_BARRIER == YES) sync ++;; 1: OLDVAL = *MEM ++;; if ((OLDVAL & INCLUSIVE_MASK) != REQUIRED_OLDVAL) goto 2 ++;; CMP = 0 [delay slot] ++;; $TMP1 = OLDVAL & EXCLUSIVE_MASK ++;; $TMP2 = INSN1 (OLDVAL, INSN1_OP2) ++;; $TMP3 = INSN2 ($TMP2, INCLUSIVE_MASK) ++;; $AT |= $TMP1 | $TMP3 ++;; if (!commit (*MEM = $AT)) goto 1. ++;; if (INSN1 != MOVE && INSN1 != LI) NEWVAL = $TMP3 [delay slot] ++;; CMP = 1 ++;; if (ACQUIRE_BARRIER == YES) sync ++;; 2: ++;; ++;; where "$" values are temporaries and where the other values are ++;; specified by the attributes below. Values are specified as operand ++;; numbers and insns are specified as enums. If no operand number is ++;; specified, the following values are used instead: ++;; ++;; - OLDVAL: $AT ++;; - CMP: NONE ++;; - NEWVAL: $AT ++;; - INCLUSIVE_MASK: -1 ++;; - REQUIRED_OLDVAL: OLDVAL & INCLUSIVE_MASK ++;; - EXCLUSIVE_MASK: 0 ++;; ++;; MEM and INSN1_OP2 are required. ++;; ++;; Ideally, the operand attributes would be integers, with -1 meaning "none", ++;; but the gen* programs don't yet support that. 
++(define_attr "sync_mem" "none,0,1,2,3,4,5" (const_string "none")) ++(define_attr "sync_oldval" "none,0,1,2,3,4,5" (const_string "none")) ++(define_attr "sync_cmp" "none,0,1,2,3,4,5" (const_string "none")) ++(define_attr "sync_newval" "none,0,1,2,3,4,5" (const_string "none")) ++(define_attr "sync_inclusive_mask" "none,0,1,2,3,4,5" (const_string "none")) ++(define_attr "sync_exclusive_mask" "none,0,1,2,3,4,5" (const_string "none")) ++(define_attr "sync_required_oldval" "none,0,1,2,3,4,5" (const_string "none")) ++(define_attr "sync_insn1_op2" "none,0,1,2,3,4,5" (const_string "none")) ++(define_attr "sync_insn1" "move,li,addu,addiu,subu,and,andi,or,ori,xor,xori" ++ (const_string "move")) ++(define_attr "sync_insn2" "nop,and,xor,not" ++ (const_string "nop")) ++;; Memory model specifier. ++;; "0"-"9" values specify the operand that stores the memory model value. ++;; "10" specifies MEMMODEL_ACQ_REL, ++;; "11" specifies MEMMODEL_ACQUIRE. The default value is 10. ++(define_attr "sync_memmodel" "" (const_int 10)) ++ ++;; Accumulator operand for madd patterns. ++(define_attr "accum_in" "none,0,1,2,3,4,5" (const_string "none")) ++ ++;; Classification of each insn.
++;; branch conditional branch ++;; jump unconditional jump ++;; call unconditional call ++;; load load instruction(s) ++;; fpload floating point load ++;; fpidxload floating point indexed load ++;; store store instruction(s) ++;; fpstore floating point store ++;; fpidxstore floating point indexed store ++;; prefetch memory prefetch (register + offset) ++;; prefetchx memory indexed prefetch (register + register) ++;; condmove conditional moves ++;; mgtf move general register to float register ++;; mftg move float register to general register ++;; const load constant ++;; arith integer arithmetic instructions ++;; logical integer logical instructions ++;; shift integer shift instructions ++;; slt set less than instructions ++;; signext sign extend instructions ++;; clz the clz and clo instructions ++;; trap trap if instructions ++;; imul integer multiply 2 operands ++;; imul3 integer multiply 3 operands ++;; idiv3 integer divide 3 operands ++;; move integer register move ({,D}ADD{,U} with rt = 0) ++;; fmove floating point register move ++;; fadd floating point add/subtract ++;; fmul floating point multiply ++;; fmadd floating point multiply-add ++;; fdiv floating point divide ++;; frdiv floating point reciprocal divide ++;; fabs floating point absolute value ++;; fneg floating point negation ++;; fcmp floating point compare ++;; fcvt floating point convert ++;; fsqrt floating point square root ++;; frsqrt floating point reciprocal square root ++;; multi multiword sequence (or user asm statements) ++;; atomic atomic memory update instruction ++;; syncloop memory atomic operation implemented as a sync loop ++;; nop no operation ++;; ghost an instruction that produces no real code ++(define_attr "type" ++ "unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxstore, ++ prefetch,prefetchx,condmove,mgtf,mftg,const,arith,logical, ++ shift,slt,signext,clz,trap,imul,imul3,idiv3,move, ++ fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcvt,fsqrt, ++ 
frsqrt,dspmac,dspmacsat,accext,accmod,dspalu,dspalusat, ++ multi,atomic,syncloop,nop,ghost" ++ (cond [(eq_attr "jal" "!unset") (const_string "call") ++ (eq_attr "got" "load") (const_string "load") ++ ++ (eq_attr "alu_type" "add,sub") (const_string "arith") ++ ++ (eq_attr "alu_type" "not,nor,and,or,xor") (const_string "logical") ++ ++ ;; If a doubleword move uses these expensive instructions, ++ ;; it is usually better to schedule them in the same way ++ ;; as the singleword form, rather than as "multi". ++ (eq_attr "move_type" "load") (const_string "load") ++ (eq_attr "move_type" "fpload") (const_string "fpload") ++ (eq_attr "move_type" "store") (const_string "store") ++ (eq_attr "move_type" "fpstore") (const_string "fpstore") ++ (eq_attr "move_type" "mgtf") (const_string "mgtf") ++ (eq_attr "move_type" "mftg") (const_string "mftg") ++ ++ ;; These types of move are always single insns. ++ (eq_attr "move_type" "imul") (const_string "imul") ++ (eq_attr "move_type" "fmove") (const_string "fmove") ++ (eq_attr "move_type" "signext") (const_string "signext") ++ (eq_attr "move_type" "pick_ins") (const_string "arith") ++ (eq_attr "move_type" "arith") (const_string "arith") ++ (eq_attr "move_type" "logical") (const_string "logical") ++ (eq_attr "move_type" "sll0") (const_string "shift") ++ (eq_attr "move_type" "andi") (const_string "logical") ++ ++ ;; These types of move are always split. ++ (eq_attr "move_type" "shift_shift") ++ (const_string "multi") ++ ++ ;; These types of move are split for octaword modes only. ++ (and (eq_attr "move_type" "move,const") ++ (eq_attr "oword_mode" "yes")) ++ (const_string "multi") ++ ++ ;; These types of move are split for quadword modes only. ++ (and (eq_attr "move_type" "move,const") ++ (eq_attr "qword_mode" "yes")) ++ (const_string "multi") ++ ++ ;; These types of move are split for doubleword modes only. 
++ (and (eq_attr "move_type" "move,const") ++ (eq_attr "dword_mode" "yes")) ++ (const_string "multi") ++ (eq_attr "move_type" "move") (const_string "move") ++ (eq_attr "move_type" "const") (const_string "const") ++ (eq_attr "sync_mem" "!none") (const_string "syncloop")] ++ (const_string "unknown"))) ++ ++(define_attr "compact_form" "always,maybe,never" ++ (cond [(eq_attr "jal" "direct") ++ (const_string "always") ++ (eq_attr "jal" "indirect") ++ (const_string "maybe") ++ (eq_attr "type" "jump") ++ (const_string "maybe")] ++ (const_string "never"))) ++ ++;; Mode for conversion types (fcvt) ++;; I2S integer to float single (SI/DI to SF) ++;; I2D integer to float double (SI/DI to DF) ++;; S2I float to integer (SF to SI/DI) ++;; D2I float to integer (DF to SI/DI) ++;; D2S double to float single ++;; S2D float single to double ++ ++(define_attr "cnv_mode" "unknown,I2S,I2D,S2I,D2I,D2S,S2D" ++ (const_string "unknown")) ++ ++(define_attr "compression" "none,all" ++ (const_string "none")) ++ ++;; The number of individual instructions that a non-branch pattern generates, ++;; using units of BASE_INSN_LENGTH. ++(define_attr "insn_count" "" ++ (cond [;; "Ghost" instructions occupy no space. ++ (eq_attr "type" "ghost") ++ (const_int 0) ++ ++ ;; Check for doubleword moves that are decomposed into two ++ ;; instructions. ++ (and (eq_attr "move_type" "mgtf,mftg,move") ++ (eq_attr "dword_mode" "yes")) ++ (const_int 2) ++ ++ ;; Check for quadword moves that are decomposed into four ++ ;; instructions. ++ (and (eq_attr "move_type" "mgtf,mftg,move") ++ (eq_attr "qword_mode" "yes")) ++ (const_int 4) ++ ++ ;; Check for octaword moves that are decomposed into eight ++ ;; instructions. ++ (and (eq_attr "move_type" "mgtf,mftg,move") ++ (eq_attr "oword_mode" "yes")) ++ (const_int 8) ++ ++ ;; Constants, loads and stores are handled by external routines. 
++ (and (eq_attr "move_type" "const") ++ (eq_attr "dword_mode" "yes")) ++ (symbol_ref "loongarch_split_const_insns (operands[1])") ++ (eq_attr "move_type" "const") ++ (symbol_ref "loongarch_const_insns (operands[1])") ++ (eq_attr "move_type" "load,fpload") ++ (symbol_ref "loongarch_load_store_insns (operands[1], insn)") ++ (eq_attr "move_type" "store,fpstore") ++ (symbol_ref "loongarch_load_store_insns (operands[0], insn)") ++ ++ (eq_attr "type" "idiv3") ++ (symbol_ref "loongarch_idiv_insns (GET_MODE (PATTERN (insn)))")] ++(const_int 1))) ++ ++;; Length of instruction in bytes. The default is derived from "insn_count", ++;; but there are special cases for branches (which must be handled here) ++;; and for compressed single instructions. ++ ++ ++ ++(define_attr "length" "" ++ (cond [ ++ ;; Branch instructions have a range of [-0x20000,0x1fffc]. ++ ;; If a branch is outside this range, we have a choice of two ++ ;; sequences. ++ ;; ++ ;; For PIC, an out-of-range branch like: ++ ;; ++ ;; bne r1,r2,target ++ ;; ++ ;; becomes the equivalent of: ++ ;; ++ ;; beq r1,r2,1f ++ ;; la rd,target ++ ;; jirl zero,rd,0 ++ ;; 1: ++ ;; ++ ;; The non-PIC case is similar except that we use a direct ++ ;; jump instead of an la/jr pair. Since the target of this ++ ;; jump is an absolute 28-bit address (the other bits ++ ;; coming from the address of the delay slot) this form cannot ++ ;; cross a 256MB boundary. We could provide the option of ++ ;; using la/jr in this case too, but we do not do so at ++ ;; present. ++ ;; ++ ;; NOTE(review): orphaned fragment, its beginning is missing: from the shorten_branches reference address. ++ (eq_attr "type" "branch") ++ (cond [;; Any variant can handle the 17-bit range. ++ (and (le (minus (match_dup 0) (pc)) (const_int 65532)) ++ (le (minus (pc) (match_dup 0)) (const_int 65534))) ++ (const_int 4) ++ ++ ;; The non-PIC case: branch, and J. ++ (match_test "TARGET_ABSOLUTE_JUMPS") ++ (const_int 8)] ++ ++ ;; Use MAX_PIC_BRANCH_LENGTH as a (gross) overestimate. 
++ ;; loongarch_adjust_insn_length substitutes the correct length. ++ ;; ++ ;; Note that we can't simply use (symbol_ref ...) here ++ ;; because genattrtab needs to know the maximum length ++ ;; of an insn. ++ (const_int MAX_PIC_BRANCH_LENGTH)) ++ ] ++ (symbol_ref "get_attr_insn_count (insn) * BASE_INSN_LENGTH"))) ++ ++;; Attribute describing the processor. ++(define_enum_attr "cpu" "processor" ++ (const (symbol_ref "loongarch_tune"))) ++ ++;; The type of hardware hazard associated with this instruction. ++;; DELAY means that the next instruction cannot read the result ++;; of this one. ++(define_attr "hazard" "none,delay,forbidden_slot" ++ (const_string "none")) ++ ++;; Can the instruction be put into a delay slot? ++(define_attr "can_delay" "no,yes" ++ (if_then_else (and (eq_attr "type" "!branch,call,jump") ++ (eq_attr "hazard" "none") ++ (match_test "get_attr_insn_count (insn) == 1")) ++ (const_string "yes") ++ (const_string "no"))) ++ ++;; Describe a user's asm statement. ++(define_asm_attributes ++ [(set_attr "type" "multi") ++ (set_attr "can_delay" "no")]) ++ ++;; This mode iterator allows 32-bit and 64-bit GPR patterns to be generated ++;; from the same template. ++(define_mode_iterator GPR [SI (DI "TARGET_64BIT")]) ++ ++;; A copy of GPR that can be used when a pattern has two independent ++;; modes. ++(define_mode_iterator GPR2 [SI (DI "TARGET_64BIT")]) ++ ++;; Likewise, but exactly one of SI and DI is enabled, selected by TARGET_64BIT. ++(define_mode_iterator X [(SI "!TARGET_64BIT") (DI "TARGET_64BIT")]) ++ ++(define_mode_iterator MOVEP1 [SI SF]) ++(define_mode_iterator MOVEP2 [SI SF]) ++(define_mode_iterator JOIN_MODE [HI ++ SI ++ (SF "TARGET_HARD_FLOAT") ++ (DF "TARGET_HARD_FLOAT ++ && TARGET_DOUBLE_FLOAT")]) ++ ++;; This mode iterator allows :P to be used for patterns that operate on ++;; pointer-sized quantities. Exactly one of the two alternatives will match. 
++(define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")]) ++ ++;; This mode iterator allows :MOVECC to be used anywhere that a ++;; conditional-move-type condition is needed. ++(define_mode_iterator MOVECC [SI (DI "TARGET_64BIT") ++ (CC "TARGET_HARD_FLOAT")]) ++ ++;; 32-bit integer moves for which we provide move patterns. ++(define_mode_iterator IMOVE32 ++ [SI]) ++ ++;; 64-bit modes for which we provide move patterns. ++(define_mode_iterator MOVE64 ++ [DI DF]) ++ ++;; 128-bit modes for which we provide move patterns on 64-bit targets. ++(define_mode_iterator MOVE128 [TI TF]) ++ ++;; This mode iterator allows the QI and HI extension patterns to be ++;; defined from the same template. ++(define_mode_iterator SHORT [QI HI]) ++ ++;; Likewise the 64-bit truncate-and-shift patterns. ++(define_mode_iterator SUBDI [QI HI SI]) ++ ++;; This mode iterator allows the QI, HI, SI and DI extension patterns to be defined from the same template. ++(define_mode_iterator QHWD [QI HI SI (DI "TARGET_64BIT")]) ++ ++ ++;; This mode iterator allows :ANYF to be used wherever a scalar or vector ++;; floating-point mode is allowed. ++(define_mode_iterator ANYF [(SF "TARGET_HARD_FLOAT") ++ (DF "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT")]) ++ ++;; Like ANYF, but only applies to scalar modes (currently the same list as ANYF). ++(define_mode_iterator SCALARF [(SF "TARGET_HARD_FLOAT") ++ (DF "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT")]) ++ ++;; A floating-point mode for which moves involving FPRs may need to be split. ++(define_mode_iterator SPLITF ++ [(DF "!TARGET_64BIT && TARGET_DOUBLE_FLOAT") ++ (DI "!TARGET_64BIT && TARGET_DOUBLE_FLOAT") ++ (TF "TARGET_64BIT && TARGET_FLOAT64")]) ++ ++;; In GPR templates the "d" attribute supplies the size suffix: "mul." followed ++;; by it becomes "mul.w" in the 32-bit version and "mul.d" in the 64-bit version. ++(define_mode_attr d [(SI "w") (DI "d")]) ++ ++;; Upper-case "D" for DI; unlike "d", the SI value is the empty string. ++(define_mode_attr D [(SI "") (DI "D")]) ++ ++;; This attribute gives the length suffix for a load or store instruction. 
++;; The same suffixes work for zero and sign extensions. ++(define_mode_attr size [(QI "b") (HI "h") (SI "w") (DI "d")]) ++(define_mode_attr SIZE [(QI "B") (HI "H") (SI "W") (DI "D")]) ++ ++;; This attribute gives the mode mask of a SHORT. ++(define_mode_attr mask [(QI "0x00ff") (HI "0xffff")]) ++ ++;; This attribute gives the index of the top bit (size in bits minus one) of a SHORT. ++(define_mode_attr qi_hi [(QI "7") (HI "15")]) ++ ++;; Mode attributes for GPR loads. ++(define_mode_attr load [(SI "lw") (DI "ld")]) ++ ++(define_mode_attr load_l [(SI "ld.w") (DI "ld.d")]) ++;; Instruction names for stores. ++(define_mode_attr store [(QI "sb") (HI "sh") (SI "sw") (DI "sd")]) ++ ++;; Instruction names for indexed FPR loads and stores. ++(define_mode_attr floadx [(SF "fldx.s") (DF "fldx.d") (V2SF "fldx.d")]) ++(define_mode_attr fstorex [(SF "fstx.s") (DF "fstx.d") (V2SF "fstx.d")]) ++ ++;; Instruction names for indexed GPR loads and stores. ++(define_mode_attr loadx [(QI "ldx.b") ++ (HI "ldx.h") ++ (SI "ldx.w") ++ (DI "ldx.d")]) ++(define_mode_attr storex [(QI "stx.b") ++ (HI "stx.h") ++ (SI "stx.w") ++ (DI "stx.d")]) ++ ++;; This attribute gives the best constraint to use for registers of ++;; a given mode. ++(define_mode_attr reg [(SI "d") (DI "d") (CC "z")]) ++ ++;; This attribute gives the format suffix for floating-point operations. ++(define_mode_attr fmt [(SF "s") (DF "d")]) ++ ++;; This attribute gives the upper-case mode name for one unit of a ++;; floating-point mode or vector mode. ++(define_mode_attr UNITMODE [(SF "SF") (DF "DF") (V2SF "SF")]) ++ ++;; As above, but in lower case. ++(define_mode_attr unitmode [(SF "sf") (DF "df") (V2SF "sf")]) ++ ++ ++;; This attribute gives the integer mode that has half the size of ++;; the controlling mode. 
++(define_mode_attr HALFMODE [(DF "SI") (DI "SI") (V2SF "SI") ++ (TF "DI")]) ++ ++(define_mode_attr p [(SI "") (DI "d")]) ++ ++;; This attribute works around the early SB-1 rev2 core "F2" erratum: ++;; ++;; In certain cases, div.s and div.ps may have a rounding error ++;; and/or wrong inexact flag. ++;; ++;; Therefore, the SF entry below carries flag_unsafe_math_optimizations, i.e. ++;; a slight loss of precision must be OK; the DF entry carries no condition. ++(define_mode_attr divide_condition ++ [DF (SF "flag_unsafe_math_optimizations")]) ++ ++;; This attribute gives the conditions under which SQRT.fmt instructions ++;; can be used (SF and DF are listed below without a condition string). ++(define_mode_attr sqrt_condition ++ [SF DF]) ++ ++;; This code iterator allows signed and unsigned widening multiplications ++;; to use the same template. ++(define_code_iterator any_extend [sign_extend zero_extend]) ++ ++;; This code iterator allows the two right shift instructions to be ++;; generated from the same template. ++(define_code_iterator any_shiftrt [ashiftrt lshiftrt]) ++ ++;; This code iterator allows the three shift instructions to be generated ++;; from the same template. ++(define_code_iterator any_shift [ashift ashiftrt lshiftrt]) ++ ++;; This code iterator allows unsigned and signed division to be generated ++;; from the same template. ++(define_code_iterator any_div [div udiv]) ++ ++;; This code iterator allows unsigned and signed modulus to be generated ++;; from the same template. ++(define_code_iterator any_mod [mod umod]) ++ ++;; This code iterator allows addition and subtraction to be generated ++;; from the same template. ++(define_code_iterator addsub [plus minus]) ++ ++;; This code iterator allows addition and multiplication to be generated ++;; from the same template. 
++(define_code_iterator addmul [plus mult]) ++ ++;; This code iterator allows addition, subtraction and multiplication to be generated ++;; from the same template. ++(define_code_iterator addsubmul [plus minus mult]) ++ ++;; This code iterator allows all native floating-point comparisons to be ++;; generated from the same template. ++(define_code_iterator fcond [unordered uneq unlt unle eq lt le ordered ltgt ne]) ++ ++;; This code iterator is used for comparisons that can be implemented ++;; by swapping the operands. ++(define_code_iterator swapped_fcond [ge gt unge ungt]) ++ ++;; Equality operators. ++(define_code_iterator equality_op [eq ne]) ++ ++;; These code iterators allow the signed and unsigned scc operations to use ++;; the same template. ++(define_code_iterator any_gt [gt gtu]) ++(define_code_iterator any_ge [ge geu]) ++(define_code_iterator any_lt [lt ltu]) ++(define_code_iterator any_le [le leu]) ++ ++(define_code_iterator any_return [return simple_return]) ++ ++;; The "u" attribute expands to an empty string when doing a signed operation and ++;; "u" when doing an unsigned operation. ++(define_code_attr u [(sign_extend "") (zero_extend "u") ++ (div "") (udiv "u") ++ (mod "") (umod "u") ++ (gt "") (gtu "u") ++ (ge "") (geu "u") ++ (lt "") (ltu "u") ++ (le "") (leu "u")]) ++ ++;; The "U" attribute is like "u" except uppercase. ++(define_code_attr U [(sign_extend "") (zero_extend "U")]) ++ ++;; The "su" attribute is like "u", but the signed form expands to "s" rather than "". ++(define_code_attr su [(sign_extend "s") (zero_extend "u")]) ++ ++;; The "optab" attribute expands to the name of the optab for a particular code. ++(define_code_attr optab [(ashift "ashl") ++ (ashiftrt "ashr") ++ (lshiftrt "lshr") ++ (ior "ior") ++ (xor "xor") ++ (and "and") ++ (plus "add") ++ (minus "sub") ++ (mult "mul") ++ (return "return") ++ (simple_return "simple_return")]) ++ ++;; The "insn" attribute expands to the name of the insn that implements a particular code. 
++(define_code_attr insn [(ashift "sll") ++ (ashiftrt "sra") ++ (lshiftrt "srl") ++ (ior "or") ++ (xor "xor") ++ (and "and") ++ (plus "addu") ++ (minus "subu")]) ++ ++;; The "immediate_insn" attribute expands to the name of the insn that implements ++;; a particular code to operate on immediate values. ++(define_code_attr immediate_insn [(ior "ori") ++ (xor "xori") ++ (and "andi")]) ++ ++;; The "fcond" attribute is the condition mnemonic associated with a particular code. ++(define_code_attr fcond [(unordered "cun") ++ (uneq "cueq") ++ (unlt "cult") ++ (unle "cule") ++ (eq "ceq") ++ (lt "slt") ++ (le "sle") ++ (ordered "cor") ++ (ltgt "cne") ++ (ne "cune")]) ++ ++;; Similar, but for swapped conditions. ++(define_code_attr swapped_fcond [(ge "sle") ++ (gt "slt") ++ (unge "cule") ++ (ungt "cult")]) ++ ++;; The value of the bit when the branch is taken for branch_bit patterns. ++;; Comparison is always against zero so this depends on the operator. ++(define_code_attr bbv [(eq "0") (ne "1")]) ++ ++;; This is the inverse value of bbv. ++(define_code_attr bbinv [(eq "1") (ne "0")]) ++ ++;; The sel mnemonic to use depending on the condition test. ++(define_code_attr sel [(eq "masknez") (ne "maskeqz")]) ++(define_code_attr selinv [(eq "maskeqz") (ne "masknez")]) ++ ++;; Pipeline descriptions. ++;; ++;; generic.md provides a fallback for processors without a specific ++;; pipeline description. It is derived from the old define_function_unit ++;; version and uses the "alu" and "imuldiv" units declared below. ++;; ++;; Some of the processor-specific files are also derived from old ++;; define_function_unit descriptions and simply override the parts of ++;; generic.md that don't apply. The other processor-specific files ++;; are self-contained. ++(define_automaton "alu,imuldiv") ++ ++(define_cpu_unit "alu" "alu") ++(define_cpu_unit "imuldiv" "imuldiv") ++ ++;; Ghost instructions produce no real code and introduce no hazards. ++;; They exist purely to express an effect on dataflow. 
++(define_insn_reservation "ghost" 0 ++ (eq_attr "type" "ghost") ++ "nothing") ++ ++(include "generic.md") ++ ++;; ++;; .................... ++;; ++;; CONDITIONAL TRAPS ++;; ++;; .................... ++;; ++ ++(define_insn "trap" ++ [(trap_if (const_int 1) (const_int 0))] ++ "" ++{ ++ return "break\t0"; ++} ++ [(set_attr "type" "trap")]) ++ ++ ++ ++;; ++;; .................... ++;; ++;; ADDITION ++;; ++;; .................... ++;; ++ ++(define_insn "add3" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (plus:ANYF (match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f")))] ++ "" ++ "fadd.\t%0,%1,%2" ++ [(set_attr "type" "fadd") ++ (set_attr "mode" "")]) ++ ++(define_expand "add3" ++ [(set (match_operand:GPR 0 "register_operand") ++ (plus:GPR (match_operand:GPR 1 "register_operand") ++ (match_operand:GPR 2 "arith_operand")))] ++ "") ++ ++(define_insn "*add3" ++ [(set (match_operand:GPR 0 "register_operand" "=d,d") ++ (plus:GPR (match_operand:GPR 1 "register_operand" "d,d") ++ (match_operand:GPR 2 "arith_operand" "d,Q")))] ++ "" ++{ ++ if (which_alternative == 0) ++ return "add.\t%0,%1,%2"; ++ else ++ return "addi.\t%0,%1,%2"; ++} ++ [(set_attr "alu_type" "add") ++ (set_attr "compression" "*,*") ++ (set_attr "mode" "")]) ++ ++;; 32-bit add whose result is sign-extended to DImode (TARGET_64BIT only). ++(define_insn "*addsi3_extended" ++ [(set (match_operand:DI 0 "register_operand" "=d,d") ++ (sign_extend:DI ++ (plus:SI (match_operand:SI 1 "register_operand" "d,d") ++ (match_operand:SI 2 "arith_operand" "d,Q"))))] ++ "TARGET_64BIT" ++ "@ ++ add.w\t%0,%1,%2 ++ addi.w\t%0,%1,%2" ++ [(set_attr "alu_type" "add") ++ (set_attr "mode" "SI")]) ++ ++ ++;; ++;; .................... ++;; ++;; SUBTRACTION ++;; ++;; .................... 
++;; ++ ++(define_insn "sub3" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (minus:ANYF (match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f")))] ++ "" ++ "fsub.\t%0,%1,%2" ++ [(set_attr "type" "fadd") ++ (set_attr "mode" "")]) ++ ++(define_insn "sub3" ++ [(set (match_operand:GPR 0 "register_operand" "=d") ++ (minus:GPR (match_operand:GPR 1 "register_operand" "d") ++ (match_operand:GPR 2 "register_operand" "d")))] ++ "" ++ "sub.\t%0,%1,%2" ++ [(set_attr "alu_type" "sub") ++ (set_attr "compression" "*") ++ (set_attr "mode" "")]) ++ ++(define_insn "*subsi3_extended" ++ [(set (match_operand:DI 0 "register_operand" "=d") ++ (sign_extend:DI ++ (minus:SI (match_operand:SI 1 "register_operand" "d") ++ (match_operand:SI 2 "register_operand" "d"))))] ++ "TARGET_64BIT" ++ "sub.w\t%0,%1,%2" ++ [(set_attr "alu_type" "sub") ++ (set_attr "mode" "SI")]) ++ ++;; ++;; .................... ++;; ++;; MULTIPLICATION ++;; ++;; .................... ++;; ++ ++(define_expand "mul3" ++ [(set (match_operand:SCALARF 0 "register_operand") ++ (mult:SCALARF (match_operand:SCALARF 1 "register_operand") ++ (match_operand:SCALARF 2 "register_operand")))] ++ "" ++ "") ++ ++(define_insn "*mul3" ++ [(set (match_operand:SCALARF 0 "register_operand" "=f") ++ (mult:SCALARF (match_operand:SCALARF 1 "register_operand" "f") ++ (match_operand:SCALARF 2 "register_operand" "f")))] ++ "" ++ "fmul.\t%0,%1,%2" ++ [(set_attr "type" "fmul") ++ (set_attr "mode" "")]) ++ ++(define_insn "mul3" ++ [(set (match_operand:GPR 0 "register_operand" "=d") ++ (mult:GPR (match_operand:GPR 1 "register_operand" "d") ++ (match_operand:GPR 2 "register_operand" "d")))] ++ "" ++ "mul.\t%0,%1,%2" ++ [(set_attr "type" "imul3") ++ (set_attr "mode" "")]) ++ ++;; Widening signed 32x32 to 64-bit multiply. mul.d is otherwise only emitted for ++;; DImode, which GPR enables under TARGET_64BIT, so require TARGET_64BIT here too. ++(define_insn "mulsidi3_64bit" ++ [(set (match_operand:DI 0 "register_operand" "=d") ++ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "d")) ++ (sign_extend:DI (match_operand:SI 2 "register_operand" "d"))))] 
++ "TARGET_64BIT" ++ "mul.d\t%0,%1,%2" ++ [(set_attr "type" "imul3") ++ (set_attr "mode" "DI")]) ++ ++ ++;; FIXME: copied from n_loongarch.md; the two patterns below are commented out. ++;;(define_insn "*mulsi3_extended" ++;; [(set (match_operand:DI 0 "register_operand" "=r") ++;; (sign_extend:DI ++;; (mult:SI (match_operand:SI 1 "register_operand" " r") ++;; (match_operand:SI 2 "register_operand" " r"))))] ++;; "TARGET_64BIT" ++;; "mulw\t%0,%1,%2" ++;; [(set_attr "type" "imul") ++;; (set_attr "mode" "SI")]) ++;; ++;;(define_insn "*mulsi3_extended2" ++;; [(set (match_operand:DI 0 "register_operand" "=r") ++;; (sign_extend:DI ++;; (subreg:SI (mult:DI (match_operand:DI 1 "register_operand" " r") ++;; (match_operand:DI 2 "register_operand" " r")) ++;; 0)))] ++;; "TARGET_64BIT" ++;; "mulw\t%0,%1,%2" ++;; [(set_attr "type" "imul") ++;; (set_attr "mode" "SI")]) ++ ++ ++;; ++;; ........................ ++;; ++;; MULTIPLICATION HIGH-PART ++;; ++;; ........................ ++;; ++ ++;; Build the 128-bit product from muldi3 (low half) and muldi3_highpart (high half). ++(define_expand "mulditi3" ++ [(set (match_operand:TI 0 "register_operand") ++ (mult:TI (any_extend:TI (match_operand:DI 1 "register_operand")) ++ (any_extend:TI (match_operand:DI 2 "register_operand"))))] ++ "TARGET_64BIT" ++{ ++ rtx low = gen_reg_rtx (DImode); ++ emit_insn (gen_muldi3 (low, operands[1], operands[2])); ++ ++ rtx high = gen_reg_rtx (DImode); ++ emit_insn (gen_muldi3_highpart (high, operands[1], operands[2])); ++ ++ emit_move_insn (gen_lowpart (DImode, operands[0]), low); ++ emit_move_insn (gen_highpart (DImode, operands[0]), high); ++ DONE; ++}) ++ ++(define_insn "muldi3_highpart" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (truncate:DI ++ (lshiftrt:TI ++ (mult:TI (any_extend:TI ++ (match_operand:DI 1 "register_operand" " r")) ++ (any_extend:TI ++ (match_operand:DI 2 "register_operand" " r"))) ++ (const_int 64))))] ++ "TARGET_64BIT" ++ "mulh.d\t%0,%1,%2" ++ [(set_attr "type" "imul") ++ (set_attr "mode" "DI")]) ++ ++(define_expand "mulsidi3" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (mult:DI 
(any_extend:DI ++ (match_operand:SI 1 "register_operand" " r")) ++ (any_extend:DI ++ (match_operand:SI 2 "register_operand" " r"))))] ++ "!TARGET_64BIT" ++{ ++ rtx temp = gen_reg_rtx (SImode); ++ emit_insn (gen_mulsi3 (temp, operands[1], operands[2])); ++ emit_insn (gen_mulsi3_highpart (loongarch_subword (operands[0], true), ++ operands[1], operands[2])); ++ emit_insn (gen_movsi (loongarch_subword (operands[0], false), temp)); ++ DONE; ++}) ++ ++(define_insn "mulsi3_highpart" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (truncate:SI ++ (lshiftrt:DI ++ (mult:DI (any_extend:DI ++ (match_operand:SI 1 "register_operand" " r")) ++ (any_extend:DI ++ (match_operand:SI 2 "register_operand" " r"))) ++ (const_int 32))))] ++ "!TARGET_64BIT" ++ "mulh.w\t%0,%1,%2" ++ [(set_attr "type" "imul") ++ (set_attr "mode" "SI")]) ++ ++;; Floating point multiply accumulate instructions. ++ ++(define_expand "fma4" ++ [(set (match_operand:ANYF 0 "register_operand") ++ (fma:ANYF (match_operand:ANYF 1 "register_operand") ++ (match_operand:ANYF 2 "register_operand") ++ (match_operand:ANYF 3 "register_operand")))] ++ "TARGET_HARD_FLOAT") ++ ++(define_insn "*fma4_madd4" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (fma:ANYF (match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f") ++ (match_operand:ANYF 3 "register_operand" "f")))] ++ "TARGET_HARD_FLOAT" ++ "fmadd.\t%0,%1,%2,%3" ++ [(set_attr "type" "fmadd") ++ (set_attr "mode" "")]) ++ ++;; The fms, fnma, and fnms instructions can be used even when HONOR_NANS ++;; is true because while IEEE 754-2008 requires the negate operation to ++;; negate the sign of a NAN and the LARCH neg instruction does not do this, ++;; the fma part of the instruction has no requirement on how the sign of ++;; a NAN is handled and so the final sign bit of the entire operation is ++;; undefined. 
++ ++(define_expand "fms4" ++ [(set (match_operand:ANYF 0 "register_operand") ++ (fma:ANYF (match_operand:ANYF 1 "register_operand") ++ (match_operand:ANYF 2 "register_operand") ++ (neg:ANYF (match_operand:ANYF 3 "register_operand"))))] ++ "TARGET_HARD_FLOAT") ++ ++;; fms is (fma op1 op2 (neg op3)), i.e. (op1 * op2) - op3; emitted as fmsub. ++(define_insn "*fms4_msub4" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (fma:ANYF (match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f") ++ (neg:ANYF (match_operand:ANYF 3 "register_operand" "f"))))] ++ "TARGET_HARD_FLOAT" ++ "fmsub.\t%0,%1,%2,%3" ++ [(set_attr "type" "fmadd") ++ (set_attr "mode" "")]) ++ ++;; fnma is defined in GCC as (fma (neg op1) op2 op3) ++;; (-op1 * op2) + op3 ==> -(op1 * op2) + op3 ==> -((op1 * op2) - op3) ++;; The loongarch nmsub instructions implement -((op1 * op2) - op3) ++;; This transformation means we may return the wrong signed zero ++;; so we check HONOR_SIGNED_ZEROS. ++ ++(define_expand "fnma4" ++ [(set (match_operand:ANYF 0 "register_operand") ++ (fma:ANYF (neg:ANYF (match_operand:ANYF 1 "register_operand")) ++ (match_operand:ANYF 2 "register_operand") ++ (match_operand:ANYF 3 "register_operand")))] ++ "TARGET_HARD_FLOAT && !HONOR_SIGNED_ZEROS (mode)") ++ ++(define_insn "*fnma4_nmsub4" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (fma:ANYF (neg:ANYF (match_operand:ANYF 1 "register_operand" "f")) ++ (match_operand:ANYF 2 "register_operand" "f") ++ (match_operand:ANYF 3 "register_operand" "f")))] ++ "TARGET_HARD_FLOAT && !HONOR_SIGNED_ZEROS (mode)" ++ "fnmsub.\t%0,%1,%2,%3" ++ [(set_attr "type" "fmadd") ++ (set_attr "mode" "")]) ++ ++;; fnms is defined as: (fma (neg op1) op2 (neg op3)) ++;; ((-op1) * op2) - op3 ==> -(op1 * op2) - op3 ==> -((op1 * op2) + op3) ++;; The loongarch nmadd instructions implement -((op1 * op2) + op3) ++;; This transformation means we may return the wrong signed zero ++;; so we check HONOR_SIGNED_ZEROS. 
++ ++(define_expand "fnms4" ++ [(set (match_operand:ANYF 0 "register_operand") ++ (fma:ANYF ++ (neg:ANYF (match_operand:ANYF 1 "register_operand")) ++ (match_operand:ANYF 2 "register_operand") ++ (neg:ANYF (match_operand:ANYF 3 "register_operand"))))] ++ "TARGET_HARD_FLOAT && !HONOR_SIGNED_ZEROS (mode)") ++ ++(define_insn "*fnms4_nmadd4" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (fma:ANYF ++ (neg:ANYF (match_operand:ANYF 1 "register_operand" "f")) ++ (match_operand:ANYF 2 "register_operand" "f") ++ (neg:ANYF (match_operand:ANYF 3 "register_operand" "f"))))] ++ "TARGET_HARD_FLOAT && !HONOR_SIGNED_ZEROS (mode)" ++ "fnmadd.\t%0,%1,%2,%3" ++ [(set_attr "type" "fmadd") ++ (set_attr "mode" "")]) ++ ++;; ++;; .................... ++;; ++;; DIVISION and REMAINDER ++;; ++;; .................... ++;; ++ ++(define_expand "div3" ++ [(set (match_operand:ANYF 0 "register_operand") ++ (div:ANYF (match_operand:ANYF 1 "reg_or_1_operand") ++ (match_operand:ANYF 2 "register_operand")))] ++ "" ++{ ++ if (const_1_operand (operands[1], mode)) ++ if (!(ISA_HAS_FP_RECIP_RSQRT (mode) ++ && flag_unsafe_math_optimizations)) ++ operands[1] = force_reg (mode, operands[1]); ++}) ++ ++;; These patterns work around the early SB-1 rev2 core "F1" erratum: ++;; ++;; If an mftg1 or dmftg1 happens to access the floating point register ++;; file at the same time a long latency operation (div, sqrt, recip, ++;; sqrt) iterates an intermediate result back through the floating ++;; point register file bypass, then instead of returning the correct ++;; register value the mftg1 or dmftg1 operation returns the intermediate ++;; result of the long latency operation. ++;; ++;; The workaround is to insert an unconditional 'mov' from/to the ++;; long latency op destination register. 
++;; NOTE(review): *div3 below emits only fdiv, with no such 'mov' - confirm whether this erratum comment still applies. ++(define_insn "*div3" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (div:ANYF (match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f")))] ++ "" ++{ ++ return "fdiv.\t%0,%1,%2"; ++} ++ [(set_attr "type" "fdiv") ++ (set_attr "mode" "") ++ (set_attr "insn_count" "1")]) ++ ++(define_insn "*recip3" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (div:ANYF (match_operand:ANYF 1 "const_1_operand" "") ++ (match_operand:ANYF 2 "register_operand" "f")))] ++ "ISA_HAS_FP_RECIP_RSQRT (mode) && flag_unsafe_math_optimizations" ++{ ++ return "frecip.\t%0,%2"; ++} ++ [(set_attr "type" "frdiv") ++ (set_attr "mode" "") ++ (set_attr "insn_count" "1")]) ++ ++;; Integer division and modulus. ++ ++(define_insn "div3" ++ [(set (match_operand:GPR 0 "register_operand" "=&d") ++ (any_div:GPR (match_operand:GPR 1 "register_operand" "d") ++ (match_operand:GPR 2 "register_operand" "d")))] ++ "" ++ { ++ return loongarch_output_division ("div.\t%0,%1,%2", operands); ++ } ++ [(set_attr "type" "idiv3") ++ (set_attr "mode" "")]) ++ ++(define_insn "mod3" ++ [(set (match_operand:GPR 0 "register_operand" "=&d") ++ (any_mod:GPR (match_operand:GPR 1 "register_operand" "d") ++ (match_operand:GPR 2 "register_operand" "d")))] ++ "" ++ { ++ return loongarch_output_division ("mod.\t%0,%1,%2", operands); ++ } ++ [(set_attr "type" "idiv3") ++ (set_attr "mode" "")]) ++ ++;; ++;; .................... ++;; ++;; SQUARE ROOT ++;; ++;; .................... ++ ++;; These patterns work around the early SB-1 rev2 core "F1" erratum (see ++;; "*div[sd]f3" comment for details). 
++;; NOTE(review): sqrt2 below emits only fsqrt, with no workaround sequence - confirm this comment still applies. ++(define_insn "sqrt2" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (sqrt:ANYF (match_operand:ANYF 1 "register_operand" "f")))] ++ "" ++{ ++ return "fsqrt.\t%0,%1"; ++} ++ [(set_attr "type" "fsqrt") ++ (set_attr "mode" "") ++ (set_attr "insn_count" "1")]) ++ ++(define_insn "*rsqrta" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (div:ANYF (match_operand:ANYF 1 "const_1_operand" "") ++ (sqrt:ANYF (match_operand:ANYF 2 "register_operand" "f"))))] ++ "ISA_HAS_FP_RECIP_RSQRT (mode) && flag_unsafe_math_optimizations" ++{ ++ return "frsqrt.\t%0,%2"; ++} ++ [(set_attr "type" "frsqrt") ++ (set_attr "mode" "") ++ (set_attr "insn_count" "1")]) ++ ++(define_insn "*rsqrtb" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (sqrt:ANYF (div:ANYF (match_operand:ANYF 1 "const_1_operand" "") ++ (match_operand:ANYF 2 "register_operand" "f"))))] ++ "ISA_HAS_FP_RECIP_RSQRT (mode) && flag_unsafe_math_optimizations" ++{ ++ return "frsqrt.\t%0,%2"; ++} ++ [(set_attr "type" "frsqrt") ++ (set_attr "mode" "") ++ (set_attr "insn_count" "1")]) ++ ++;; ++;; .................... ++;; ++;; ABSOLUTE VALUE ++;; ++;; .................... ++ ++;; Do not use the integer abs macro instruction, since that signals an ++;; exception on -2147483648 (sigh). ++ ++;; The "legacy" (as opposed to "2008") form of ABS.fmt is an arithmetic ++;; instruction that treats all NaN inputs as invalid; it does not clear ++;; their sign bit. We therefore can't use that form if the signs of ++;; NaNs matter. ++;; NOTE(review): abs2 below is nevertheless unconditional - confirm the restriction above is meant to apply. ++(define_insn "abs2" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (abs:ANYF (match_operand:ANYF 1 "register_operand" "f")))] ++ "" ++ "fabs.\t%0,%1" ++ [(set_attr "type" "fabs") ++ (set_attr "mode" "")]) ++ ++;; ++;; ................... ++;; ++;; Count leading zeroes. ++;; ++;; ................... 
++;; ++ ++(define_insn "clz2" ++ [(set (match_operand:GPR 0 "register_operand" "=d") ++ (clz:GPR (match_operand:GPR 1 "register_operand" "d")))] ++ "" ++ "clz.\t%0,%1" ++ [(set_attr "type" "clz") ++ (set_attr "mode" "")]) ++ ++;; ++;; ................... ++;; ++;; Count trailing zeroes. ++;; ++;; ................... ++;; ++ ++(define_insn "ctz2" ++ [(set (match_operand:GPR 0 "register_operand" "=d") ++ (ctz:GPR (match_operand:GPR 1 "register_operand" "d")))] ++ "" ++ "ctz.\t%0,%1" ++ [(set_attr "type" "clz") ++ (set_attr "mode" "")]) ++ ++ ++ ++;; ++;; .................... ++;; ++;; NEGATION and ONE'S COMPLEMENT ++;; ++;; .................... ++;; NOTE(review): "%." in the templates below presumably prints the zero register - confirm against the operand-printing code. ++(define_insn "negsi2" ++ [(set (match_operand:SI 0 "register_operand" "=d") ++ (neg:SI (match_operand:SI 1 "register_operand" "d")))] ++ "" ++{ ++ return "sub.w\t%0,%.,%1"; ++} ++ [(set_attr "alu_type" "sub") ++ (set_attr "mode" "SI")]) ++ ++(define_insn "negdi2" ++ [(set (match_operand:DI 0 "register_operand" "=d") ++ (neg:DI (match_operand:DI 1 "register_operand" "d")))] ++ "TARGET_64BIT" ++ "sub.d\t%0,%.,%1" ++ [(set_attr "alu_type" "sub") ++ (set_attr "mode" "DI")]) ++ ++;; The "legacy" (as opposed to "2008") form of NEG.fmt is an arithmetic ++;; instruction that treats all NaN inputs as invalid; it does not flip ++;; their sign bit. We therefore can't use that form if the signs of ++;; NaNs matter. ++;; NOTE(review): neg2 below is nevertheless unconditional - confirm the restriction above is meant to apply. ++(define_insn "neg2" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (neg:ANYF (match_operand:ANYF 1 "register_operand" "f")))] ++ "" ++ "fneg.\t%0,%1" ++ [(set_attr "type" "fneg") ++ (set_attr "mode" "")]) ++ ++(define_insn "one_cmpl2" ++ [(set (match_operand:GPR 0 "register_operand" "=d") ++ (not:GPR (match_operand:GPR 1 "register_operand" "d")))] ++ "" ++{ ++ return "nor\t%0,%.,%1"; ++} ++ [(set_attr "alu_type" "not") ++ (set_attr "compression" "*") ++ (set_attr "mode" "")]) ++ ++ ++;; ++;; .................... ++;; ++;; LOGICAL ++;; ++;; .................... 
++;; ++ ++ ++(define_expand "and3" ++ [(set (match_operand:GPR 0 "register_operand") ++ (and:GPR (match_operand:GPR 1 "register_operand") ++ (match_operand:GPR 2 "and_reg_operand")))]) ++ ++;; The middle-end is not allowed to convert ANDing with 0xffff_ffff into a ++;; zero_extendsidi2 because of TARGET_TRULY_NOOP_TRUNCATION, so handle these ++;; here. Note that this variant does not trigger for SI mode because we ++;; require a 64-bit HOST_WIDE_INT and 0xffff_ffff wouldn't be a canonical ++;; sign-extended SImode value. ++;; ++;; These are possible combinations for operand 1 and 2. ++;; (r=register, mem=memory, x=match, S=split): ++;; ++;; \ op1 r/EXT r/!EXT mem ++;; op2 ++;; ++;; andi x x ++;; 0xff x x x ++;; 0xffff x x x ++;; 0xffff_ffff x S x ++;; low-bitmask x ++;; register x x ++;; register =op1 ++ ++(define_insn "*and3" ++ [(set (match_operand:GPR 0 "register_operand" "=d,d,d,d,d,d,d") ++ (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "o,o,W,d,d,d,d") ++ (match_operand:GPR 2 "and_operand" "Yb,Yh,Yw,K,Yx,Yw,d")))] ++ " and_operands_ok (mode, operands[1], operands[2])" ++{ ++ int len; ++ ++ switch (which_alternative) ++ { ++ case 0: ++ operands[1] = gen_lowpart (QImode, operands[1]); ++ return "ld.bu\t%0,%1"; ++ case 1: ++ operands[1] = gen_lowpart (HImode, operands[1]); ++ return "ld.hu\t%0,%1"; ++ case 2: ++ operands[1] = gen_lowpart (SImode, operands[1]); ++ if (loongarch_14bit_shifted_offset_address_p (XEXP (operands[1], 0), SImode)) ++ return "ldptr.w\t%0,%1\n\tbstrins.d\t%0,$zero,63,32"; ++ else if (loongarch_12bit_offset_address_p (XEXP (operands[1], 0), SImode)) ++ return "ld.wu\t%0,%1"; ++ else ++ gcc_unreachable (); ++ case 3: ++ return "andi\t%0,%1,%x2"; ++ case 4: ++ len = low_bitmask_len (mode, INTVAL (operands[2])); ++ operands[2] = GEN_INT (len-1); ++ return "bstrpick.\t%0,%1,%2,0"; ++ case 5: ++ return "#"; ++ case 6: ++ return "and\t%0,%1,%2"; ++ default: ++ gcc_unreachable (); ++ } ++} ++ [(set_attr "move_type" 
"load,load,load,andi,pick_ins,shift_shift,logical") ++ (set_attr "compression" "*,*,*,*,*,*,*") ++ (set_attr "mode" "")]) ++ ++(define_expand "ior3" ++ [(set (match_operand:GPR 0 "register_operand") ++ (ior:GPR (match_operand:GPR 1 "register_operand") ++ (match_operand:GPR 2 "uns_arith_operand")))] ++ "" ++{ ++}) ++ ++(define_insn "*ior3" ++ [(set (match_operand:GPR 0 "register_operand" "=d,d") ++ (ior:GPR (match_operand:GPR 1 "register_operand" "d,d") ++ (match_operand:GPR 2 "uns_arith_operand" "d,K")))] ++ "" ++ "@ ++ or\t%0,%1,%2 ++ ori\t%0,%1,%x2" ++ [(set_attr "alu_type" "or") ++ (set_attr "compression" "*,*") ++ (set_attr "mode" "")]) ++ ++(define_insn "*iorhi3" ++ [(set (match_operand:HI 0 "register_operand" "=d,d") ++ (ior:HI (match_operand:HI 1 "register_operand" "d,d") ++ (match_operand:HI 2 "uns_arith_operand" "K,d")))] ++ "" ++ "@ ++ ori\t%0,%1,%x2 ++ or\t%0,%1,%2" ++ [(set_attr "alu_type" "or") ++ (set_attr "mode" "HI")]) ++ ++(define_expand "xor3" ++ [(set (match_operand:GPR 0 "register_operand") ++ (xor:GPR (match_operand:GPR 1 "register_operand") ++ (match_operand:GPR 2 "uns_arith_operand")))] ++ "" ++ "") ++ ++(define_insn "*xor3" ++ [(set (match_operand:GPR 0 "register_operand" "=d,d") ++ (xor:GPR (match_operand:GPR 1 "register_operand" "d,d") ++ (match_operand:GPR 2 "uns_arith_operand" "d,K")))] ++ "" ++ "@ ++ xor\t%0,%1,%2 ++ xori\t%0,%1,%x2" ++ [(set_attr "alu_type" "xor") ++ (set_attr "compression" "*,*") ++ (set_attr "mode" "")]) ++ ++ ++(define_insn "*nor3" ++ [(set (match_operand:GPR 0 "register_operand" "=d") ++ (and:GPR (not:GPR (match_operand:GPR 1 "register_operand" "d")) ++ (not:GPR (match_operand:GPR 2 "register_operand" "d"))))] ++ "" ++ "nor\t%0,%1,%2" ++ [(set_attr "alu_type" "nor") ++ (set_attr "mode" "")]) ++ ++;; ++;; .................... ++;; ++;; TRUNCATION ++;; ++;; .................... 
++ ++ ++ ++(define_insn "truncdfsf2" ++ [(set (match_operand:SF 0 "register_operand" "=f") ++ (float_truncate:SF (match_operand:DF 1 "register_operand" "f")))] ++ "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT" ++ "fcvt.s.d\t%0,%1" ++ [(set_attr "type" "fcvt") ++ (set_attr "cnv_mode" "D2S") ++ (set_attr "mode" "SF")]) ++ ++;; Integer truncation patterns. Truncating SImode values to smaller ++;; modes is a no-op, as it is for most other GCC ports. Truncating ++;; DImode values to SImode is not a no-op for TARGET_64BIT since we ++;; need to make sure that the lower 32 bits are properly sign-extended ++;; (see TARGET_TRULY_NOOP_TRUNCATION). Truncating DImode values into modes ++;; smaller than SImode is equivalent to two separate truncations: ++;; ++;; A B ++;; DI ---> HI == DI ---> SI ---> HI ++;; DI ---> QI == DI ---> SI ---> QI ++;; ++;; Step A needs a real instruction but step B does not. ++ ++(define_insn "truncdisi2" ++ [(set (match_operand:SI 0 "nonimmediate_operand" "=d,ZC,m") ++ (truncate:SI (match_operand:DI 1 "register_operand" "d,d,d")))] ++ "TARGET_64BIT" ++ "@ ++ slli.w\t%0,%1,0 ++ stptr.w\t%1,%0 ++ st.w\t%1,%0" ++ [(set_attr "move_type" "sll0,store,store") ++ (set_attr "mode" "SI")]) ++ ++(define_insn "truncdi2" ++ [(set (match_operand:SHORT 0 "nonimmediate_operand" "=d,m") ++ (truncate:SHORT (match_operand:DI 1 "register_operand" "d,d")))] ++ "TARGET_64BIT" ++ "@ ++ slli.w\t%0,%1,0 ++ st.\t%1,%0" ++ [(set_attr "move_type" "sll0,store") ++ (set_attr "mode" "SI")]) ++ ++;; Combiner patterns to optimize shift/truncate combinations. 
++ ++(define_insn "*ashr_trunc" ++ [(set (match_operand:SUBDI 0 "register_operand" "=d") ++ (truncate:SUBDI ++ (ashiftrt:DI (match_operand:DI 1 "register_operand" "d") ++ (match_operand:DI 2 "const_arith_operand" ""))))] ++ "TARGET_64BIT && IN_RANGE (INTVAL (operands[2]), 32, 63)" ++ "srai.d\t%0,%1,%2" ++ [(set_attr "type" "shift") ++ (set_attr "mode" "")]) ++ ++(define_insn "*lshr32_trunc" ++ [(set (match_operand:SUBDI 0 "register_operand" "=d") ++ (truncate:SUBDI ++ (lshiftrt:DI (match_operand:DI 1 "register_operand" "d") ++ (const_int 32))))] ++ "TARGET_64BIT" ++ "srai.d\t%0,%1,32" ++ [(set_attr "type" "shift") ++ (set_attr "mode" "")]) ++ ++ ++ ++;; ++;; .................... ++;; ++;; ZERO EXTENSION ++;; ++;; .................... ++ ++;; Extension insns. ++ ++(define_expand "zero_extendsidi2" ++ [(set (match_operand:DI 0 "register_operand") ++ (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))] ++ "TARGET_64BIT") ++ ++(define_insn "*zero_extendsidi2_dext" ++ [(set (match_operand:DI 0 "register_operand" "=d,d,d") ++ (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "d,ZC,W")))] ++ "TARGET_64BIT" ++ "@ ++ bstrpick.d\t%0,%1,31,0 ++ ldptr.w\t%0,%1\n\tlu32i.d\t%0,0 ++ ld.wu\t%0,%1" ++ [(set_attr "move_type" "arith,load,load") ++ (set_attr "mode" "DI") ++ (set_attr "insn_count" "1,2,1")]) ++ ++(define_insn "*zero_extendsidi2_internal" ++ [(set (match_operand:DI 0 "register_operand" "=d,d,d") ++ (subreg:DI (match_operand:SI 1 "nonimmediate_operand" "d,ZC,W") 0))] ++ "TARGET_64BIT" ++ "@ ++ bstrpick.d\t%0,%1,31,0 ++ ldptr.w\t%0,%1\n\tlu32i.d\t%0,0 ++ ld.wu\t%0,%1" ++ [(set_attr "move_type" "arith,load,load") ++ (set_attr "mode" "DI") ++ (set_attr "insn_count" "1,2,1")]) ++;; See the comment before the *and3 pattern why this is generated by ++;; combine. 
++ ++(define_expand "zero_extend2" ++ [(set (match_operand:GPR 0 "register_operand") ++ (zero_extend:GPR (match_operand:SHORT 1 "nonimmediate_operand")))] ++ "" ++{ ++}) ++ ++(define_insn "*zero_extend2" ++ [(set (match_operand:GPR 0 "register_operand" "=d,d") ++ (zero_extend:GPR ++ (match_operand:SHORT 1 "nonimmediate_operand" "d,m")))] ++ "" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "bstrpick.\t%0,%1,,0"; ++ case 1: ++ return "ld.u\t%0,%1"; ++ default: ++ gcc_unreachable (); ++ } ++} ++ [(set_attr "move_type" "pick_ins,load") ++ (set_attr "compression" "*,*") ++ (set_attr "mode" "")]) ++ ++ ++(define_expand "zero_extendqihi2" ++ [(set (match_operand:HI 0 "register_operand") ++ (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand")))] ++ "" ++{ ++}) ++ ++(define_insn "*zero_extendqihi2" ++ [(set (match_operand:HI 0 "register_operand" "=d,d") ++ (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "d,m")))] ++ "" ++ "@ ++ andi\t%0,%1,0x00ff ++ ld.bu\t%0,%1" ++ [(set_attr "move_type" "andi,load") ++ (set_attr "mode" "HI")]) ++ ++;; Combiner patterns to optimize truncate/zero_extend combinations. ++ ++(define_insn "*zero_extend_trunc" ++ [(set (match_operand:GPR 0 "register_operand" "=d") ++ (zero_extend:GPR ++ (truncate:SHORT (match_operand:DI 1 "register_operand" "d"))))] ++ "TARGET_64BIT" ++ "bstrpick.\t%0,%1,,0" ++ [(set_attr "move_type" "pick_ins") ++ (set_attr "mode" "")]) ++ ++(define_insn "*zero_extendhi_truncqi" ++ [(set (match_operand:HI 0 "register_operand" "=d") ++ (zero_extend:HI ++ (truncate:QI (match_operand:DI 1 "register_operand" "d"))))] ++ "TARGET_64BIT" ++ "andi\t%0,%1,0xff" ++ [(set_attr "alu_type" "and") ++ (set_attr "mode" "HI")]) ++ ++;; ++;; .................... ++;; ++;; SIGN EXTENSION ++;; ++;; .................... ++ ++;; Extension insns. ++;; Those for integer source operand are ordered widest source type first. 
++ ++;; When TARGET_64BIT, all SImode integer and accumulator registers ++;; should already be in sign-extended form (see TARGET_TRULY_NOOP_TRUNCATION ++;; and truncdisi2). We can therefore get rid of register->register ++;; instructions if we constrain the source to be in the same register as ++;; the destination. ++;; ++;; Only the pre-reload scheduler sees the type of the register alternatives; ++;; we split them into nothing before the post-reload scheduler runs. ++;; These alternatives therefore have type "move" in order to reflect ++;; what happens if the two pre-reload operands cannot be tied, and are ++;; instead allocated two separate GPRs. We don't distinguish between ++;; the GPR and LO cases because we don't usually know during pre-reload ++;; scheduling whether an operand will be LO or not. ++(define_insn_and_split "extendsidi2" ++ [(set (match_operand:DI 0 "register_operand" "=d,d,d") ++ (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "0,ZC,m")))] ++ "TARGET_64BIT" ++ "@ ++ # ++ ldptr.w\t%0,%1 ++ ld.w\t%0,%1" ++ "&& reload_completed && register_operand (operands[1], VOIDmode)" ++ [(const_int 0)] ++{ ++ emit_note (NOTE_INSN_DELETED); ++ DONE; ++} ++ [(set_attr "move_type" "move,load,load") ++ (set_attr "mode" "DI")]) ++ ++(define_expand "extend2" ++ [(set (match_operand:GPR 0 "register_operand") ++ (sign_extend:GPR (match_operand:SHORT 1 "nonimmediate_operand")))] ++ "") ++ ++ ++(define_insn "*extend2_se" ++ [(set (match_operand:GPR 0 "register_operand" "=d,d") ++ (sign_extend:GPR ++ (match_operand:SHORT 1 "nonimmediate_operand" "d,m")))] ++ "" ++ "@ ++ ext.w.\t%0,%1 ++ ld.\t%0,%1" ++ [(set_attr "move_type" "signext,load") ++ (set_attr "mode" "")]) ++ ++(define_expand "extendqihi2" ++ [(set (match_operand:HI 0 "register_operand") ++ (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand")))] ++ "") ++ ++(define_insn "*extendqihi2_seb" ++ [(set (match_operand:HI 0 "register_operand" "=d,d") ++ (sign_extend:HI ++ (match_operand:QI 1 
"nonimmediate_operand" "d,m")))] ++ "" ++ "@ ++ ext.w.b\t%0,%1 ++ ld.b\t%0,%1" ++ [(set_attr "move_type" "signext,load") ++ (set_attr "mode" "SI")]) ++ ++;; Combiner patterns for truncate/sign_extend combinations. The SI versions ++;; use the shift/truncate patterns. ++ ++(define_insn_and_split "*extenddi_truncate" ++ [(set (match_operand:DI 0 "register_operand" "=d") ++ (sign_extend:DI ++ (truncate:SHORT (match_operand:DI 1 "register_operand" "d"))))] ++ "TARGET_64BIT" ++ "#" ++ "&& reload_completed" ++ [(set (match_dup 2) ++ (ashift:DI (match_dup 1) ++ (match_dup 3))) ++ (set (match_dup 0) ++ (ashiftrt:DI (match_dup 2) ++ (match_dup 3)))] ++{ ++ operands[2] = gen_lowpart (DImode, operands[0]); ++ operands[3] = GEN_INT (BITS_PER_WORD - GET_MODE_BITSIZE (mode)); ++} ++ [(set_attr "move_type" "shift_shift") ++ (set_attr "mode" "DI")]) ++ ++(define_insn_and_split "*extendsi_truncate" ++ [(set (match_operand:SI 0 "register_operand" "=d") ++ (sign_extend:SI ++ (truncate:SHORT (match_operand:DI 1 "register_operand" "d"))))] ++ "TARGET_64BIT" ++ "#" ++ "&& reload_completed" ++ [(set (match_dup 2) ++ (ashift:DI (match_dup 1) ++ (match_dup 3))) ++ (set (match_dup 0) ++ (truncate:SI (ashiftrt:DI (match_dup 2) ++ (match_dup 3))))] ++{ ++ operands[2] = gen_lowpart (DImode, operands[0]); ++ operands[3] = GEN_INT (BITS_PER_WORD - GET_MODE_BITSIZE (mode)); ++} ++ [(set_attr "move_type" "shift_shift") ++ (set_attr "mode" "SI")]) ++ ++(define_insn_and_split "*extendhi_truncateqi" ++ [(set (match_operand:HI 0 "register_operand" "=d") ++ (sign_extend:HI ++ (truncate:QI (match_operand:DI 1 "register_operand" "d"))))] ++ "TARGET_64BIT" ++ "#" ++ "&& reload_completed" ++ [(set (match_dup 2) ++ (ashift:DI (match_dup 1) ++ (const_int 56))) ++ (set (match_dup 0) ++ (truncate:HI (ashiftrt:DI (match_dup 2) ++ (const_int 56))))] ++{ ++ operands[2] = gen_lowpart (DImode, operands[0]); ++} ++ [(set_attr "move_type" "shift_shift") ++ (set_attr "mode" "SI")]) ++ ++(define_insn "extendsfdf2" ++ 
[(set (match_operand:DF 0 "register_operand" "=f") ++ (float_extend:DF (match_operand:SF 1 "register_operand" "f")))] ++ "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT" ++ "fcvt.d.s\t%0,%1" ++ [(set_attr "type" "fcvt") ++ (set_attr "cnv_mode" "S2D") ++ (set_attr "mode" "DF")]) ++ ++;; ++;; .................... ++;; ++;; CONVERSIONS ++;; ++;; .................... ++ ++(define_expand "fix_truncdfsi2" ++ [(set (match_operand:SI 0 "register_operand") ++ (fix:SI (match_operand:DF 1 "register_operand")))] ++ "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT" ++"" ++) ++ ++(define_insn "fix_truncdfsi2_insn" ++ [(set (match_operand:SI 0 "register_operand" "=f") ++ (fix:SI (match_operand:DF 1 "register_operand" "f")))] ++ "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT" ++ "ftintrz.w.d %0,%1" ++ [(set_attr "type" "fcvt") ++ (set_attr "mode" "DF") ++ (set_attr "cnv_mode" "D2I")]) ++ ++ ++(define_expand "fix_truncsfsi2" ++ [(set (match_operand:SI 0 "register_operand") ++ (fix:SI (match_operand:SF 1 "register_operand")))] ++ "TARGET_HARD_FLOAT" ++"" ++) ++ ++(define_insn "fix_truncsfsi2_insn" ++ [(set (match_operand:SI 0 "register_operand" "=f") ++ (fix:SI (match_operand:SF 1 "register_operand" "f")))] ++ "TARGET_HARD_FLOAT" ++ "ftintrz.w.s %0,%1" ++ [(set_attr "type" "fcvt") ++ (set_attr "mode" "SF") ++ (set_attr "cnv_mode" "S2I")]) ++ ++ ++(define_insn "fix_truncdfdi2" ++ [(set (match_operand:DI 0 "register_operand" "=f") ++ (fix:DI (match_operand:DF 1 "register_operand" "f")))] ++ "TARGET_HARD_FLOAT && TARGET_FLOAT64 && TARGET_DOUBLE_FLOAT" ++ "ftintrz.l.d %0,%1" ++ [(set_attr "type" "fcvt") ++ (set_attr "mode" "DF") ++ (set_attr "cnv_mode" "D2I")]) ++ ++ ++(define_insn "fix_truncsfdi2" ++ [(set (match_operand:DI 0 "register_operand" "=f") ++ (fix:DI (match_operand:SF 1 "register_operand" "f")))] ++ "TARGET_HARD_FLOAT && TARGET_FLOAT64 && TARGET_DOUBLE_FLOAT" ++ "ftintrz.l.s %0,%1" ++ [(set_attr "type" "fcvt") ++ (set_attr "mode" "SF") ++ (set_attr "cnv_mode" "S2I")]) ++ ++ ++(define_insn 
"floatsidf2" ++ [(set (match_operand:DF 0 "register_operand" "=f") ++ (float:DF (match_operand:SI 1 "register_operand" "f")))] ++ "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT" ++ "ffint.d.w\t%0,%1" ++ [(set_attr "type" "fcvt") ++ (set_attr "mode" "DF") ++ (set_attr "cnv_mode" "I2D")]) ++ ++ ++(define_insn "floatdidf2" ++ [(set (match_operand:DF 0 "register_operand" "=f") ++ (float:DF (match_operand:DI 1 "register_operand" "f")))] ++ "TARGET_HARD_FLOAT && TARGET_FLOAT64 && TARGET_DOUBLE_FLOAT" ++ "ffint.d.l\t%0,%1" ++ [(set_attr "type" "fcvt") ++ (set_attr "mode" "DF") ++ (set_attr "cnv_mode" "I2D")]) ++ ++ ++(define_insn "floatsisf2" ++ [(set (match_operand:SF 0 "register_operand" "=f") ++ (float:SF (match_operand:SI 1 "register_operand" "f")))] ++ "TARGET_HARD_FLOAT" ++ "ffint.s.w\t%0,%1" ++ [(set_attr "type" "fcvt") ++ (set_attr "mode" "SF") ++ (set_attr "cnv_mode" "I2S")]) ++ ++ ++(define_insn "floatdisf2" ++ [(set (match_operand:SF 0 "register_operand" "=f") ++ (float:SF (match_operand:DI 1 "register_operand" "f")))] ++ "TARGET_HARD_FLOAT && TARGET_FLOAT64 && TARGET_DOUBLE_FLOAT" ++ "ffint.s.l\t%0,%1" ++ [(set_attr "type" "fcvt") ++ (set_attr "mode" "SF") ++ (set_attr "cnv_mode" "I2S")]) ++ ++ ++(define_expand "fixuns_truncdfsi2" ++ [(set (match_operand:SI 0 "register_operand") ++ (unsigned_fix:SI (match_operand:DF 1 "register_operand")))] ++ "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT" ++{ ++ rtx reg1 = gen_reg_rtx (DFmode); ++ rtx reg2 = gen_reg_rtx (DFmode); ++ rtx reg3 = gen_reg_rtx (SImode); ++ rtx_code_label *label1 = gen_label_rtx (); ++ rtx_code_label *label2 = gen_label_rtx (); ++ rtx test; ++ REAL_VALUE_TYPE offset; ++ ++ real_2expN (&offset, 31, DFmode); ++ ++ if (reg1) /* Turn off complaints about unreached code. 
*/ ++ { ++ loongarch_emit_move (reg1, const_double_from_real_value (offset, DFmode)); ++ do_pending_stack_adjust (); ++ ++ test = gen_rtx_GE (VOIDmode, operands[1], reg1); ++ emit_jump_insn (gen_cbranchdf4 (test, operands[1], reg1, label1)); ++ ++ emit_insn (gen_fix_truncdfsi2 (operands[0], operands[1])); ++ emit_jump_insn (gen_rtx_SET (pc_rtx, ++ gen_rtx_LABEL_REF (VOIDmode, label2))); ++ emit_barrier (); ++ ++ emit_label (label1); ++ loongarch_emit_move (reg2, gen_rtx_MINUS (DFmode, operands[1], reg1)); ++ loongarch_emit_move (reg3, GEN_INT (trunc_int_for_mode ++ (BITMASK_HIGH, SImode))); ++ ++ emit_insn (gen_fix_truncdfsi2 (operands[0], reg2)); ++ emit_insn (gen_iorsi3 (operands[0], operands[0], reg3)); ++ ++ emit_label (label2); ++ ++ /* Allow REG_NOTES to be set on last insn (labels don't have enough ++ fields, and can't be used for REG_NOTES anyway). */ ++ emit_use (stack_pointer_rtx); ++ DONE; ++ } ++}) ++ ++ ++(define_expand "fixuns_truncdfdi2" ++ [(set (match_operand:DI 0 "register_operand") ++ (unsigned_fix:DI (match_operand:DF 1 "register_operand")))] ++ "TARGET_HARD_FLOAT && TARGET_64BIT && TARGET_DOUBLE_FLOAT" ++{ ++ rtx reg1 = gen_reg_rtx (DFmode); ++ rtx reg2 = gen_reg_rtx (DFmode); ++ rtx reg3 = gen_reg_rtx (DImode); ++ rtx_code_label *label1 = gen_label_rtx (); ++ rtx_code_label *label2 = gen_label_rtx (); ++ rtx test; ++ REAL_VALUE_TYPE offset; ++ ++ real_2expN (&offset, 63, DFmode); ++ ++ loongarch_emit_move (reg1, const_double_from_real_value (offset, DFmode)); ++ do_pending_stack_adjust (); ++ ++ test = gen_rtx_GE (VOIDmode, operands[1], reg1); ++ emit_jump_insn (gen_cbranchdf4 (test, operands[1], reg1, label1)); ++ ++ emit_insn (gen_fix_truncdfdi2 (operands[0], operands[1])); ++ emit_jump_insn (gen_rtx_SET (pc_rtx, gen_rtx_LABEL_REF (VOIDmode, label2))); ++ emit_barrier (); ++ ++ emit_label (label1); ++ loongarch_emit_move (reg2, gen_rtx_MINUS (DFmode, operands[1], reg1)); ++ loongarch_emit_move (reg3, GEN_INT (BITMASK_HIGH)); ++ emit_insn 
(gen_ashldi3 (reg3, reg3, GEN_INT (32))); ++ ++ emit_insn (gen_fix_truncdfdi2 (operands[0], reg2)); ++ emit_insn (gen_iordi3 (operands[0], operands[0], reg3)); ++ ++ emit_label (label2); ++ ++ /* Allow REG_NOTES to be set on last insn (labels don't have enough ++ fields, and can't be used for REG_NOTES anyway). */ ++ emit_use (stack_pointer_rtx); ++ DONE; ++}) ++ ++ ++(define_expand "fixuns_truncsfsi2" ++ [(set (match_operand:SI 0 "register_operand") ++ (unsigned_fix:SI (match_operand:SF 1 "register_operand")))] ++ "TARGET_HARD_FLOAT" ++{ ++ rtx reg1 = gen_reg_rtx (SFmode); ++ rtx reg2 = gen_reg_rtx (SFmode); ++ rtx reg3 = gen_reg_rtx (SImode); ++ rtx_code_label *label1 = gen_label_rtx (); ++ rtx_code_label *label2 = gen_label_rtx (); ++ rtx test; ++ REAL_VALUE_TYPE offset; ++ ++ real_2expN (&offset, 31, SFmode); ++ ++ loongarch_emit_move (reg1, const_double_from_real_value (offset, SFmode)); ++ do_pending_stack_adjust (); ++ ++ test = gen_rtx_GE (VOIDmode, operands[1], reg1); ++ emit_jump_insn (gen_cbranchsf4 (test, operands[1], reg1, label1)); ++ ++ emit_insn (gen_fix_truncsfsi2 (operands[0], operands[1])); ++ emit_jump_insn (gen_rtx_SET (pc_rtx, gen_rtx_LABEL_REF (VOIDmode, label2))); ++ emit_barrier (); ++ ++ emit_label (label1); ++ loongarch_emit_move (reg2, gen_rtx_MINUS (SFmode, operands[1], reg1)); ++ loongarch_emit_move (reg3, GEN_INT (trunc_int_for_mode ++ (BITMASK_HIGH, SImode))); ++ ++ emit_insn (gen_fix_truncsfsi2 (operands[0], reg2)); ++ emit_insn (gen_iorsi3 (operands[0], operands[0], reg3)); ++ ++ emit_label (label2); ++ ++ /* Allow REG_NOTES to be set on last insn (labels don't have enough ++ fields, and can't be used for REG_NOTES anyway). 
*/ ++ emit_use (stack_pointer_rtx); ++ DONE; ++}) ++ ++ ++(define_expand "fixuns_truncsfdi2" ++ [(set (match_operand:DI 0 "register_operand") ++ (unsigned_fix:DI (match_operand:SF 1 "register_operand")))] ++ "TARGET_HARD_FLOAT && TARGET_64BIT && TARGET_DOUBLE_FLOAT" ++{ ++ rtx reg1 = gen_reg_rtx (SFmode); ++ rtx reg2 = gen_reg_rtx (SFmode); ++ rtx reg3 = gen_reg_rtx (DImode); ++ rtx_code_label *label1 = gen_label_rtx (); ++ rtx_code_label *label2 = gen_label_rtx (); ++ rtx test; ++ REAL_VALUE_TYPE offset; ++ ++ real_2expN (&offset, 63, SFmode); ++ ++ loongarch_emit_move (reg1, const_double_from_real_value (offset, SFmode)); ++ do_pending_stack_adjust (); ++ ++ test = gen_rtx_GE (VOIDmode, operands[1], reg1); ++ emit_jump_insn (gen_cbranchsf4 (test, operands[1], reg1, label1)); ++ ++ emit_insn (gen_fix_truncsfdi2 (operands[0], operands[1])); ++ emit_jump_insn (gen_rtx_SET (pc_rtx, gen_rtx_LABEL_REF (VOIDmode, label2))); ++ emit_barrier (); ++ ++ emit_label (label1); ++ loongarch_emit_move (reg2, gen_rtx_MINUS (SFmode, operands[1], reg1)); ++ loongarch_emit_move (reg3, GEN_INT (BITMASK_HIGH)); ++ emit_insn (gen_ashldi3 (reg3, reg3, GEN_INT (32))); ++ ++ emit_insn (gen_fix_truncsfdi2 (operands[0], reg2)); ++ emit_insn (gen_iordi3 (operands[0], operands[0], reg3)); ++ ++ emit_label (label2); ++ ++ /* Allow REG_NOTES to be set on last insn (labels don't have enough ++ fields, and can't be used for REG_NOTES anyway). */ ++ emit_use (stack_pointer_rtx); ++ DONE; ++}) ++ ++;; ++;; .................... ++;; ++;; DATA MOVEMENT ++;; ++;; .................... 
++ ++(define_expand "extzv" ++ [(set (match_operand:GPR 0 "register_operand") ++ (zero_extract:GPR (match_operand:GPR 1 "register_operand") ++ (match_operand 2 "const_int_operand") ++ (match_operand 3 "const_int_operand")))] ++ "" ++{ ++ if (!loongarch_use_ins_ext_p (operands[1], INTVAL (operands[2]), ++ INTVAL (operands[3]))) ++ FAIL; ++}) ++ ++(define_insn "*extzv" ++ [(set (match_operand:GPR 0 "register_operand" "=d") ++ (zero_extract:GPR (match_operand:GPR 1 "register_operand" "d") ++ (match_operand 2 "const_int_operand" "") ++ (match_operand 3 "const_int_operand" "")))] ++ "loongarch_use_ins_ext_p (operands[1], INTVAL (operands[2]), ++ INTVAL (operands[3]))" ++{ ++ operands[2] = GEN_INT (INTVAL (operands[2]) + INTVAL (operands[3]) -1 ); ++ return "bstrpick.\t%0,%1,%2,%3"; ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "")]) ++ ++(define_expand "insv" ++ [(set (zero_extract:GPR (match_operand:GPR 0 "register_operand") ++ (match_operand 1 "const_int_operand") ++ (match_operand 2 "const_int_operand")) ++ (match_operand:GPR 3 "reg_or_0_operand"))] ++ "" ++{ ++ if (!loongarch_use_ins_ext_p (operands[0], INTVAL (operands[1]), ++ INTVAL (operands[2]))) ++ FAIL; ++}) ++ ++(define_insn "*insv" ++ [(set (zero_extract:GPR (match_operand:GPR 0 "register_operand" "+d") ++ (match_operand:SI 1 "const_int_operand" "") ++ (match_operand:SI 2 "const_int_operand" "")) ++ (match_operand:GPR 3 "reg_or_0_operand" "dJ"))] ++ "loongarch_use_ins_ext_p (operands[0], INTVAL (operands[1]), ++ INTVAL (operands[2]))" ++{ ++ operands[1] = GEN_INT (INTVAL (operands[1]) + INTVAL (operands[2]) -1 ); ++ return "bstrins.\t%0,%z3,%1,%2"; ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "")]) ++ ++;; Allow combine to split complex const_int load sequences, using operand 2 ++;; to store the intermediate results. See move_operand for details. 
++(define_split ++ [(set (match_operand:GPR 0 "register_operand") ++ (match_operand:GPR 1 "splittable_const_int_operand")) ++ (clobber (match_operand:GPR 2 "register_operand"))] ++ "" ++ [(const_int 0)] ++{ ++ loongarch_move_integer (operands[2], operands[0], INTVAL (operands[1])); ++ DONE; ++}) ++ ++;; 64-bit integer moves ++ ++;; Unlike most other insns, the move insns can't be split with ++;; different predicates, because register spilling and other parts of ++;; the compiler, have memoized the insn number already. ++ ++(define_expand "movdi" ++ [(set (match_operand:DI 0 "") ++ (match_operand:DI 1 ""))] ++ "" ++{ ++ if (loongarch_legitimize_move (DImode, operands[0], operands[1])) ++ DONE; ++}) ++ ++ ++(define_insn "*movdi_32bit" ++ [(set (match_operand:DI 0 "nonimmediate_operand" "=d,d,d,ZC,d,m,*f,*f,*d,*m") ++ (match_operand:DI 1 "move_operand" "d,i,ZC,d,m,d,*J*d,*m,*f,*f"))] ++ "!TARGET_64BIT ++ && (register_operand (operands[0], DImode) ++ || reg_or_0_operand (operands[1], DImode))" ++ { return loongarch_output_move (operands[0], operands[1]); } ++ [(set_attr "move_type" "move,const,load,store,load,store,mgtf,fpload,mftg,fpstore") ++ (set (attr "mode") ++ (if_then_else (eq_attr "move_type" "imul") ++ (const_string "SI") ++ (const_string "DI")))]) ++ ++ ++(define_insn "*movdi_64bit" ++ [(set (match_operand:DI 0 "nonimmediate_operand" "=d,d,d,ZC,d,m,*f,*f,*d,*m") ++ (match_operand:DI 1 "move_operand" "d,Yd,ZC,dJ,m,dJ,*d*J,*m,*f,*f"))] ++ "TARGET_64BIT ++ && (register_operand (operands[0], DImode) ++ || reg_or_0_operand (operands[1], DImode))" ++ { return loongarch_output_move (operands[0], operands[1]); } ++ [(set_attr "move_type" "move,const,load,store,load,store,mgtf,fpload,mftg,fpstore") ++ (set_attr "mode" "DI")]) ++ ++;; 32-bit Integer moves ++ ++;; Unlike most other insns, the move insns can't be split with ++;; different predicates, because register spilling and other parts of ++;; the compiler, have memoized the insn number already. 
++ ++(define_expand "mov" ++ [(set (match_operand:IMOVE32 0 "") ++ (match_operand:IMOVE32 1 ""))] ++ "" ++{ ++ if (loongarch_legitimize_move (mode, operands[0], operands[1])) ++ DONE; ++}) ++ ++;; The difference between these two is whether or not ints are allowed ++;; in FP registers (off by default, use -mdebugh to enable). ++ ++(define_insn "*mov_internal" ++ [(set (match_operand:IMOVE32 0 "nonimmediate_operand" "=d,d,d,ZC,d,m,*f,*f,*d,*m,*d,*z") ++ (match_operand:IMOVE32 1 "move_operand" "d,Yd,ZC,dJ,m,dJ,*d*J,*m,*f,*f,*z,*d"))] ++ "(register_operand (operands[0], mode) ++ || reg_or_0_operand (operands[1], mode))" ++ { return loongarch_output_move (operands[0], operands[1]); } ++ [(set_attr "move_type" "move,const,load,store,load,store,mgtf,fpload,mftg,fpstore,mftg,mgtf") ++ (set_attr "compression" "all,*,*,*,*,*,*,*,*,*,*,*") ++ (set_attr "mode" "SI")]) ++ ++ ++ ++;; LARCH supports loading and storing a floating point register from ++;; the sum of two general registers. We use two versions for each of ++;; these four instructions: one where the two general registers are ++;; SImode, and one where they are DImode. This is because general ++;; registers will be in SImode when they hold 32-bit values, but, ++;; since the 32-bit values are always sign extended, the [ls][wd]xc1 ++;; instructions will still work correctly. ++ ++;; ??? Perhaps it would be better to support these instructions by ++;; modifying TARGET_LEGITIMATE_ADDRESS_P and friends. However, since ++;; these instructions can only be used to load and store floating ++;; point registers, that would probably cause trouble in reload. 
++ ++(define_insn "*_" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (mem:ANYF (plus:P (match_operand:P 1 "register_operand" "d") ++ (match_operand:P 2 "register_operand" "d"))))] ++ "" ++ "\t%0,%1,%2" ++ [(set_attr "type" "fpidxload") ++ (set_attr "mode" "")]) ++ ++(define_insn "*_" ++ [(set (mem:ANYF (plus:P (match_operand:P 1 "register_operand" "d") ++ (match_operand:P 2 "register_operand" "d"))) ++ (match_operand:ANYF 0 "register_operand" "f"))] ++ "TARGET_HARD_FLOAT" ++ "\t%0,%1,%2" ++ [(set_attr "type" "fpidxstore") ++ (set_attr "mode" "")]) ++ ++;; LoongArch index address load and store. ++(define_insn "*_" ++ [(set (match_operand:GPR 0 "register_operand" "=d") ++ (mem:GPR ++ (plus:P (match_operand:P 1 "register_operand" "d") ++ (match_operand:P 2 "register_operand" "d"))))] ++ "" ++ "\t%0,%1,%2" ++ [(set_attr "type" "load") ++ (set_attr "mode" "")]) ++ ++(define_insn "*_" ++ [(set (mem:GPR (plus:P (match_operand:P 1 "register_operand" "d") ++ (match_operand:P 2 "register_operand" "d"))) ++ (match_operand:GPR 0 "register_operand" "d"))] ++ "" ++ "\t%0,%1,%2" ++ [(set_attr "type" "store") ++ (set_attr "mode" "")]) ++ ++;; SHORT mode sign_extend. ++(define_insn "*extend__" ++ [(set (match_operand:GPR 0 "register_operand" "=d") ++ (sign_extend:GPR ++ (mem:SHORT ++ (plus:P (match_operand:P 1 "register_operand" "d") ++ (match_operand:P 2 "register_operand" "d")))))] ++ "" ++ "\t%0,%1,%2" ++ [(set_attr "type" "load") ++ (set_attr "mode" "")]) ++ ++(define_insn "*extend_" ++ [(set (mem:SHORT (plus:P (match_operand:P 1 "register_operand" "d") ++ (match_operand:P 2 "register_operand" "d"))) ++ (match_operand:SHORT 0 "register_operand" "d"))] ++ "" ++ "\t%0,%1,%2" ++ [(set_attr "type" "store") ++ (set_attr "mode" "SI")]) ++ ++ ++;; 16-bit Integer moves ++ ++;; Unlike most other insns, the move insns can't be split with ++;; different predicates, because register spilling and other parts of ++;; the compiler, have memoized the insn number already. 
++;; Unsigned loads are used because LOAD_EXTEND_OP returns ZERO_EXTEND. ++ ++(define_expand "movhi" ++ [(set (match_operand:HI 0 "") ++ (match_operand:HI 1 ""))] ++ "" ++{ ++ if (loongarch_legitimize_move (HImode, operands[0], operands[1])) ++ DONE; ++}) ++ ++;; HImode move. The insn condition accepts the move only when operand 0 ++;; is a register or operand 1 satisfies reg_or_0_operand; the assembly ++;; text is produced by loongarch_output_move. ++(define_insn "*movhi_internal" ++ [(set (match_operand:HI 0 "nonimmediate_operand" "=d,d,d,d,m") ++ (match_operand:HI 1 "move_operand" "d,Yd,I,m,dJ"))] ++ "(register_operand (operands[0], HImode) ++ || reg_or_0_operand (operands[1], HImode))" ++ { return loongarch_output_move (operands[0], operands[1]); } ++ [(set_attr "move_type" "move,const,const,load,store") ++ (set_attr "compression" "all,all,*,*,*") ++ (set_attr "mode" "HI")]) ++ ++;; 8-bit Integer moves ++ ++;; Unlike most other insns, the move insns can't be split with ++;; different predicates, because register spilling and other parts of ++;; the compiler, have memoized the insn number already. ++;; Unsigned loads are used because LOAD_EXTEND_OP returns ZERO_EXTEND. ++ ++(define_expand "movqi" ++ [(set (match_operand:QI 0 "") ++ (match_operand:QI 1 ""))] ++ "" ++{ ++ if (loongarch_legitimize_move (QImode, operands[0], operands[1])) ++ DONE; ++}) ++ ++;; QImode move, with the same form of insn condition and the same ++;; output routine as the HImode move insn. ++(define_insn "*movqi_internal" ++ [(set (match_operand:QI 0 "nonimmediate_operand" "=d,d,d,m") ++ (match_operand:QI 1 "move_operand" "d,I,m,dJ"))] ++ "(register_operand (operands[0], QImode) ++ || reg_or_0_operand (operands[1], QImode))" ++ { return loongarch_output_move (operands[0], operands[1]); } ++ [(set_attr "move_type" "move,const,load,store") ++ (set_attr "compression" "all,*,*,*") ++ (set_attr "mode" "QI")]) ++ ++;; 32-bit floating point moves ++ ++(define_expand "movsf" ++ [(set (match_operand:SF 0 "") ++ (match_operand:SF 1 ""))] ++ "" ++{ ++ if (loongarch_legitimize_move (SFmode, operands[0], operands[1])) ++ DONE; ++}) ++ ++;; SFmode move enabled by TARGET_HARD_FLOAT. The move_type attribute ++;; gives the class of each of the ten alternatives. ++(define_insn "*movsf_hardfloat" ++ [(set (match_operand:SF 0 "nonimmediate_operand" "=f,f,f,m,m,*f,*d,*d,*d,*m") ++ (match_operand:SF 1 "move_operand" 
"f,G,m,f,G,*d,*f,*G*d,*m,*d"))] ++ "TARGET_HARD_FLOAT ++ && (register_operand (operands[0], SFmode) ++ || reg_or_0_operand (operands[1], SFmode))" ++ { return loongarch_output_move (operands[0], operands[1]); } ++ [(set_attr "move_type" "fmove,mgtf,fpload,fpstore,store,mgtf,mftg,move,load,store") ++ (set_attr "mode" "SF")]) ++ ++;; SFmode move enabled by TARGET_SOFT_FLOAT; only "d" and "m" ++;; alternatives are offered. ++(define_insn "*movsf_softfloat" ++ [(set (match_operand:SF 0 "nonimmediate_operand" "=d,d,m") ++ (match_operand:SF 1 "move_operand" "Gd,m,d"))] ++ "TARGET_SOFT_FLOAT ++ && (register_operand (operands[0], SFmode) ++ || reg_or_0_operand (operands[1], SFmode))" ++ { return loongarch_output_move (operands[0], operands[1]); } ++ [(set_attr "move_type" "move,load,store") ++ (set_attr "mode" "SF")]) ++ ++ ++;; 64-bit floating point moves ++ ++(define_expand "movdf" ++ [(set (match_operand:DF 0 "") ++ (match_operand:DF 1 ""))] ++ "" ++{ ++ if (loongarch_legitimize_move (DFmode, operands[0], operands[1])) ++ DONE; ++}) ++ ++;; DFmode move enabled by TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT. ++(define_insn "*movdf_hardfloat" ++ [(set (match_operand:DF 0 "nonimmediate_operand" "=f,f,f,m,m,*f,*d,*d,*d,*m") ++ (match_operand:DF 1 "move_operand" "f,G,m,f,G,*d,*f,*d*G,*m,*d"))] ++ "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT ++ && (register_operand (operands[0], DFmode) ++ || reg_or_0_operand (operands[1], DFmode))" ++ { return loongarch_output_move (operands[0], operands[1]); } ++ [(set_attr "move_type" "fmove,mgtf,fpload,fpstore,store,mgtf,mftg,move,load,store") ++ (set_attr "mode" "DF")]) ++ ++;; DFmode move through "d" registers and memory, enabled by ++;; TARGET_SOFT_FLOAT or TARGET_SINGLE_FLOAT. ++(define_insn "*movdf_softfloat" ++ [(set (match_operand:DF 0 "nonimmediate_operand" "=d,d,m") ++ (match_operand:DF 1 "move_operand" "dG,m,dG"))] ++ "(TARGET_SOFT_FLOAT || TARGET_SINGLE_FLOAT) ++ && (register_operand (operands[0], DFmode) ++ || reg_or_0_operand (operands[1], DFmode))" ++ { return loongarch_output_move (operands[0], operands[1]); } ++ [(set_attr "move_type" "move,load,store") ++ (set_attr "mode" "DF")]) ++ ++ ++;; 128-bit integer moves ++ ++(define_expand "movti" ++ [(set (match_operand:TI 0) ++ (match_operand:TI 1))] 
++ "TARGET_64BIT" ++{ ++ if (loongarch_legitimize_move (TImode, operands[0], operands[1])) ++ DONE; ++}) ++ ++;; TImode move, enabled by TARGET_64BIT. ++;; NOTE(review): none of this insn's move_type values is "imul", so the ++;; if_then_else on the "mode" attribute appears to always select "TI" ++;; -- confirm. ++(define_insn "*movti" ++ [(set (match_operand:TI 0 "nonimmediate_operand" "=d,d,d,m") ++ (match_operand:TI 1 "move_operand" "d,i,m,dJ"))] ++ "TARGET_64BIT ++ && (register_operand (operands[0], TImode) ++ || reg_or_0_operand (operands[1], TImode))" ++ { return loongarch_output_move (operands[0], operands[1]); } ++ [(set_attr "move_type" "move,const,load,store") ++ (set (attr "mode") ++ (if_then_else (eq_attr "move_type" "imul") ++ (const_string "SI") ++ (const_string "TI")))]) ++ ++ ++;; 128-bit floating point moves ++ ++(define_expand "movtf" ++ [(set (match_operand:TF 0) ++ (match_operand:TF 1))] ++ "TARGET_64BIT" ++{ ++ if (loongarch_legitimize_move (TFmode, operands[0], operands[1])) ++ DONE; ++}) ++ ++;; This pattern handles both hard- and soft-float cases. ++;; Its output template is "#", so no assembly is printed for it directly; ++;; presumably one of the define_splits that follow rewrites it -- TODO ++;; confirm. ++(define_insn "*movtf" ++ [(set (match_operand:TF 0 "nonimmediate_operand" "=d,d,m,f,d,f,m") ++ (match_operand:TF 1 "move_operand" "dG,m,dG,dG,f,m,f"))] ++ "TARGET_64BIT ++ && (register_operand (operands[0], TFmode) ++ || reg_or_0_operand (operands[1], TFmode))" ++ "#" ++ [(set_attr "move_type" "move,load,store,mgtf,mftg,fpload,fpstore") ++ (set_attr "mode" "TF")]) ++ ++ ++;; Post-reload splitters, one per mode iterator (MOVE64, MOVE128). When ++;; reload_completed is set and loongarch_split_move_insn_p accepts the ++;; operands, the move is handed to loongarch_split_move_insn. ++(define_split ++ [(set (match_operand:MOVE64 0 "nonimmediate_operand") ++ (match_operand:MOVE64 1 "move_operand"))] ++ "reload_completed && loongarch_split_move_insn_p (operands[0], operands[1], insn)" ++ [(const_int 0)] ++{ ++ loongarch_split_move_insn (operands[0], operands[1], curr_insn); ++ DONE; ++}) ++ ++(define_split ++ [(set (match_operand:MOVE128 0 "nonimmediate_operand") ++ (match_operand:MOVE128 1 "move_operand"))] ++ "reload_completed && loongarch_split_move_insn_p (operands[0], operands[1], insn)" ++ [(const_int 0)] ++{ ++ loongarch_split_move_insn (operands[0], operands[1], curr_insn); ++ DONE; ++}) ++ ++;; Emit a doubleword move in which exactly one of the operands is ++;; a floating-point register. 
We can't just emit two normal moves ++;; because of the constraints imposed by the FPU register model; ++;; see loongarch_cannot_change_mode_class for details. Instead, we keep ++;; the FPR whole and use special patterns to refer to each word of ++;; the other operand. ++;; ++;; When operand 0 satisfies FP_REG_RTX_P, the low word is written with ++;; load_low and the high word with movgr2frh (!TARGET_64BIT) or load_high. ++;; Otherwise the low word is read with store_word (const0_rtx) and the ++;; high word with movfrh2gr (!TARGET_64BIT) or store_word (const1_rtx). ++ ++(define_expand "move_doubleword_fpr" ++ [(set (match_operand:SPLITF 0) ++ (match_operand:SPLITF 1))] ++ "" ++{ ++ if (FP_REG_RTX_P (operands[0])) ++ { ++ rtx low = loongarch_subword (operands[1], 0); ++ rtx high = loongarch_subword (operands[1], 1); ++ emit_insn (gen_load_low (operands[0], low)); ++ if (!TARGET_64BIT) ++ emit_insn (gen_movgr2frh (operands[0], high, operands[0])); ++ else ++ emit_insn (gen_load_high (operands[0], high, operands[0])); ++ } ++ else ++ { ++ rtx low = loongarch_subword (operands[0], 0); ++ rtx high = loongarch_subword (operands[0], 1); ++ emit_insn (gen_store_word (low, operands[1], const0_rtx)); ++ if (!TARGET_64BIT) ++ emit_insn (gen_movfrh2gr (high, operands[1])); ++ else ++ emit_insn (gen_store_word (high, operands[1], const1_rtx)); ++ } ++ DONE; ++}) ++ ++;; Load the low word of operand 0 with operand 1. ++;; The output code replaces operands[0] with its subword 0 before calling ++;; loongarch_output_move. ++(define_insn "load_low" ++ [(set (match_operand:SPLITF 0 "register_operand" "=f,f") ++ (unspec:SPLITF [(match_operand: 1 "general_operand" "dJ,m")] ++ UNSPEC_LOAD_LOW))] ++ "TARGET_HARD_FLOAT" ++{ ++ operands[0] = loongarch_subword (operands[0], 0); ++ return loongarch_output_move (operands[0], operands[1]); ++} ++ [(set_attr "move_type" "mgtf,fpload") ++ (set_attr "mode" "")]) ++ ++;; Load the high word of operand 0 from operand 1, preserving the value ++;; in the low word. 
++;; Operand 2 is tied to operand 0 by the "0" constraint; the output code ++;; replaces operands[0] with its subword 1 before calling ++;; loongarch_output_move. ++(define_insn "load_high" ++ [(set (match_operand:SPLITF 0 "register_operand" "=f,f") ++ (unspec:SPLITF [(match_operand: 1 "general_operand" "dJ,m") ++ (match_operand:SPLITF 2 "register_operand" "0,0")] ++ UNSPEC_LOAD_HIGH))] ++ "TARGET_HARD_FLOAT" ++{ ++ operands[0] = loongarch_subword (operands[0], 1); ++ return loongarch_output_move (operands[0], operands[1]); ++} ++ [(set_attr "move_type" "mgtf,fpload") ++ (set_attr "mode" "")]) ++ ++;; Store one word of operand 1 in operand 0. Operand 2 is 1 to store the ++;; high word and 0 to store the low word. ++;; The word is selected by passing INTVAL (operands[2]) to ++;; loongarch_subword. ++(define_insn "store_word" ++ [(set (match_operand: 0 "nonimmediate_operand" "=d,m") ++ (unspec: [(match_operand:SPLITF 1 "register_operand" "f,f") ++ (match_operand 2 "const_int_operand")] ++ UNSPEC_STORE_WORD))] ++ "TARGET_HARD_FLOAT" ++{ ++ operands[1] = loongarch_subword (operands[1], INTVAL (operands[2])); ++ return loongarch_output_move (operands[0], operands[1]); ++} ++ [(set_attr "move_type" "mftg,fpstore") ++ (set_attr "mode" "")]) ++ ++;; Move operand 1 to the high word of operand 0 using movgr2frh, preserving the ++;; value in the low word. ++;; Enabled only when TARGET_HARD_FLOAT && ISA_HAS_MXFRH; operand 2 is tied ++;; to operand 0 by the "0" constraint. ++(define_insn "movgr2frh" ++ [(set (match_operand:SPLITF 0 "register_operand" "=f") ++ (unspec:SPLITF [(match_operand: 1 "reg_or_0_operand" "dJ") ++ (match_operand:SPLITF 2 "register_operand" "0")] ++ UNSPEC_MOVGR2FRH))] ++ "TARGET_HARD_FLOAT && ISA_HAS_MXFRH" ++ "movgr2frh.w\t%z1,%0" ++ [(set_attr "move_type" "mgtf") ++ (set_attr "mode" "")]) ++ ++;; Move high word of operand 1 to operand 0 using movfrh2gr. ++;; Enabled only when TARGET_HARD_FLOAT && ISA_HAS_MXFRH. ++(define_insn "movfrh2gr" ++ [(set (match_operand: 0 "register_operand" "=d") ++ (unspec: [(match_operand:SPLITF 1 "register_operand" "f")] ++ UNSPEC_MOVFRH2GR))] ++ "TARGET_HARD_FLOAT && ISA_HAS_MXFRH" ++ "movfrh2gr.s\t%0,%1" ++ [(set_attr "move_type" "mftg") ++ (set_attr "mode" "")]) ++ ++;; Expand in-line code to clear the instruction cache between operand[0] and ++;; operand[1]. 
++;; The expansion below does not reference operands 0 and 1; it emits a ++;; single ibar with immediate 0. ++(define_expand "clear_cache" ++ [(match_operand 0 "pmode_register_operand") ++ (match_operand 1 "pmode_register_operand")] ++ "" ++ " ++{ ++ emit_insn (gen_ibar (const0_rtx)); ++ DONE; ++}") ++ ++;; ibar and dbar print their mnemonic followed by operand 0, which must ++;; satisfy const_uimm15_operand. ++(define_insn "ibar" ++ [(unspec_volatile:SI [(match_operand 0 "const_uimm15_operand")] UNSPEC_IBAR)] ++ "" ++ "ibar\t%0") ++ ++(define_insn "dbar" ++ [(unspec_volatile:SI [(match_operand 0 "const_uimm15_operand")] UNSPEC_DBAR)] ++ "" ++ "dbar\t%0") ++ ++ ++ ++;; Privileged state instruction ++ ++(define_insn "cpucfg" ++ [(set (match_operand:SI 0 "register_operand" "=d") ++ (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "d")] ++ UNSPEC_CPUCFG))] ++ "" ++ "cpucfg\t%0,%1" ++ [(set_attr "type" "load") ++ (set_attr "mode" "SI")]) ++ ++;; asrtle_d and asrtgt_d take two DImode register operands, produce no ++;; result operand, and are enabled only for TARGET_64BIT. ++(define_insn "asrtle_d" ++ [(unspec_volatile:DI [(match_operand:DI 0 "register_operand" "d") ++ (match_operand:DI 1 "register_operand" "d")] ++ UNSPEC_ASRTLE_D)] ++ "TARGET_64BIT" ++ "asrtle.d\t%0,%1" ++ [(set_attr "type" "load") ++ (set_attr "mode" "DI")]) ++ ++(define_insn "asrtgt_d" ++ [(unspec_volatile:DI [(match_operand:DI 0 "register_operand" "d") ++ (match_operand:DI 1 "register_operand" "d")] ++ UNSPEC_ASRTGT_D)] ++ "TARGET_64BIT" ++ "asrtgt.d\t%0,%1" ++ [(set_attr "type" "load") ++ (set_attr "mode" "DI")]) ++ ++(define_insn "

csrrd" ++ [(set (match_operand:GPR 0 "register_operand" "=d") ++ (unspec_volatile:GPR [(match_operand 1 "const_uimm14_operand")] ++ UNSPEC_CSRRD))] ++ "" ++ "csrrd\t%0,%1" ++ [(set_attr "type" "load") ++ (set_attr "mode" "")]) ++ ++(define_insn "

csrwr" ++ [(set (match_operand:GPR 0 "register_operand" "=d") ++ (unspec_volatile:GPR ++ [(match_operand:GPR 1 "register_operand" "0") ++ (match_operand 2 "const_uimm14_operand")] ++ UNSPEC_CSRWR))] ++ "" ++ "csrwr\t%0,%2" ++ [(set_attr "type" "store") ++ (set_attr "mode" "")]) ++ ++(define_insn "

csrxchg" ++ [(set (match_operand:GPR 0 "register_operand" "=d") ++ (unspec_volatile:GPR ++ [(match_operand:GPR 1 "register_operand" "0") ++ (match_operand:GPR 2 "register_operand" "q") ++ (match_operand 3 "const_uimm14_operand")] ++ UNSPEC_CSRXCHG))] ++ "" ++ "csrxchg\t%0,%2,%3" ++ [(set_attr "type" "load") ++ (set_attr "mode" "")]) ++ ++;; Operand 0 receives the value; operand 1 is an SImode register printed ++;; as the second assembly operand. ++(define_insn "iocsrrd_" ++ [(set (match_operand:QHWD 0 "register_operand" "=d") ++ (unspec_volatile:QHWD [(match_operand:SI 1 "register_operand" "d")] ++ UNSPEC_IOCSRRD))] ++ "" ++ "iocsrrd.\t%0,%1" ++ [(set_attr "type" "load") ++ (set_attr "mode" "")]) ++ ++(define_insn "iocsrwr_" ++ [(unspec_volatile:QHWD [(match_operand:QHWD 0 "register_operand" "d") ++ (match_operand:SI 1 "register_operand" "d")] ++ UNSPEC_IOCSRWR)] ++ "" ++ "iocsrwr.\t%0,%1" ++ [(set_attr "type" "load") ++ (set_attr "mode" "")]) ++ ++(define_insn "

cacop" ++ [(unspec_volatile:X [(match_operand 0 "const_uimm5_operand") ++ (match_operand:X 1 "register_operand" "d") ++ (match_operand 2 "const_imm12_operand")] ++ UNSPEC_CACOP)] ++ "" ++ "cacop\t%0,%1,%2" ++ [(set_attr "type" "load") ++ (set_attr "mode" "")]) ++ ++(define_insn "

lddir" ++ [(unspec_volatile:X [(match_operand:X 0 "register_operand" "d") ++ (match_operand:X 1 "register_operand" "d") ++ (match_operand 2 "const_uimm5_operand")] ++ UNSPEC_LDDIR)] ++ "" ++ "lddir\t%0,%1,%2" ++ [(set_attr "type" "load") ++ (set_attr "mode" "")]) ++ ++(define_insn "

ldpte" ++ [(unspec_volatile:X [(match_operand:X 0 "register_operand" "d") ++ (match_operand 1 "const_uimm5_operand")] ++ UNSPEC_LDPTE)] ++ "" ++ "ldpte\t%0,%1" ++ [(set_attr "type" "load") ++ (set_attr "mode" "")]) ++ ++ ++;; Block moves, see loongarch.c for more details. ++;; Argument 0 is the destination ++;; Argument 1 is the source ++;; Argument 2 is the length ++;; Argument 3 is the alignment ++;; ++;; Only operands 0-2 are passed to loongarch_expand_block_move; the ++;; expander FAILs when that function returns false. ++;; NOTE(review): GCC 10 names the memcpy-style expander "cpymemM" and uses ++;; "movmemM" for moves whose blocks may overlap -- confirm that this ++;; pattern name is the intended one for the GCC version being patched. ++ ++(define_expand "movmemsi" ++ [(parallel [(set (match_operand:BLK 0 "general_operand") ++ (match_operand:BLK 1 "general_operand")) ++ (use (match_operand:SI 2 "")) ++ (use (match_operand:SI 3 "const_int_operand"))])] ++ " !TARGET_MEMCPY" ++{ ++ if (loongarch_expand_block_move (operands[0], operands[1], operands[2])) ++ DONE; ++ else ++ FAIL; ++}) ++ ++;; ++;; .................... ++;; ++;; SHIFTS ++;; ++;; .................... ++ ++(define_expand "3" ++ [(set (match_operand:GPR 0 "register_operand") ++ (any_shift:GPR (match_operand:GPR 1 "register_operand") ++ (match_operand:SI 2 "arith_operand")))] ++ "" ++{ ++}) ++ ++;; A constant shift count is masked to the operand width minus one ++;; before the immediate form is printed. ++(define_insn "*3" ++ [(set (match_operand:GPR 0 "register_operand" "=d") ++ (any_shift:GPR (match_operand:GPR 1 "register_operand" "d") ++ (match_operand:SI 2 "arith_operand" "dI")))] ++ "" ++{ ++ if (CONST_INT_P (operands[2])) ++ { ++ operands[2] = GEN_INT (INTVAL (operands[2]) ++ & (GET_MODE_BITSIZE (mode) - 1)); ++ return "i.\t%0,%1,%2"; ++ } else ++ return ".\t%0,%1,%2"; ++} ++ [(set_attr "type" "shift") ++ (set_attr "compression" "none") ++ (set_attr "mode" "")]) ++ ++;; SImode shift whose result is sign-extended to DImode; a constant shift ++;; count is masked with 0x1f. ++(define_insn "*si3_extend" ++ [(set (match_operand:DI 0 "register_operand" "=d") ++ (sign_extend:DI ++ (any_shift:SI (match_operand:SI 1 "register_operand" "d") ++ (match_operand:SI 2 "arith_operand" "dI"))))] ++ "TARGET_64BIT" ++{ ++ if (CONST_INT_P (operands[2])) ++ { ++ operands[2] = GEN_INT (INTVAL (operands[2]) & 0x1f); ++ return "i.w\t%0,%1,%2"; ++ } else ++ return ".w\t%0,%1,%2"; ++} ++ [(set_attr "type" "shift") ++ (set_attr "mode" "SI")]) ++ ++;; The two zero_extend_ashift patterns emit two instructions ++;; (insn_count 2): bstrpick.d with bit range 31,0 into operand 0, then ++;; alsl.d on operand 0 with $zero and the shift amount in operand 2. ++(define_insn 
"zero_extend_ashift1" ++ [ (set (match_operand:DI 0 "register_operand" "=d") ++ (and:DI (ashift:DI (subreg:DI (match_operand:SI 1 "register_operand" "d") 0) ++ (match_operand 2 "const_immlsa_operand" "")) ++ (match_operand 3 "shift_mask_operand" "")))] ++"" ++"bstrpick.d\t%0,%1,31,0\n\talsl.d\t%0,%0,$zero,%2" ++[(set_attr "type" "arith") ++ (set_attr "mode" "DI") ++ (set_attr "insn_count" "2")]) ++ ++(define_insn "zero_extend_ashift2" ++ [ (set (match_operand:DI 0 "register_operand" "=d") ++ (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "d") ++ (match_operand 2 "const_immlsa_operand" "")) ++ (match_operand 3 "shift_mask_operand" "")))] ++"" ++"bstrpick.d\t%0,%1,31,0\n\talsl.d\t%0,%0,$zero,%2" ++[(set_attr "type" "arith") ++ (set_attr "mode" "DI") ++ (set_attr "insn_count" "2")]) ++ ++;; The alsl_paired patterns use the same bstrpick.d/alsl.d pair with a ++;; register as the third alsl.d operand. Operand 0 is earlyclobber ++;; ("=&d"): bstrpick.d writes it before alsl.d reads that register. ++(define_insn "alsl_paired1" ++ [(set (match_operand:DI 0 "register_operand" "=&d") ++ (plus:DI (and:DI (ashift:DI (subreg:DI (match_operand:SI 1 "register_operand" "d") 0) ++ (match_operand 2 "const_immlsa_operand" "")) ++ (match_operand 3 "shift_mask_operand" "")) ++ (match_operand:DI 4 "register_operand" "d")))] ++ "" ++ "bstrpick.d\t%0,%1,31,0\n\talsl.d\t%0,%0,%4,%2" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "DI") ++ (set_attr "insn_count" "2")]) ++ ++(define_insn "alsl_paired2" ++ [(set (match_operand:DI 0 "register_operand" "=&d") ++ (plus:DI (match_operand:DI 1 "register_operand" "d") ++ (and:DI (ashift:DI (match_operand:DI 2 "register_operand" "d") ++ (match_operand 3 "const_immlsa_operand" "")) ++ (match_operand 4 "shift_mask_operand" ""))))] ++ "" ++ "bstrpick.d\t%0,%2,31,0\n\talsl.d\t%0,%0,%1,%3" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "DI") ++ (set_attr "insn_count" "2")]) ++ ++;; Shift-and-add in one instruction, enabled by ISA_HAS_LSA. ++(define_insn "alsl_" ++ [(set (match_operand:GPR 0 "register_operand" "=d") ++ (plus:GPR (ashift:GPR (match_operand:GPR 1 "register_operand" "d") ++ (match_operand 2 "const_immlsa_operand" "")) ++ (match_operand:GPR 3 "register_operand" "d")))] ++ "ISA_HAS_LSA" ++ 
"alsl.\t%0,%1,%3,%2" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "")]) ++ ++;; Rotate right; the immediate mnemonic is used when operand 2 is a ++;; CONST_INT, the register mnemonic otherwise. ++(define_insn "rotr3" ++ [(set (match_operand:GPR 0 "register_operand" "=d") ++ (rotatert:GPR (match_operand:GPR 1 "register_operand" "d") ++ (match_operand:SI 2 "arith_operand" "dI")))] ++ "" ++{ ++ if (CONST_INT_P (operands[2])) ++ { ++ return "rotri.\t%0,%1,%2"; ++ } else ++ return "rotr.\t%0,%1,%2"; ++} ++ [(set_attr "type" "shift") ++ (set_attr "mode" "")]) ++ ++(define_insn "bswaphi2" ++ [(set (match_operand:HI 0 "register_operand" "=d") ++ (bswap:HI (match_operand:HI 1 "register_operand" "d")))] ++ "" ++ "revb.2h\t%0,%1" ++ [(set_attr "type" "shift")]) ++ ++;; Split into the UNSPEC_WSBH operation followed by a rotate right of ++;; operand 0 by 16. ++(define_insn_and_split "bswapsi2" ++ [(set (match_operand:SI 0 "register_operand" "=d") ++ (bswap:SI (match_operand:SI 1 "register_operand" "d")))] ++ "" ++ "#" ++ "" ++ [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_WSBH)) ++ (set (match_dup 0) (rotatert:SI (match_dup 0) (const_int 16)))] ++ "" ++ [(set_attr "insn_count" "2")]) ++ ++;; Split into the UNSPEC_DSBH operation followed by UNSPEC_DSHD applied ++;; to operand 0. ++(define_insn_and_split "bswapdi2" ++ [(set (match_operand:DI 0 "register_operand" "=d") ++ (bswap:DI (match_operand:DI 1 "register_operand" "d")))] ++ "TARGET_64BIT" ++ "#" ++ "" ++ [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_DSBH)) ++ (set (match_dup 0) (unspec:DI [(match_dup 0)] UNSPEC_DSHD))] ++ "" ++ [(set_attr "insn_count" "2")]) ++ ++(define_insn "wsbh" ++ [(set (match_operand:SI 0 "register_operand" "=d") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "d")] UNSPEC_WSBH))] ++ "" ++ "revb.2h\t%0,%1" ++ [(set_attr "type" "shift")]) ++ ++(define_insn "dsbh" ++ [(set (match_operand:DI 0 "register_operand" "=d") ++ (unspec:DI [(match_operand:DI 1 "register_operand" "d")] UNSPEC_DSBH))] ++ "TARGET_64BIT" ++ "revb.4h\t%0,%1" ++ [(set_attr "type" "shift")]) ++ ++(define_insn "dshd" ++ [(set (match_operand:DI 0 "register_operand" "=d") ++ (unspec:DI [(match_operand:DI 1 "register_operand" "d")] UNSPEC_DSHD))] ++ "TARGET_64BIT" ++ "revh.d\t%0,%1" ++ [(set_attr "type" 
"shift")]) ++ ++;; ++;; .................... ++;; ++;; CONDITIONAL BRANCHES ++;; ++;; .................... ++ ++;; Conditional branches on floating-point equality tests. ++ ++(define_insn "*branch_fp_CCmode" ++ [(set (pc) ++ (if_then_else ++ (match_operator 1 "equality_operator" ++ [(match_operand:CC 2 "register_operand" "z") ++ (const_int 0)]) ++ (label_ref (match_operand 0 "" "")) ++ (pc)))] ++ "TARGET_HARD_FLOAT" ++{ ++ return loongarch_output_conditional_branch (insn, operands, ++ LARCH_BRANCH ("b%F1", "%Z2%0"), ++ LARCH_BRANCH ("b%W1", "%Z2%0")); ++} ++ [(set_attr "type" "branch")]) ++ ++;; Same test with the if_then_else arms exchanged; the two LARCH_BRANCH ++;; templates are therefore passed in the opposite order. ++(define_insn "*branch_fp_inverted_CCmode" ++ [(set (pc) ++ (if_then_else ++ (match_operator 1 "equality_operator" ++ [(match_operand:CC 2 "register_operand" "z") ++ (const_int 0)]) ++ (pc) ++ (label_ref (match_operand 0 "" ""))))] ++ "TARGET_HARD_FLOAT" ++{ ++ return loongarch_output_conditional_branch (insn, operands, ++ LARCH_BRANCH ("b%W1", "%Z2%0"), ++ LARCH_BRANCH ("b%F1", "%Z2%0")); ++} ++ [(set_attr "type" "branch")]) ++ ++;; Conditional branches on ordered comparisons with zero. 
++ ++;; The third argument of loongarch_output_order_conditional_branch is ++;; false for this pattern and true for the inverted one. ++(define_insn "*branch_order" ++ [(set (pc) ++ (if_then_else ++ (match_operator 1 "order_operator" ++ [(match_operand:GPR 2 "register_operand" "d,d") ++ (match_operand:GPR 3 "reg_or_0_operand" "J,d")]) ++ (label_ref (match_operand 0 "" "")) ++ (pc)))] ++ "" ++ { return loongarch_output_order_conditional_branch (insn, operands, false); } ++ [(set_attr "type" "branch") ++ (set_attr "compact_form" "maybe,always") ++ (set_attr "hazard" "forbidden_slot")]) ++ ++(define_insn "*branch_order_inverted" ++ [(set (pc) ++ (if_then_else ++ (match_operator 1 "order_operator" ++ [(match_operand:GPR 2 "register_operand" "d,d") ++ (match_operand:GPR 3 "reg_or_0_operand" "J,d")]) ++ (pc) ++ (label_ref (match_operand 0 "" ""))))] ++ "" ++ { return loongarch_output_order_conditional_branch (insn, operands, true); } ++ [(set_attr "type" "branch") ++ (set_attr "compact_form" "maybe,always") ++ (set_attr "hazard" "forbidden_slot")]) ++ ++;; Conditional branch on equality comparison. ++ ++(define_insn "*branch_equality" ++ [(set (pc) ++ (if_then_else ++ (match_operator 1 "equality_operator" ++ [(match_operand:GPR 2 "register_operand" "d") ++ (match_operand:GPR 3 "reg_or_0_operand" "dJ")]) ++ (label_ref (match_operand 0 "" "")) ++ (pc)))] ++ "" ++ { return loongarch_output_equal_conditional_branch (insn, operands, false); } ++ [(set_attr "type" "branch") ++ (set_attr "compact_form" "maybe") ++ (set_attr "hazard" "forbidden_slot")]) ++ ++ ++(define_insn "*branch_equality_inverted" ++ [(set (pc) ++ (if_then_else ++ (match_operator 1 "equality_operator" ++ [(match_operand:GPR 2 "register_operand" "d") ++ (match_operand:GPR 3 "reg_or_0_operand" "dJ")]) ++ (pc) ++ (label_ref (match_operand 0 "" ""))))] ++ "" ++ { return loongarch_output_equal_conditional_branch (insn, operands, true); } ++ [(set_attr "type" "branch") ++ (set_attr "compact_form" "maybe") ++ (set_attr "hazard" "forbidden_slot")]) ++ ++ ++;; Both cbranch4 expanders (GPR and SCALARF operands) hand the whole ++;; operands array to loongarch_expand_conditional_branch. ++(define_expand "cbranch4" ++ [(set (pc) ++ (if_then_else (match_operator 0 
"comparison_operator" ++ [(match_operand:GPR 1 "register_operand") ++ (match_operand:GPR 2 "nonmemory_operand")]) ++ (label_ref (match_operand 3 "")) ++ (pc)))] ++ "" ++{ ++ loongarch_expand_conditional_branch (operands); ++ DONE; ++}) ++ ++(define_expand "cbranch4" ++ [(set (pc) ++ (if_then_else (match_operator 0 "comparison_operator" ++ [(match_operand:SCALARF 1 "register_operand") ++ (match_operand:SCALARF 2 "register_operand")]) ++ (label_ref (match_operand 3 "")) ++ (pc)))] ++ "" ++{ ++ loongarch_expand_conditional_branch (operands); ++ DONE; ++}) ++ ++;; Used to implement built-in functions. ++(define_expand "condjump" ++ [(set (pc) ++ (if_then_else (match_operand 0) ++ (label_ref (match_operand 1)) ++ (pc)))]) ++ ++ ++ ++;; ++;; .................... ++;; ++;; SETTING A REGISTER FROM A COMPARISON ++;; ++;; .................... ++ ++;; Destination is always set in SI mode. ++ ++(define_expand "cstore4" ++ [(set (match_operand:SI 0 "register_operand") ++ (match_operator:SI 1 "loongarch_cstore_operator" ++ [(match_operand:GPR 2 "register_operand") ++ (match_operand:GPR 3 "nonmemory_operand")]))] ++ "" ++{ ++ loongarch_expand_scc (operands); ++ DONE; ++}) ++ ++;; x == 0 is emitted as sltui with the immediate 1. ++(define_insn "*seq_zero_" ++ [(set (match_operand:GPR2 0 "register_operand" "=d") ++ (eq:GPR2 (match_operand:GPR 1 "register_operand" "d") ++ (const_int 0)))] ++ "" ++ "sltui\t%0,%1,1" ++ [(set_attr "type" "slt") ++ (set_attr "mode" "")]) ++ ++ ++(define_insn "*sne_zero_" ++ [(set (match_operand:GPR2 0 "register_operand" "=d") ++ (ne:GPR2 (match_operand:GPR 1 "register_operand" "d") ++ (const_int 0)))] ++ "" ++ "sltu\t%0,%.,%1" ++ [(set_attr "type" "slt") ++ (set_attr "mode" "")]) ++ ++;; "Greater than" is emitted with the two source operands exchanged in ++;; the assembly template. ++(define_insn "*sgt_" ++ [(set (match_operand:GPR2 0 "register_operand" "=d") ++ (any_gt:GPR2 (match_operand:GPR 1 "register_operand" "d") ++ (match_operand:GPR 2 "reg_or_0_operand" "dJ")))] ++ "" ++ "slt\t%0,%z2,%1" ++ [(set_attr "type" "slt") ++ (set_attr "mode" "")]) ++ ++ ++(define_insn "*sge_" ++ [(set (match_operand:GPR2 
0 "register_operand" "=d") ++ (any_ge:GPR2 (match_operand:GPR 1 "register_operand" "d") ++ (const_int 1)))] ++ "" ++ "slti\t%0,%.,%1" ++ [(set_attr "type" "slt") ++ (set_attr "mode" "")]) ++ ++;; The immediate mnemonic is used when operand 2 is a CONST_INT, the ++;; register mnemonic otherwise. ++(define_insn "*slt_" ++ [(set (match_operand:GPR2 0 "register_operand" "=d") ++ (any_lt:GPR2 (match_operand:GPR 1 "register_operand" "d") ++ (match_operand:GPR 2 "arith_operand" "dI")))] ++ "" ++{ ++ if (CONST_INT_P (operands[2])) ++ { ++ return "slti\t%0,%1,%2"; ++ } else ++ return "slt\t%0,%1,%2"; ++} ++ [(set_attr "type" "slt") ++ (set_attr "mode" "")]) ++ ++ ++;; x <= C is emitted as x < C + 1: the output code adds 1 to the constant ++;; in operand 2 before printing slti. ++(define_insn "*sle_" ++ [(set (match_operand:GPR2 0 "register_operand" "=d") ++ (any_le:GPR2 (match_operand:GPR 1 "register_operand" "d") ++ (match_operand:GPR 2 "sle_operand" "")))] ++ "" ++{ ++ operands[2] = GEN_INT (INTVAL (operands[2]) + 1); ++ return "slti\t%0,%1,%2"; ++} ++ [(set_attr "type" "slt") ++ (set_attr "mode" "")]) ++ ++ ++;; ++;; .................... ++;; ++;; FLOATING POINT COMPARISONS ++;; ++;; .................... ++ ++;; The fcond pattern prints operands 1 and 2 in order; the swapped_fcond ++;; pattern prints them exchanged (%2 before %1). ++(define_insn "s__using_CCmode" ++ [(set (match_operand:CC 0 "register_operand" "=z") ++ (fcond:CC (match_operand:SCALARF 1 "register_operand" "f") ++ (match_operand:SCALARF 2 "register_operand" "f")))] ++ "" ++ "fcmp..\t%Z0%1,%2" ++ [(set_attr "type" "fcmp") ++ (set_attr "mode" "FPSW")]) ++ ++(define_insn "s__using_CCmode" ++ [(set (match_operand:CC 0 "register_operand" "=z") ++ (swapped_fcond:CC (match_operand:SCALARF 1 "register_operand" "f") ++ (match_operand:SCALARF 2 "register_operand" "f")))] ++ "" ++ "fcmp..\t%Z0%2,%1" ++ [(set_attr "type" "fcmp") ++ (set_attr "mode" "FPSW")]) ++ ++;; ++;; .................... ++;; ++;; UNCONDITIONAL BRANCHES ++;; ++;; .................... ++ ++;; Unconditional branches. 
++ ++(define_expand "jump" ++ [(set (pc) ++ (label_ref (match_operand 0)))]) ++ ++;; Used when TARGET_ABSOLUTE_JUMPS is true. ++(define_insn "*jump_absolute" ++ [(set (pc) ++ (label_ref (match_operand 0)))] ++ "TARGET_ABSOLUTE_JUMPS" ++{ ++ return LARCH_ABSOLUTE_JUMP ("b\t%l0"); ++} ++ [(set_attr "type" "branch") ++ (set_attr "compact_form" "maybe")]) ++ ++;; Used when TARGET_ABSOLUTE_JUMPS is false. ++;; NOTE(review): this template prints the label with "%0" while ++;; *jump_absolute uses "%l0" -- confirm the difference is intended. ++(define_insn "*jump_pic" ++ [(set (pc) ++ (label_ref (match_operand 0)))] ++ "!TARGET_ABSOLUTE_JUMPS" ++{ ++ return "b\t%0"; ++} ++ [(set_attr "type" "branch") ++ (set_attr "compact_form" "maybe")]) ++ ++ ++ ++;; Force the target into a Pmode register and emit the Pmode-specific ++;; indirect jump insn selected by PMODE_INSN. ++(define_expand "indirect_jump" ++ [(set (pc) (match_operand 0 "register_operand"))] ++ "" ++{ ++ operands[0] = force_reg (Pmode, operands[0]); ++ emit_jump_insn (PMODE_INSN (gen_indirect_jump, (operands[0]))); ++ DONE; ++}) ++ ++(define_insn "indirect_jump_" ++ [(set (pc) (match_operand:P 0 "register_operand" "d"))] ++ "" ++ { ++ return "jirl\t$zero,%0,0"; ++ } ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none")]) ++ ++;; With flag_pic, the label address (operand 1) is added to operand 0 ++;; before the Pmode-specific tablejump insn is emitted. ++(define_expand "tablejump" ++ [(set (pc) ++ (match_operand 0 "register_operand")) ++ (use (label_ref (match_operand 1 "")))] ++ "" ++{ ++ if (flag_pic) ++ operands[0] = expand_simple_binop (Pmode, PLUS, operands[0], ++ gen_rtx_LABEL_REF (Pmode, operands[1]), ++ NULL_RTX, 0, OPTAB_DIRECT); ++ emit_jump_insn (PMODE_INSN (gen_tablejump, (operands[0], operands[1]))); ++ DONE; ++}) ++ ++(define_insn "tablejump_" ++ [(set (pc) ++ (match_operand:P 0 "register_operand" "d")) ++ (use (label_ref (match_operand 1 "" "")))] ++ "" ++ { ++ return "jirl\t$zero,%0,0"; ++ } ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none")]) ++ ++ ++;; ++;; .................... ++;; ++;; Function prologue/epilogue ++;; ++;; .................... ++;; ++ ++;; The whole prologue is generated by n_loongarch_expand_prologue. ++(define_expand "prologue" ++ [(const_int 1)] ++ "" ++{ ++ n_loongarch_expand_prologue (); ++ DONE; ++}) ++ ++;; Block any insns from being moved before this point, since the ++;; profiling call to mcount can use various registers that aren't ++;; saved or used to pass arguments. 
++ ++;; Emits no assembly (empty output template). ++(define_insn "blockage" ++ [(unspec_volatile [(const_int 0)] UNSPEC_BLOCKAGE)] ++ "" ++ "" ++ [(set_attr "type" "ghost") ++ (set_attr "mode" "none")]) ++ ++;; Operand 1 is tied to operand 0 by the "0" constraint; the assembly ++;; comes from loongarch_output_probe_stack_range, which is given ++;; operands 0 and 2. ++(define_insn "probe_stack_range_" ++ [(set (match_operand:P 0 "register_operand" "=d") ++ (unspec_volatile:P [(match_operand:P 1 "register_operand" "0") ++ (match_operand:P 2 "register_operand" "d")] ++ UNSPEC_PROBE_STACK_RANGE))] ++ "" ++ { return loongarch_output_probe_stack_range (operands[0], operands[2]); } ++ [(set_attr "type" "unknown") ++ (set_attr "can_delay" "no") ++ (set_attr "mode" "")]) ++ ++;; Both epilogue expanders call n_loongarch_expand_epilogue; the argument ++;; is false for a normal epilogue and true for a sibcall epilogue. ++(define_expand "epilogue" ++ [(const_int 2)] ++ "" ++{ ++ n_loongarch_expand_epilogue (false); ++ DONE; ++}) ++ ++(define_expand "sibcall_epilogue" ++ [(const_int 2)] ++ "" ++{ ++ n_loongarch_expand_epilogue (true); ++ DONE; ++}) ++ ++;; Trivial return. Make it look like a normal return insn as that ++;; allows jump optimizations to work better. ++ ++(define_expand "return" ++ [(simple_return)] ++ "loongarch_can_use_return_insn ()" ++ { }) ++ ++(define_expand "simple_return" ++ [(simple_return)] ++ "" ++ { }) ++ ++;; The output code sets operands[0] to the Pmode register ++;; RETURN_ADDR_REGNUM and jumps through it. ++(define_insn "*" ++ [(any_return)] ++ "" ++ { ++ operands[0] = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); ++ return "jirl\t$zero,%0,0"; ++ } ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none")]) ++ ++;; Normal return. ++ ++(define_insn "_internal" ++ [(any_return) ++ (use (match_operand 0 "pmode_register_operand" ""))] ++ "" ++ { ++ return "jirl\t$zero,%0,0"; ++ } ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none")]) ++ ++;; Exception return. ++(define_insn "loongarch_ertn" ++ [(return) ++ (unspec_volatile [(const_int 0)] UNSPEC_ERTN)] ++ "" ++ "ertn" ++ [(set_attr "type" "trap") ++ (set_attr "mode" "none")]) ++ ++;; Disable interrupts. ++(define_insn "loongarch_di" ++ [(unspec_volatile [(const_int 0)] UNSPEC_DI)] ++ "" ++ "di" ++ [(set_attr "type" "trap") ++ (set_attr "mode" "none")]) ++ ++;; Execution hazard barrier. 
++(define_insn "loongarch_ehb" ++ [(unspec_volatile [(const_int 0)] UNSPEC_EHB)] ++ "" ++ "ehb" ++ [(set_attr "type" "trap") ++ (set_attr "mode" "none")]) ++ ++;; Read GPR from previous shadow register set. ++(define_insn "loongarch_rdpgpr_" ++ [(set (match_operand:P 0 "register_operand" "=d") ++ (unspec_volatile:P [(match_operand:P 1 "register_operand" "d")] ++ UNSPEC_RDPGPR))] ++ "" ++ "rdpgpr\t%0,%1" ++ [(set_attr "type" "move") ++ (set_attr "mode" "")]) ++ ++;; This is used in compiling the unwind routines. ++;; Operand 0 is converted to word_mode when it has a different mode, then ++;; passed to eh_set_lr_di (TARGET_64BIT) or eh_set_lr_si. ++(define_expand "eh_return" ++ [(use (match_operand 0 "general_operand"))] ++ "" ++{ ++ if (GET_MODE (operands[0]) != word_mode) ++ operands[0] = convert_to_mode (word_mode, operands[0], 0); ++ if (TARGET_64BIT) ++ emit_insn (gen_eh_set_lr_di (operands[0])); ++ else ++ emit_insn (gen_eh_set_lr_si (operands[0])); ++ DONE; ++}) ++ ++;; Clobber the return address on the stack. We can't expand this ++;; until we know where it will be put in the stack frame. ++ ++(define_insn "eh_set_lr_si" ++ [(unspec [(match_operand:SI 0 "register_operand" "d")] UNSPEC_EH_RETURN) ++ (clobber (match_scratch:SI 1 "=&d"))] ++ "! TARGET_64BIT" ++ "#") ++ ++(define_insn "eh_set_lr_di" ++ [(unspec [(match_operand:DI 0 "register_operand" "d")] UNSPEC_EH_RETURN) ++ (clobber (match_scratch:DI 1 "=&d"))] ++ "TARGET_64BIT" ++ "#") ++ ++;; After reload, pass the new return address (operand 0) and the scratch ++;; register (operand 1) to loongarch_set_return_address. ++(define_split ++ [(unspec [(match_operand 0 "register_operand")] UNSPEC_EH_RETURN) ++ (clobber (match_scratch 1))] ++ "reload_completed" ++ [(const_int 0)] ++{ ++ loongarch_set_return_address (operands[0], operands[1]); ++ DONE; ++}) ++ ++ ++ ++;; ++;; .................... ++;; ++;; FUNCTION CALLS ++;; ++;; .................... ++ ++ ++;; Sibling calls. All these patterns use jump instructions. ++ ++;; If TARGET_SIBCALLS, call_insn_operand will only accept constant ++;; addresses if a direct jump is acceptable. Since the 'S' constraint ++;; is defined in terms of call_insn_operand, the same is true of the ++;; constraints. 
++
++;; When we use an indirect jump, we need a register that will be
++;; preserved by the epilogue.
++
++;; Expand a sibling call that returns no value.  Operand 0 is the MEM
++;; whose address names the callee and operand 1 is passed through to the
++;; call rtx.  Operands 2 and 3 are accepted but not used here.
++(define_expand "sibcall"
++  [(parallel [(call (match_operand 0 "")
++                    (match_operand 1 ""))
++              (use (match_operand 2 "")) ;; next_arg_reg
++              (use (match_operand 3 ""))])] ;; struct_value_size_rtx
++  "TARGET_SIBCALLS"
++{
++  rtx target = loongarch_legitimize_call_address (XEXP (operands[0], 0));
++
++  emit_call_insn (gen_sibcall_internal (target, operands[1]));
++  DONE;
++})
++
++;; Sibling call without a return value.  Operand 0 is the call target and
++;; operand 1 is the second operand of the call rtx.  The alternative
++;; (constraints j, c, a, t, h) and loongarch_cmodel_var together select
++;; the jump sequence.  Every template names the target as %0; operand 1
++;; is not an address in this pattern and must not be printed as one.
++(define_insn "sibcall_internal"
++  [(call (mem:SI (match_operand 0 "call_insn_operand" "j,c,a,t,h"))
++         (match_operand 1 "" ""))]
++  "TARGET_SIBCALLS && SIBLING_CALL_P (insn)"
++{
++  switch (which_alternative)
++    {
++    case 0:
++      return "jr\t%0";
++    case 1:
++      if (loongarch_cmodel_var == LARCH_CMODEL_LARGE)
++        return "pcaddu18i\t$t0,(%%pcrel(%0+0x20000))>>18\n\tjirl\t$zero,$t0,%%pcrel(%0+4)-(%%pcrel(%0+4+0x20000)>>18<<18)";
++      else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME)
++        return "la.local\t$t0,$t1,%0\n\tjr\t$t0";
++      else
++        return "b\t%0";
++    case 2:
++      if (loongarch_cmodel_var == LARCH_CMODEL_TINY_STATIC)
++        return "b\t%0";
++      else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME)
++        return "la.global\t$t0,$t1,%0\n\tjr\t$t0";
++      else
++        return "la.global\t$t0,%0\n\tjr\t$t0";
++    case 3:
++      if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME)
++        return "la.global\t$t0,$t1,%0\n\tjr\t$t0";
++      else
++        return "la.global\t$t0,%0\n\tjr\t$t0";
++    case 4:
++      if (loongarch_cmodel_var == LARCH_CMODEL_NORMAL || loongarch_cmodel_var == LARCH_CMODEL_TINY)
++        return "b\t%%plt(%0)";
++      else if (loongarch_cmodel_var == LARCH_CMODEL_LARGE)
++        return "pcaddu18i\t$t0,(%%plt(%0)+0x20000)>>18\n\tjirl\t$zero,$t0,%%plt(%0)+4-((%%plt(%0)+(4+0x20000))>>18<<18)";
++      else
++        /* No return follows sorry (), so control continues into the
++           default case.  */
++        sorry ("cmodel extreme and tiny static not support plt.");
++    default:
++      gcc_unreachable ();
++    }
++}
++  [(set_attr "jal" "indirect,direct,direct,direct,direct")])
++
++;; Expand a sibling call that returns a value.  Operand 0 is the return
++;; value, operand 1 the MEM naming the callee and operand 2 is passed
++;; through to the call rtx.  A two-element PARALLEL return value is
++;; emitted with sibcall_value_multiple_internal, anything else with
++;; sibcall_value_internal.
++(define_expand "sibcall_value"
++  [(parallel [(set (match_operand 0 "")
++                   (call (match_operand 1 "")
++                         (match_operand 2 "")))
++              (use (match_operand 3 ""))])] ;; next_arg_reg
++  "TARGET_SIBCALLS"
++{
++  rtx target = loongarch_legitimize_call_address (XEXP (operands[1], 0));
++
++  /* Handle return values created by loongarch_return_fpr_pair. */
++  if (GET_CODE (operands[0]) == PARALLEL && XVECLEN (operands[0], 0) == 2)
++    {
++      emit_call_insn (gen_sibcall_value_multiple_internal (XEXP (XVECEXP (operands[0], 0, 0), 0),
++        target, operands[2], XEXP (XVECEXP (operands[0], 0, 1), 0)));
++    }
++  else
++    {
++      /* Handle return values created by loongarch_return_fpr_single. */
++      if (GET_CODE (operands[0]) == PARALLEL && XVECLEN (operands[0], 0) == 1)
++        operands[0] = XEXP (XVECEXP (operands[0], 0, 0), 0);
++
++      emit_call_insn (gen_sibcall_value_internal (operands[0], target, operands[2]));
++    }
++  DONE;
++})
++
++;; Sibling call with a single return register.  Here the call target is
++;; operand 1, so the templates print %1.
++(define_insn "sibcall_value_internal"
++  [(set (match_operand 0 "register_operand" "")
++        (call (mem:SI (match_operand 1 "call_insn_operand" "j,c,a,t,h"))
++              (match_operand 2 "" "")))]
++  "TARGET_SIBCALLS && SIBLING_CALL_P (insn)"
++{
++  switch (which_alternative)
++    {
++    case 0:
++      return "jr\t%1";
++    case 1:
++      if (loongarch_cmodel_var == LARCH_CMODEL_LARGE)
++        return "pcaddu18i\t$t0,%%pcrel(%1+0x20000)>>18\n\tjirl\t$zero,$t0,%%pcrel(%1+4)-((%%pcrel(%1+4+0x20000))>>18<<18)";
++      else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME)
++        return "la.local\t$t0,$t1,%1\n\tjr\t$t0";
++      else
++        return "b\t%1";
++    case 2:
++      if (loongarch_cmodel_var == LARCH_CMODEL_TINY_STATIC)
++        return "b\t%1";
++      else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME)
++        return "la.global\t$t0,$t1,%1\n\tjr\t$t0";
++      else
++        return "la.global\t$t0,%1\n\tjr\t$t0";
++    case 3:
++      if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME)
++        return "la.global\t$t0,$t1,%1\n\tjr\t$t0";
++      else
++        return "la.global\t$t0,%1\n\tjr\t$t0";
++    case 4:
++      if (loongarch_cmodel_var == LARCH_CMODEL_NORMAL || loongarch_cmodel_var == LARCH_CMODEL_TINY)
++        return " b\t%%plt(%1)";
++      else if (loongarch_cmodel_var == LARCH_CMODEL_LARGE)
++        return "pcaddu18i\t$t0,(%%plt(%1)+0x20000)>>18\n\tjirl\t$zero,$t0,%%plt(%1)+4-((%%plt(%1)+(4+0x20000))>>18<<18)";
++      else
++        /* No return follows sorry (), so control continues into the
++           default case.  */
++        sorry ("loongarch cmodel extreme and tiny-static not support plt.");
++    default:
++      gcc_unreachable ();
++    }
++}
++  [(set_attr "jal" "indirect,direct,direct,direct,direct")])
++
++;; Sibling call whose value is returned in two registers (operands 0 and
++;; 3).  The call target is operand 1.
++(define_insn "sibcall_value_multiple_internal"
++  [(set (match_operand 0 "register_operand" "")
++        (call (mem:SI (match_operand 1 "call_insn_operand" "j,c,a,t,h"))
++              (match_operand 2 "" "")))
++   (set (match_operand 3 "register_operand" "")
++        (call (mem:SI (match_dup 1))
++              (match_dup 2)))]
++  "TARGET_SIBCALLS && SIBLING_CALL_P (insn)"
++{
++  switch (which_alternative)
++    {
++    case 0:
++      return "jr\t%1";
++    case 1:
++      if (loongarch_cmodel_var == LARCH_CMODEL_LARGE)
++        return "pcaddu18i\t$t0,%%pcrel(%1+0x20000)>>18\n\tjirl\t$zero,$t0,%%pcrel(%1+4)-(%%pcrel(%1+4+0x20000)>>18<<18)";
++      else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME)
++        return "la.local\t$t0,$t1,%1\n\tjr\t$t0";
++      else
++        return "b\t%1";
++    case 2:
++      if (loongarch_cmodel_var == LARCH_CMODEL_TINY_STATIC)
++        return "b\t%1";
++      else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME)
++        return "la.global\t$t0,$t1,%1\n\tjr\t$t0";
++      else
++        return "la.global\t$t0,%1\n\tjr\t$t0";
++    case 3:
++      if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME)
++        return "la.global\t$t0,$t1,%1\n\tjr\t$t0";
++      else
++        return "la.global\t$t0,%1\n\tjr\t$t0";
++    case 4:
++      if (loongarch_cmodel_var == LARCH_CMODEL_NORMAL || loongarch_cmodel_var == LARCH_CMODEL_TINY)
++        return "b\t%%plt(%1)";
++      else if (loongarch_cmodel_var == LARCH_CMODEL_LARGE)
++        return "pcaddu18i\t$t0,(%%plt(%1)+0x20000)>>18\n\tjirl\t$zero,$t0,%%plt(%1)+4-((%%plt(%1)+(4+0x20000))>>18<<18)";
++      else
++        /* No return follows sorry (), so control continues into the
++           default case.  */
++        sorry ("loongarch cmodel extreme and tiny-static not support plt.");
++    default:
++      gcc_unreachable ();
++    }
++}
++  [(set_attr "jal" "indirect,direct,direct,direct,direct")])
++
++(define_expand "call" ++ [(parallel [(call (match_operand 0 "") ++ (match_operand 1 "")) ++ (use (match_operand 2 "")) ;; next_arg_reg ++ (use (match_operand 3 ""))])] ;; struct_value_size_rtx ++ "" ++{ ++ rtx target = loongarch_legitimize_call_address (XEXP (operands[0], 0)); ++ ++ emit_call_insn (gen_call_internal (target, operands[1])); ++ DONE; ++}) ++;; In the last case, we can generate the individual instructions with ++;; a define_split. There are several things to be wary of: ++;; ++;; - We can't expose the load of $gp before reload. If we did, ++;; it might get removed as dead, but reload can introduce new ++;; uses of $gp by rematerializing constants. ++;; ++;; - We shouldn't restore $gp after calls that never return. ++;; It isn't valid to insert instructions between a noreturn ++;; call and the following barrier. ++;; ++;; - The splitter deliberately changes the liveness of $gp. The unsplit ++;; instruction preserves $gp and so have no effect on its liveness. ++;; But once we generate the separate insns, it becomes obvious that ++;; $gp is not live on entry to the call. 
++;; ++ ++(define_insn "call_internal" ++ [(call (mem:SI (match_operand 0 "call_insn_operand" "e,c,a,t,h")) ++ (match_operand 1 "" "")) ++ (clobber (reg:SI RETURN_ADDR_REGNUM))] ++ "" ++{ /* Operand 0 is the call target; operand 1 is the second operand of the call rtx and must not be printed as an address. */ ++ switch (which_alternative) ++ { ++ case 0: /* The only alternative tagged "indirect" in the "jal" attribute. */ ++ return "jirl\t$ra,%0,0"; ++ case 1: ++ if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) ++ return "pcaddu18i\t$ra,%%pcrel(%0+0x20000)>>18\n\tjirl\t$ra,$ra,%%pcrel(%0+4)-(%%pcrel(%0+4+0x20000)>>18<<18)"; ++ else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) ++ return "la.local\t$ra,$t0,%0\n\tjirl\t$ra,$ra,0"; ++ else ++ return "bl\t%0"; ++ case 2: ++ if (loongarch_cmodel_var == LARCH_CMODEL_TINY_STATIC) ++ return "bl\t%0"; ++ else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) ++ return "la.global\t$ra,$t0,%0\n\tjirl\t$ra,$ra,0"; ++ else ++ return "la.global\t$ra,%0\n\tjirl\t$ra,$ra,0"; ++ case 3: ++ if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) ++ return "la.global\t$ra,$t0,%0\n\tjirl\t$ra,$ra,0"; ++ else ++ return "la.global\t$ra,%0\n\tjirl\t$ra,$ra,0"; ++ case 4: ++ if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) ++ return "pcaddu18i\t$ra,(%%plt(%0)+0x20000)>>18\n\tjirl\t$ra,$ra,%%plt(%0)+4-((%%plt(%0)+(4+0x20000))>>18<<18)"; ++ else if (loongarch_cmodel_var == LARCH_CMODEL_NORMAL || loongarch_cmodel_var == LARCH_CMODEL_TINY) ++ return "bl\t%%plt(%0)"; ++ else ++ sorry ("cmodel extreme and tiny-static not support plt."); ++ default: /* NOTE(review): the sorry () branch above has no return and falls through to here. */ ++ gcc_unreachable (); ++ } ++} ++ [(set_attr "jal" "indirect,direct,direct,direct,direct") ++ (set_attr "insn_count" "1,2,3,3,2")]) ++ ++ ++(define_expand "call_value" ++ [(parallel [(set (match_operand 0 "") ++ (call (match_operand 1 "") ++ (match_operand 2 ""))) ++ (use (match_operand 3 ""))])] ;; next_arg_reg ++ "" ++{ ++ rtx target = loongarch_legitimize_call_address (XEXP (operands[1], 0)); ++ /* Handle return values created by loongarch_return_fpr_pair. 
*/ ++ if (GET_CODE (operands[0]) == PARALLEL && XVECLEN (operands[0], 0) == 2) ++ emit_call_insn (gen_call_value_multiple_internal (XEXP (XVECEXP (operands[0], 0, 0), 0), ++ target, operands[2], XEXP (XVECEXP (operands[0], 0, 1), 0))); ++ else ++ { ++ /* Handle return values created by loongarch_return_fpr_single. */ ++ if (GET_CODE (operands[0]) == PARALLEL && XVECLEN (operands[0], 0) == 1) ++ operands[0] = XEXP (XVECEXP (operands[0], 0, 0), 0); ++ ++ emit_call_insn (gen_call_value_internal (operands[0], target, operands[2])); ++ } ++ DONE; ++}) ++ ++;; See comment for call_internal. ++(define_insn "call_value_internal" ++ [(set (match_operand 0 "register_operand" "") ++ (call (mem:SI (match_operand 1 "call_insn_operand" "e,c,a,t,h")) ++ (match_operand 2 "" ""))) ++ (clobber (reg:SI RETURN_ADDR_REGNUM))] ++ "" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "jirl\t$ra,%1,0"; ++ case 1: ++ if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) ++ return "pcaddu18i\t$ra,%%pcrel(%1+0x20000)>>18\n\tjirl\t$ra,$ra,%%pcrel(%1+4)-(%%pcrel(%1+4+0x20000)>>18<<18)"; ++ else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) ++ return "la.local\t$ra,$t0,%1\n\tjirl\t$ra,$ra,0"; ++ else ++ return "bl\t%1"; ++ case 2: ++ if (loongarch_cmodel_var == LARCH_CMODEL_TINY_STATIC) ++ return "bl\t%1"; ++ else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) ++ return "la.global\t$ra,$t0,%1\n\tjirl\t$ra,$ra,0"; ++ else ++ return "la.global\t$ra,%1\n\tjirl\t$ra,$ra,0"; ++ case 3: ++ if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) ++ return "la.global\t$ra,$t0,%1\n\tjirl\t$ra,$ra,0"; ++ else ++ return "la.global\t$ra,%1\n\tjirl\t$ra,$ra,0"; ++ case 4: ++ if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) ++ return "pcaddu18i\t$ra,(%%plt(%1)+0x20000)>>18\n\tjirl\t$ra,$ra,%%plt(%1)+4-((%%plt(%1)+(4+0x20000))>>18<<18)"; ++ else if (loongarch_cmodel_var == LARCH_CMODEL_NORMAL || loongarch_cmodel_var == LARCH_CMODEL_TINY) ++ return "bl\t%%plt(%1)"; ++ else ++ sorry ("loongarch cmodel 
extreme and tiny-static not support plt."); ++ default: ++ gcc_unreachable (); ++ } ++} ++ [(set_attr "jal" "indirect,direct,direct,direct,direct") ++ (set_attr "insn_count" "1,2,3,3,2")]) ++ ++;; See comment for call_internal. ++(define_insn "call_value_multiple_internal" ++ [(set (match_operand 0 "register_operand" "") ++ (call (mem:SI (match_operand 1 "call_insn_operand" "e,c,a,t,h")) ++ (match_operand 2 "" ""))) ++ (set (match_operand 3 "register_operand" "") ++ (call (mem:SI (match_dup 1)) ++ (match_dup 2))) ++ (clobber (reg:SI RETURN_ADDR_REGNUM))] ++ "" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "jirl\t$ra,%1,0"; ++ case 1: ++ if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) ++ return "pcaddu18i\t$ra,%%pcrel(%1+0x20000)>>18\n\tjirl\t$ra,$ra,%%pcrel(%1+4)-(%%pcrel(%1+4+0x20000)>>18<<18)"; ++ else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) ++ return "la.local\t$ra,$t0,%1\n\tjirl\t$ra,$ra,0"; ++ else ++ return "bl\t%1"; ++ case 2: ++ if (loongarch_cmodel_var == LARCH_CMODEL_TINY_STATIC) ++ return "bl\t%1"; ++ else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) ++ return "la.global\t$ra,$t0,%1\n\tjirl\t$ra,$ra,0 "; ++ else ++ return "la.global\t$ra,%1\n\tjirl\t$ra,$ra,0"; ++ case 3: ++ if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) ++ return "la.global\t$ra,$t0,%1\n\tjirl\t$ra,$ra,0"; ++ else ++ return "la.global\t$ra,%1\n\tjirl\t$ra,$ra,0"; ++ case 4: ++ if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) ++ return "pcaddu18i\t$ra,(%%plt(%1)+0x20000)>>18\n\tjirl\t$ra,$ra,%%plt(%1)+4-((%%plt(%1)+(4+0x20000))>>18<<18)"; ++ else if (loongarch_cmodel_var == LARCH_CMODEL_NORMAL || loongarch_cmodel_var == LARCH_CMODEL_TINY) ++ return "bl\t%%plt(%1)"; ++ else ++ sorry ("loongarch cmodel extreme and tiny-static not support plt."); ++ default: ++ gcc_unreachable (); ++ } ++} ++ [(set_attr "jal" "indirect,direct,direct,direct,direct") ++ (set_attr "insn_count" "1,2,3,3,2")]) ++ ++ ++;; Call subroutine returning any type. 
++ ++(define_expand "untyped_call" ++ [(parallel [(call (match_operand 0 "") ++ (const_int 0)) ++ (match_operand 1 "") ++ (match_operand 2 "")])] ++ "" ++{ ++ int i; ++ ++ emit_call_insn (gen_call (operands[0], const0_rtx, NULL, const0_rtx)); ++ ++ for (i = 0; i < XVECLEN (operands[2], 0); i++) ++ { ++ rtx set = XVECEXP (operands[2], 0, i); ++ loongarch_emit_move (SET_DEST (set), SET_SRC (set)); ++ } ++ ++ emit_insn (gen_blockage ()); ++ DONE; ++}) ++ ++;; ++;; .................... ++;; ++;; MISC. ++;; ++;; .................... ++;; ++ ++ ++(define_insn "*prefetch_indexed_" ++ [(prefetch (plus:P (match_operand:P 0 "register_operand" "d") ++ (match_operand:P 1 "register_operand" "d")) ++ (match_operand 2 "const_int_operand" "n") ++ (match_operand 3 "const_int_operand" "n"))] ++ "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT" ++{ ++ operands[2] = loongarch_prefetch_cookie (operands[2], operands[3]); ++ return "prefx\t%2,%1(%0)"; ++} ++ [(set_attr "type" "prefetchx")]) ++ ++(define_insn "nop" ++ [(const_int 0)] ++ "" ++ "nop" ++ [(set_attr "type" "nop") ++ (set_attr "mode" "none")]) ++ ++;; Like nop, but commented out when outside a .set noreorder block. ++(define_insn "hazard_nop" ++ [(const_int 1)] ++ "" ++ { ++ return "#nop"; ++ } ++ [(set_attr "type" "nop")]) ++ ++;; The `.insn' pseudo-op. ++(define_insn "insn_pseudo" ++ [(unspec_volatile [(const_int 0)] UNSPEC_INSN_PSEUDO)] ++ "" ++ ".insn" ++ [(set_attr "mode" "none") ++ (set_attr "insn_count" "0")]) ++ ++;; Conditional move instructions. 
++ ++(define_insn "*sel_using_" ++ [(set (match_operand:GPR 0 "register_operand" "=d,d") ++ (if_then_else:GPR ++ (equality_op:GPR2 (match_operand:GPR2 1 "register_operand" "d,d") ++ (const_int 0)) ++ (match_operand:GPR 2 "reg_or_0_operand" "d,J") ++ (match_operand:GPR 3 "reg_or_0_operand" "J,d")))] ++ "register_operand (operands[2], mode) ++ != register_operand (operands[3], mode)" ++ "@ ++ \t%0,%2,%1 ++ \t%0,%3,%1" ++ [(set_attr "type" "condmove") ++ (set_attr "mode" "")]) ++ ++;; sel.fmt copies the 3rd argument when the 1st is non-zero and the 2nd ++;; argument if the 1st is zero. This means operand 2 and 3 are ++;; inverted in the instruction. ++ ++;; FIXME: fsel ++(define_insn "*sel" ++ [(set (match_operand:SCALARF 0 "register_operand" "=f") ++ (if_then_else:SCALARF ++ (ne:CC (match_operand:CC 1 "register_operand" "z") ++ (const_int 0)) ++ (match_operand:SCALARF 2 "reg_or_0_operand" "f") ++ (match_operand:SCALARF 3 "reg_or_0_operand" "f")))] ++ "" ++ "fsel\t%0,%3,%2,%1" ++ [(set_attr "type" "condmove") ++ (set_attr "mode" "")]) ++ ++;; These are the main define_expand's used to make conditional moves. 
++ ++(define_expand "mov<mode>cc" ++ [(set (match_dup 4) (match_operand 1 "comparison_operator")) ++ (set (match_operand:GPR 0 "register_operand") ++ (if_then_else:GPR (match_dup 5) ++ (match_operand:GPR 2 "reg_or_0_operand") ++ (match_operand:GPR 3 "reg_or_0_operand")))] ++ "" ++{ ++ if (!INTEGRAL_MODE_P (GET_MODE (XEXP (operands[1], 0)))) ++ FAIL; ++ ++ if (loongarch_expand_conditional_move (operands)) ++ DONE; ++ else ++ FAIL; ++}) ++ ++;; FIXME: fsel ++(define_expand "mov<mode>cc" ++ [(set (match_dup 4) (match_operand 1 "comparison_operator")) ++ (set (match_operand:SCALARF 0 "register_operand") ++ (if_then_else:SCALARF (match_dup 5) ++ (match_operand:SCALARF 2 "reg_or_0_operand") ++ (match_operand:SCALARF 3 "reg_or_0_operand")))] ++ "" ++{ ++ if (!FLOAT_MODE_P (GET_MODE (XEXP (operands[1], 0)))) ++ FAIL; ++ ++ if (loongarch_expand_conditional_move (operands)) ++ DONE; ++ else ++ FAIL; ++}) ++ ++(define_split ++ [(match_operand 0 "small_data_pattern")] ++ "reload_completed" ++ [(match_dup 0)] ++ { operands[0] = loongarch_rewrite_small_data (operands[0]); }) ++ ++;; Thread-Local Storage ++ ++(define_insn "got_load_tls_gd<mode>" ++ [(set (match_operand:P 0 "register_operand" "=r") ++ (unspec:P ++ [(match_operand:P 1 "symbolic_operand" "")] ++ UNSPEC_TLS_GD))] ++ "" ++ "la.tls.gd\t%0,%1" ++ [(set_attr "got" "load") ++ (set_attr "mode" "<MODE>")]) ++ ++(define_insn "got_load_tls_ld<mode>" ++ [(set (match_operand:P 0 "register_operand" "=r") ++ (unspec:P ++ [(match_operand:P 1 "symbolic_operand" "")] ++ UNSPEC_TLS_LD))] ++ "" ++ "la.tls.ld\t%0,%1" ++ [(set_attr "got" "load") ++ (set_attr "mode" "<MODE>")]) ++ ++(define_insn "got_load_tls_le<mode>" ++ [(set (match_operand:P 0 "register_operand" "=r") ++ (unspec:P ++ [(match_operand:P 1 "symbolic_operand" "")] ++ UNSPEC_TLS_LE))] ++ "" ++ "la.tls.le\t%0,%1" ++ [(set_attr "got" "load") ++ (set_attr "mode" "<MODE>")]) ++ ++(define_insn "got_load_tls_ie<mode>" ++ [(set (match_operand:P 0 "register_operand" "=r") ++ (unspec:P ++ [(match_operand:P 1 "symbolic_operand" "")] ++ 
UNSPEC_TLS_IE))] ++ "" ++ "la.tls.ie\t%0,%1" ++ [(set_attr "got" "load") ++ (set_attr "mode" "<MODE>")]) ++ ++(define_insn "loongarch_movfcsr2gr" ++ [(set (match_operand:SI 0 "register_operand" "=d") ++ (unspec_volatile:SI [(match_operand 1 "const_uimm5_operand")] UNSPEC_MOVFCSR2GR))] ++ "TARGET_HARD_FLOAT" ++ "movfcsr2gr\t%0,$r%1") ++ ++(define_insn "loongarch_movgr2fcsr" ++ [(unspec_volatile [(match_operand 0 "const_uimm5_operand") ++ (match_operand:SI 1 "register_operand" "d")] ++ UNSPEC_MOVGR2FCSR)] ++ "TARGET_HARD_FLOAT" ++ "movgr2fcsr\t$r%0,%1") ++ ++ ++;; Match paired HI/SI/SF/DFmode load/stores. ++(define_insn "*join2_load_store" ++ [(set (match_operand:JOIN_MODE 0 "nonimmediate_operand" "=d,f,m,m,d,ZC") ++ (match_operand:JOIN_MODE 1 "nonimmediate_operand" "m,m,d,f,ZC,d")) ++ (set (match_operand:JOIN_MODE 2 "nonimmediate_operand" "=d,f,m,m,d,ZC") ++ (match_operand:JOIN_MODE 3 "nonimmediate_operand" "m,m,d,f,ZC,d"))] ++ "ENABLE_LD_ST_PAIRS && reload_completed" ++ { ++ bool load_p = (which_alternative == 0 || which_alternative == 1 || which_alternative == 4); /* Alternatives 0, 1 and 4 are the ones tagged load/fpload in "move_type" below. */ ++ /* Reg-renaming pass reuses base register if it is dead after bonded loads. ++ Hardware does not bond those loads, even when they are consecutive. ++ However, order of the loads need to be checked for correctness. */ ++ if (!load_p || !reg_overlap_mentioned_p (operands[0], operands[1])) ++ { ++ output_asm_insn (loongarch_output_move (operands[0], operands[1]), ++ operands); ++ output_asm_insn (loongarch_output_move (operands[2], operands[3]), ++ &operands[2]); ++ } ++ else ++ { /* Operand 0 overlaps the first address: emit the second load first. */ ++ output_asm_insn (loongarch_output_move (operands[2], operands[3]), ++ &operands[2]); ++ output_asm_insn (loongarch_output_move (operands[0], operands[1]), ++ operands); ++ } ++ return ""; ++ } ++ [(set_attr "move_type" "load,fpload,store,fpstore,load,store") ++ (set_attr "insn_count" "2,2,2,2,2,2")]) ++ ++;; 2 HI/SI/SF/DF loads are joined. ++;; P5600 does not support bonding of two LBs, hence QI mode is not included. 
++;; The loads must be non-volatile as they might be reordered at the time of asm ++;; generation. ++(define_peephole2 ++ [(set (match_operand:JOIN_MODE 0 "register_operand") ++ (match_operand:JOIN_MODE 1 "non_volatile_mem_operand")) ++ (set (match_operand:JOIN_MODE 2 "register_operand") ++ (match_operand:JOIN_MODE 3 "non_volatile_mem_operand"))] ++ "ENABLE_LD_ST_PAIRS ++ && loongarch_load_store_bonding_p (operands, mode, true)" ++ [(parallel [(set (match_dup 0) ++ (match_dup 1)) ++ (set (match_dup 2) ++ (match_dup 3))])] ++ "") ++ ++;; 2 HI/SI/SF/DF stores are joined. ++;; P5600 does not support bonding of two SBs, hence QI mode is not included. ++(define_peephole2 ++ [(set (match_operand:JOIN_MODE 0 "memory_operand") ++ (match_operand:JOIN_MODE 1 "register_operand")) ++ (set (match_operand:JOIN_MODE 2 "memory_operand") ++ (match_operand:JOIN_MODE 3 "register_operand"))] ++ "ENABLE_LD_ST_PAIRS ++ && loongarch_load_store_bonding_p (operands, mode, false)" ++ [(parallel [(set (match_dup 0) ++ (match_dup 1)) ++ (set (match_dup 2) ++ (match_dup 3))])] ++ "") ++ ++;; Match paired HImode loads. ++(define_insn "*join2_loadhi" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (any_extend:SI (match_operand:HI 1 "non_volatile_mem_operand" "m"))) ++ (set (match_operand:SI 2 "register_operand" "=r") ++ (any_extend:SI (match_operand:HI 3 "non_volatile_mem_operand" "m")))] ++ "ENABLE_LD_ST_PAIRS && reload_completed" ++ { ++ /* Reg-renaming pass reuses base register if it is dead after bonded loads. ++ Hardware does not bond those loads, even when they are consecutive. ++ However, order of the loads need to be checked for correctness. 
*/ ++ if (!reg_overlap_mentioned_p (operands[0], operands[1])) ++ { /* <u> selects the signed or unsigned halfword load to match the any_extend code; assumes the usual "u" code attribute is defined for any_extend -- TODO confirm. */ ++ output_asm_insn ("ld.h<u>\t%0,%1", operands); ++ output_asm_insn ("ld.h<u>\t%2,%3", operands); ++ } ++ else ++ { /* Operand 0 overlaps the first address: emit the second load first. */ ++ output_asm_insn ("ld.h<u>\t%2,%3", operands); ++ output_asm_insn ("ld.h<u>\t%0,%1", operands); ++ } ++ ++ return ""; ++ } ++ [(set_attr "move_type" "load") ++ (set_attr "insn_count" "2")]) ++ ++ ++;; 2 HI loads are joined. ++(define_peephole2 ++ [(set (match_operand:SI 0 "register_operand") ++ (any_extend:SI (match_operand:HI 1 "non_volatile_mem_operand"))) ++ (set (match_operand:SI 2 "register_operand") ++ (any_extend:SI (match_operand:HI 3 "non_volatile_mem_operand")))] ++ "ENABLE_LD_ST_PAIRS ++ && loongarch_load_store_bonding_p (operands, HImode, true)" ++ [(parallel [(set (match_dup 0) ++ (any_extend:SI (match_dup 1))) ++ (set (match_dup 2) ++ (any_extend:SI (match_dup 3)))])] ++ "") ++ ++ ++;; Logical AND NOT. ++(define_insn "loongson_gsandn" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (and:GPR ++ (not:GPR (match_operand:GPR 1 "register_operand" "r")) ++ (match_operand:GPR 2 "register_operand" "r")))] ++ "" ++ "andn\t%0,%2,%1" ++ [(set_attr "type" "logical")]) ++ ++;; Logical OR NOT. 
++(define_insn "loongson_gsorn" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (ior:GPR ++ (not:GPR (match_operand:GPR 1 "register_operand" "r")) ++ (match_operand:GPR 2 "register_operand" "r")))] ++ "" ++ "orn\t%0,%2,%1" ++ [(set_attr "type" "logical")]) ++ ++(define_insn "smax3" ++ [(set (match_operand:SCALARF 0 "register_operand" "=f") ++ (smax:SCALARF (match_operand:SCALARF 1 "register_operand" "f") ++ (match_operand:SCALARF 2 "register_operand" "f")))] ++ "TARGET_HARD_FLOAT" ++ "fmax.\t%0,%1,%2" ++ [(set_attr "type" "fmove") ++ (set_attr "mode" "")]) ++ ++(define_insn "smin3" ++ [(set (match_operand:SCALARF 0 "register_operand" "=f") ++ (smin:SCALARF (match_operand:SCALARF 1 "register_operand" "f") ++ (match_operand:SCALARF 2 "register_operand" "f")))] ++ "TARGET_HARD_FLOAT" ++ "fmin.\t%0,%1,%2" ++ [(set_attr "type" "fmove") ++ (set_attr "mode" "")]) ++ ++(define_insn "smaxa3" ++ [(set (match_operand:SCALARF 0 "register_operand" "=f") ++ (if_then_else:SCALARF ++ (gt (abs:SCALARF (match_operand:SCALARF 1 "register_operand" "f")) ++ (abs:SCALARF (match_operand:SCALARF 2 "register_operand" "f"))) ++ (match_dup 1) ++ (match_dup 2)))] ++ "TARGET_HARD_FLOAT" ++ "fmaxa.\t%0,%1,%2" ++ [(set_attr "type" "fmove") ++ (set_attr "mode" "")]) ++ ++(define_insn "smina3" ++ [(set (match_operand:SCALARF 0 "register_operand" "=f") ++ (if_then_else:SCALARF ++ (lt (abs:SCALARF (match_operand:SCALARF 1 "register_operand" "f")) ++ (abs:SCALARF (match_operand:SCALARF 2 "register_operand" "f"))) ++ (match_dup 1) ++ (match_dup 2)))] ++ "TARGET_HARD_FLOAT" ++ "fmina.\t%0,%1,%2" ++ [(set_attr "type" "fmove") ++ (set_attr "mode" "")]) ++ ++(define_insn "frint_" ++ [(set (match_operand:SCALARF 0 "register_operand" "=f") ++ (unspec:SCALARF [(match_operand:SCALARF 1 "register_operand" "f")] ++ UNSPEC_FRINT))] ++ "" ++ "frint.\t%0,%1" ++ [(set_attr "type" "fcvt") ++ (set_attr "mode" "")]) ++ ++(define_insn "fclass_" ++ [(set (match_operand:SCALARF 0 "register_operand" "=f") ++ 
(unspec:SCALARF [(match_operand:SCALARF 1 "register_operand" "f")] ++ UNSPEC_FCLASS))] ++ "" ++ "fclass.\t%0,%1" ++ [(set_attr "type" "unknown") ++ (set_attr "mode" "")]) ++ ++(define_insn "bytepick_w" ++ [(set (match_operand:SI 0 "register_operand" "=d") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "d") ++ (match_operand:SI 2 "register_operand" "d") ++ (match_operand:SI 3 "const_0_to_3_operand" "n")] ++ UNSPEC_BYTEPICK_W))] ++ "" ++ "bytepick.w\t%0,%1,%2,%z3" ++ [(set_attr "type" "dspalu") ++ (set_attr "mode" "SI")]) ++ ++(define_insn "bytepick_d" ++ [(set (match_operand:DI 0 "register_operand" "=d") ++ (unspec:DI [(match_operand:DI 1 "register_operand" "d") ++ (match_operand:DI 2 "register_operand" "d") ++ (match_operand:DI 3 "const_0_to_7_operand" "n")] ++ UNSPEC_BYTEPICK_D))] ++ "" ++ "bytepick.d\t%0,%1,%2,%z3" ++ [(set_attr "type" "dspalu") ++ (set_attr "mode" "DI")]) ++ ++(define_insn "bitrev_4b" ++ [(set (match_operand:SI 0 "register_operand" "=d") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "d")] ++ UNSPEC_BITREV_4B))] ++ "" ++ "bitrev.4b\t%0,%1" ++ [(set_attr "type" "unknown") ++ (set_attr "mode" "SI")]) ++ ++(define_insn "bitrev_8b" ++ [(set (match_operand:DI 0 "register_operand" "=d") ++ (unspec:DI [(match_operand:DI 1 "register_operand" "d")] ++ UNSPEC_BITREV_8B))] ++ "" ++ "bitrev.8b\t%0,%1" ++ [(set_attr "type" "unknown") ++ (set_attr "mode" "DI")]) ++ ++ ++ ++(define_expand "lu32i_d" ++ [(set (match_operand:DI 0 "register_operand") ++ (ior:DI ++ (zero_extend:DI ++ (subreg:SI (match_dup 0) 0)) ++ (match_operand:DI 1 "const_lu32i_operand")))] ++ "TARGET_64BIT" ++{}) ++ ++(define_insn "*lu32i_d" ++ [(set (match_operand:DI 0 "register_operand" "=d") ++ (ior:DI ++ (zero_extend:DI ++ (subreg:SI (match_operand:DI 1 "register_operand" "0") 0)) ++ (match_operand:DI 2 "const_lu32i_operand" "u")))] ++ "TARGET_64BIT" ++ "lu32i.d\t%0,%X2>>32" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "DI")]) ++ ++(define_insn "lu52i_d" ++ [(set 
(match_operand:DI 0 "register_operand" "=d") ++ (unspec:DI [(match_operand:DI 1 "register_operand" "d") ++ (match_operand:DI 2 "const_lu52i_operand" "v")] ++ UNSPEC_LU52I_D))] ++ "TARGET_64BIT" ++ "lu52i.d\t%0,%1,%X2>>52" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "DI")]) ++ ++;;(define_insn "*lu32i_d" ++;; [(set (match_operand:DI 0 "register_operand" "+d") ++;; (ior:DI ++;; (zero_extend:DI ++;; (subreg:SI (match_dup 0) 0)) ++;; (match_operand:DI 1 "const_lu32i_operand" "u")))] ++;; "TARGET_64BIT" ++;; "lu32i.d\t%0,%1>>32" ++;; [(set_attr "type" "arith") ++;; (set_attr "mode" "DI")]) ++ ++(define_mode_iterator QHSD [QI HI SI DI]) ++ ++(define_insn "crc_w__w" ++ [(set (match_operand:SI 0 "register_operand" "=d") ++ (unspec:SI [(match_operand:QHSD 1 "register_operand" "d") ++ (match_operand:SI 2 "register_operand" "d")] ++ UNSPEC_CRC))] ++ "" ++ "crc.w..w\t%0,%1,%2" ++ [(set_attr "type" "unknown") ++ (set_attr "mode" "")]) ++ ++(define_insn "crcc_w__w" ++ [(set (match_operand:SI 0 "register_operand" "=d") ++ (unspec:SI [(match_operand:QHSD 1 "register_operand" "d") ++ (match_operand:SI 2 "register_operand" "d")] ++ UNSPEC_CRCC))] ++ "" ++ "crcc.w..w\t%0,%1,%2" ++ [(set_attr "type" "unknown") ++ (set_attr "mode" "")]) ++ ++;; Synchronization instructions. ++ ++(include "sync.md") ++ ++; LoongArch csr ++(include "lvz.md") ++ ++(define_c_enum "unspec" [ ++ UNSPEC_ADDRESS_FIRST ++]) ++ ++;; .................... ++;; ++;; N_LARCH-PORT ++;; ++;; .................... ++ ++(define_c_enum "unspecv" [ ++ ;; Register save and restore. ++ UNSPECV_GPR_SAVE ++ UNSPECV_GPR_RESTORE ++ ++ ;; Floating-point unspecs. ++ ;;UNSPECV_FRFLAGS ++ ;;UNSPECV_FSFLAGS ++ ++ ;; Blockage and synchronization. ++ ;;UNSPECV_BLOCKAGE ++ ;;UNSPECV_FENCE ++ ;;UNSPECV_FENCE_I ++]) ++ ++ ++;; Is copying of this instruction disallowed? 
++(define_attr "cannot_copy" "no,yes" (const_string "no")) ++ ++(define_insn "stack_tie" ++ [(set (mem:BLK (scratch)) ++ (unspec:BLK [(match_operand:X 0 "register_operand" "r") ++ (match_operand:X 1 "register_operand" "r")] ++ UNSPEC_TIE))] ++ "" ++ "" ++ [(set_attr "length" "0")] ++) ++ ++(define_insn "gpr_save" ++ [(unspec_volatile [(match_operand 0 "const_int_operand")] UNSPECV_GPR_SAVE) ++ (clobber (reg:SI T0_REGNUM)) ++ (clobber (reg:SI T1_REGNUM))] ++ "" ++ { return n_loongarch_output_gpr_save (INTVAL (operands[0])); }) ++ ++(define_insn "gpr_restore" ++ [(unspec_volatile [(match_operand 0 "const_int_operand")] UNSPECV_GPR_RESTORE)] ++ "" ++ "tail\t__n_loongarch_restore_%0") ++ ++(define_insn "gpr_restore_return" ++ [(return) ++ (use (match_operand 0 "pmode_register_operand" "")) ++ (const_int 0)] ++ "" ++ "") ++ +diff -uNr gcc-10.3.0.org/gcc/config/loongarch/loongarch-modes.def gcc-10.3.0/gcc/config/loongarch/loongarch-modes.def +--- gcc-10.3.0.org/gcc/config/loongarch/loongarch-modes.def 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/config/loongarch/loongarch-modes.def 2022-03-23 17:40:29.349280103 +0800 +@@ -0,0 +1,31 @@ ++/* LARCH extra machine modes. ++ Copyright (C) 2003-2018 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. 
*/ ++ ++FLOAT_MODE (TF, 16, ieee_quad_format); ++ ++VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */ ++ ++ ++INT_MODE (OI, 32); ++ ++/* Keep the OI modes from confusing the compiler into thinking ++ that these modes could actually be used for computation. They are ++ only holders for vectors during data movement. */ ++#define MAX_BITSIZE_MODE_ANY_INT (128) ++ +diff -uNr gcc-10.3.0.org/gcc/config/loongarch/loongarch.opt gcc-10.3.0/gcc/config/loongarch/loongarch.opt +--- gcc-10.3.0.org/gcc/config/loongarch/loongarch.opt 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/config/loongarch/loongarch.opt 2022-03-23 17:40:29.350280095 +0800 +@@ -0,0 +1,223 @@ ++ ++; ++; Copyright (C) 2005-2018 Free Software Foundation, Inc. ++; ++; This file is part of GCC. ++; ++; GCC is free software; you can redistribute it and/or modify it under ++; the terms of the GNU General Public License as published by the Free ++; Software Foundation; either version 3, or (at your option) any later ++; version. ++; ++; GCC is distributed in the hope that it will be useful, but WITHOUT ++; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++; License for more details. ++; ++; You should have received a copy of the GNU General Public License ++; along with GCC; see the file COPYING3. If not see ++; <http://www.gnu.org/licenses/>. ++ ++HeaderInclude ++config/loongarch/loongarch-opts.h ++ ++EB ++Driver ++ ++EL ++Driver ++ ++mabi= ++Target RejectNegative Joined Enum(loongarch_abi) Var(loongarch_abi) Init(LARCH_ABI_DEFAULT) ++-mabi=ABI Generate code that conforms to the given ABI. 
++ ++Enum ++Name(loongarch_abi) Type(int) ++Known Loongarch ABIs (for use with the -mabi= option): ++ ++EnumValue ++Enum(loongarch_abi) String(lp32) Value(ABILP32) ++ ++EnumValue ++Enum(loongarch_abi) String(lpx32) Value(ABILPX32) ++ ++EnumValue ++Enum(loongarch_abi) String(lp64) Value(ABILP64) ++ ++march= ++Target RejectNegative Joined Var(loongarch_arch_option) ToLower Enum(loongarch_arch_opt_value) ++-march=ISA Generate code for the given ISA. ++ ++mbranch-cost= ++Target RejectNegative Joined UInteger Var(loongarch_branch_cost) ++-mbranch-cost=COST Set the cost of branches to roughly COST instructions. ++ ++mcheck-zero-division ++Target Report Mask(CHECK_ZERO_DIV) ++Trap on integer divide by zero. ++ ++mdouble-float ++Target Report RejectNegative InverseMask(SINGLE_FLOAT, DOUBLE_FLOAT) ++Allow hardware floating-point instructions to cover both 32-bit and 64-bit operations. ++ ++mdebug ++Target Var(TARGET_DEBUG_MODE) Undocumented ++ ++mdebugd ++Target Var(TARGET_DEBUG_D_MODE) Undocumented ++ ++membedded-data ++Target Report Var(TARGET_EMBEDDED_DATA) ++Use ROM instead of RAM. ++ ++mextern-sdata ++Target Report Var(TARGET_EXTERN_SDATA) Init(1) ++Use -G for data that is not defined by the current object. ++ ++mfp-exceptions ++Target Report Var(TARGET_FP_EXCEPTIONS) Init(1) ++FP exceptions are enabled. ++ ++mfp32 ++Target Report RejectNegative InverseMask(FLOAT64) ++Use 32-bit floating-point registers. ++ ++mfp64 ++Target Report RejectNegative Mask(FLOAT64) ++Use 64-bit floating-point registers. ++ ++mflush-func= ++Target RejectNegative Joined Var(loongarch_cache_flush_func) Init(CACHE_FLUSH_FUNC) ++-mflush-func=FUNC Use FUNC to flush the cache before calling stack trampolines. ++ ++mgp32 ++Target Report RejectNegative InverseMask(64BIT) ++Use 32-bit general registers. ++ ++mgp64 ++Target Report RejectNegative Mask(64BIT) ++Use 64-bit general registers. ++ ++mgpopt ++Target Report Var(TARGET_GPOPT) Init(1) ++Use GP-relative addressing to access small data. 
++ ++mhard-float ++Target Report RejectNegative InverseMask(SOFT_FLOAT_ABI, HARD_FLOAT_ABI) ++Allow the use of hardware floating-point ABI and instructions. ++ ++loongarch ++Target RejectNegative Joined ToLower Enum(loongarch_loongarch_opt_value) Var(loongarch_isa_option) ++-loongarchN Generate code for ISA level N. ++ ++mlocal-sdata ++Target Report Var(TARGET_LOCAL_SDATA) Init(1) ++Use -G for object-local data. ++ ++mlong-calls ++Target Report Var(TARGET_LONG_CALLS) ++Use indirect calls. ++ ++mlong32 ++Target Report RejectNegative InverseMask(LONG64, LONG32) ++Use a 32-bit long type. ++ ++mlong64 ++Target Report RejectNegative Mask(LONG64) ++Use a 64-bit long type. ++ ++mmemcpy ++Target Report Mask(MEMCPY) ++Don't optimize block moves. ++ ++mno-float ++Target Report RejectNegative Var(TARGET_NO_FLOAT) Condition(TARGET_SUPPORTS_NO_FLOAT) ++Prevent the use of all floating-point operations. ++ ++mno-flush-func ++Target RejectNegative ++Do not use a cache-flushing function before calling stack trampolines. ++ ++mrelax-pic-calls ++Target Report Mask(RELAX_PIC_CALLS) ++Try to allow the linker to turn PIC calls into direct calls. ++ ++mshared ++Target Report Var(TARGET_SHARED) Init(1) ++When generating -mabicalls code, make the code suitable for use in shared libraries. ++ ++msingle-float ++Target Report RejectNegative Mask(SINGLE_FLOAT) ++Restrict the use of hardware floating-point instructions to 32-bit operations. ++ ++msoft-float ++Target Report RejectNegative Mask(SOFT_FLOAT_ABI) ++Prevent the use of all hardware floating-point instructions. ++ ++msym32 ++Target Report Var(TARGET_SYM32) ++Assume all symbols have 32-bit values. ++ ++mlra ++Target Report Var(loongarch_lra_flag) Init(1) Save ++Use LRA instead of reload. ++ ++mtune= ++Target RejectNegative Joined Var(loongarch_tune_option) ToLower Enum(loongarch_arch_opt_value) ++-mtune=PROCESSOR Optimize the output for PROCESSOR. 
++ ++muninit-const-in-rodata ++Target Report Var(TARGET_UNINIT_CONST_IN_RODATA) ++Put uninitialized constants in ROM (needs -membedded-data). ++ ++mxgot ++Target Report Var(TARGET_XGOT) ++Lift restrictions on GOT size. ++ ++mframe-header-opt ++Target Report Var(flag_frame_header_optimization) Optimization ++Optimize frame header. ++ ++noasmopt ++Driver ++ ++mload-store-pairs ++Target Report Var(TARGET_LOAD_STORE_PAIRS) Init(1) ++Enable load/store bonding. ++ ++mlvz ++Target Report Var(TARGET_LVZ) ++Use LoongArch Privileged state (LVZ) instructions. ++ ++mmax-inline-memcpy-size= ++Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init(1024) ++-mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024. ++ ++mfix-loongson3-llsc ++Target Report Var(FIX_LOONGSON3_LLSC) ++Work around an Loongson3 llsc errata. ++ ++; The code model option names for -mcmodel. ++ ++Enum ++Name(cmodel) Type(enum loongarch_code_model) ++The code model option names for -mcmodel: ++ ++EnumValue ++Enum(cmodel) String(normal) Value(LARCH_CMODEL_NORMAL) ++ ++EnumValue ++Enum(cmodel) String(tiny) Value(LARCH_CMODEL_TINY) ++ ++EnumValue ++Enum(cmodel) String(tiny-static) Value(LARCH_CMODEL_TINY_STATIC) ++ ++EnumValue ++Enum(cmodel) String(large) Value(LARCH_CMODEL_LARGE) ++ ++EnumValue ++Enum(cmodel) String(extreme) Value(LARCH_CMODEL_EXTREME) ++ ++mcmodel= ++Target RejectNegative Joined Enum(cmodel) Var(loongarch_cmodel_var) Init(LARCH_CMODEL_NORMAL) Save ++Specify the code model. +diff -uNr gcc-10.3.0.org/gcc/config/loongarch/loongarch-opts.h gcc-10.3.0/gcc/config/loongarch/loongarch-opts.h +--- gcc-10.3.0.org/gcc/config/loongarch/loongarch-opts.h 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/config/loongarch/loongarch-opts.h 2022-03-23 17:40:29.350280095 +0800 +@@ -0,0 +1,38 @@ ++/* Definitions for option handling for LARCH. ++ Copyright (C) 1989-2018 Free Software Foundation, Inc. ++ ++This file is part of GCC. 
++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. */ ++ ++#ifndef LARCH_OPTS_H ++#define LARCH_OPTS_H ++ ++/* No enumeration is defined to index the -march= values (entries in ++ loongarch_cpu_info_table), with the type int being used instead, but we ++ need to distinguish the special "from-abi" and "native" values. */ ++#define LARCH_ARCH_OPTION_FROM_ABI -1 ++#define LARCH_ARCH_OPTION_NATIVE -2 ++ ++ ++enum loongarch_code_model { /* Values of -mcmodel=, as listed by Enum(cmodel) in loongarch.opt. */ ++ LARCH_CMODEL_NORMAL, /* -mcmodel=normal; the Init() value of the option. */ ++ LARCH_CMODEL_TINY, /* -mcmodel=tiny */ ++ LARCH_CMODEL_TINY_STATIC, /* -mcmodel=tiny-static */ ++ LARCH_CMODEL_LARGE, /* -mcmodel=large */ ++ LARCH_CMODEL_EXTREME /* -mcmodel=extreme */ ++}; ++ ++#endif +diff -uNr gcc-10.3.0.org/gcc/config/loongarch/loongarch-protos.h gcc-10.3.0/gcc/config/loongarch/loongarch-protos.h +--- gcc-10.3.0.org/gcc/config/loongarch/loongarch-protos.h 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/config/loongarch/loongarch-protos.h 2022-03-23 17:40:29.350280095 +0800 +@@ -0,0 +1,276 @@ ++/* Prototypes of target machine for GNU compiler. LARCH version. ++ Copyright (C) 1989-2018 Free Software Foundation, Inc. ++ Contributed by A. Lichnewsky (lich@inria.inria.fr). ++ Changed by Michael Meissner (meissner@osf.org). ++ 64-bit r4000 support by Ian Lance Taylor (ian@cygnus.com) and ++ Brendan Eich (brendan@microunity.com). ++ ++This file is part of GCC. 
++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. */ ++ ++#ifndef GCC_LARCH_PROTOS_H ++#define GCC_LARCH_PROTOS_H ++ ++/* Describes how a symbol is used. ++ ++ SYMBOL_CONTEXT_CALL ++ The symbol is used as the target of a call instruction. ++ ++ SYMBOL_CONTEXT_LEA ++ The symbol is used in a load-address operation. ++ ++ SYMBOL_CONTEXT_MEM ++ The symbol is used as the address in a MEM. */ ++enum loongarch_symbol_context { ++ SYMBOL_CONTEXT_CALL, ++ SYMBOL_CONTEXT_LEA, ++ SYMBOL_CONTEXT_MEM ++}; ++ ++/* Classifies a SYMBOL_REF, LABEL_REF or UNSPEC address. ++ ++ SYMBOL_GOT_DISP ++ The symbol's value will be loaded directly from the GOT. ++ ++ SYMBOL_TLS ++ A thread-local symbol. ++ ++ SYMBOL_TLSGD ++ SYMBOL_TLSLDM ++ UNSPEC wrappers around SYMBOL_TLS, corresponding to the ++ thread-local storage relocation operators. ++ */ ++enum loongarch_symbol_type { ++ SYMBOL_GOT_DISP, ++ SYMBOL_TLS, ++ SYMBOL_TLSGD, ++ SYMBOL_TLSLDM ++}; ++#define NUM_SYMBOL_TYPES (SYMBOL_TLSLDM + 1) ++ ++/* Classifies a type of call. ++ ++ LARCH_CALL_NORMAL ++ A normal call or call_value pattern. ++ ++ LARCH_CALL_SIBCALL ++ A sibcall or sibcall_value pattern. ++ ++ LARCH_CALL_EPILOGUE ++ A call inserted in the epilogue. */ ++enum loongarch_call_type { ++ LARCH_CALL_NORMAL, ++ LARCH_CALL_SIBCALL, ++ LARCH_CALL_EPILOGUE ++}; ++ ++/* Controls the conditions under which certain instructions are split.
++ ++ SPLIT_IF_NECESSARY ++ Only perform splits that are necessary for correctness ++ (because no unsplit version exists). ++ ++ SPLIT_FOR_SPEED ++ Perform splits that are necessary for correctness or ++ beneficial for code speed. ++ ++ SPLIT_FOR_SIZE ++ Perform splits that are necessary for correctness or ++ beneficial for code size. */ ++enum loongarch_split_type { ++ SPLIT_IF_NECESSARY, ++ SPLIT_FOR_SPEED, ++ SPLIT_FOR_SIZE ++}; ++ ++extern const char *const loongarch_fp_conditions[16]; ++ ++/***********************/ ++/* N_LARCH-PORT */ ++/***********************/ ++/* Routines implemented in n_loongarch.c. */ ++extern rtx n_loongarch_emit_move (rtx, rtx); ++extern const char *n_loongarch_output_gpr_save (unsigned); ++extern void n_loongarch_set_return_address (rtx, rtx); ++extern HOST_WIDE_INT n_loongarch_initial_elimination_offset (int, int); ++extern void n_loongarch_expand_prologue (void); ++extern void n_loongarch_expand_epilogue (bool); ++extern bool n_loongarch_can_use_return_insn (void); ++extern rtx n_loongarch_function_value (const_tree, const_tree, enum machine_mode); ++/***********************/ ++/* N_LARCH-PORT */ ++/***********************/ ++ ++extern bool loongarch_symbolic_constant_p (rtx, enum loongarch_symbol_context, ++ enum loongarch_symbol_type *); ++extern int loongarch_regno_mode_ok_for_base_p (int, machine_mode, bool); ++extern bool loongarch_stack_address_p (rtx, machine_mode); ++extern int loongarch_address_insns (rtx, machine_mode, bool); ++extern int loongarch_const_insns (rtx); ++extern int loongarch_split_const_insns (rtx); ++extern int loongarch_split_128bit_const_insns (rtx); ++extern int loongarch_load_store_insns (rtx, rtx_insn *); ++extern int loongarch_idiv_insns (machine_mode); ++extern rtx_insn *loongarch_emit_move (rtx, rtx); ++#ifdef RTX_CODE ++extern void loongarch_emit_binary (enum rtx_code, rtx, rtx, rtx); ++#endif ++extern rtx loongarch_pic_base_register (rtx); ++extern bool loongarch_split_symbol (rtx, rtx, 
machine_mode, rtx *); ++extern rtx loongarch_unspec_address (rtx, enum loongarch_symbol_type); ++extern rtx loongarch_strip_unspec_address (rtx); ++extern void loongarch_move_integer (rtx, rtx, unsigned HOST_WIDE_INT); ++extern bool loongarch_legitimize_move (machine_mode, rtx, rtx); ++extern rtx loongarch_legitimize_call_address (rtx); ++ ++extern rtx loongarch_subword (rtx, bool); ++extern bool loongarch_split_move_p (rtx, rtx, enum loongarch_split_type); ++extern void loongarch_split_move (rtx, rtx, enum loongarch_split_type, rtx); ++extern bool loongarch_split_move_insn_p (rtx, rtx, rtx); ++extern void loongarch_split_move_insn (rtx, rtx, rtx); ++extern const char *loongarch_output_move (rtx, rtx); ++extern bool loongarch_cfun_has_cprestore_slot_p (void); ++extern bool loongarch_cprestore_address_p (rtx, bool); ++#ifdef RTX_CODE ++extern void loongarch_expand_scc (rtx *); ++extern void loongarch_expand_conditional_branch (rtx *); ++extern bool loongarch_expand_conditional_move (rtx *); ++extern void loongarch_expand_conditional_trap (rtx); ++#endif ++extern bool loongarch_get_pic_call_symbol (rtx *, int); ++extern void loongarch_set_return_address (rtx, rtx); ++extern bool loongarch_move_by_pieces_p (unsigned HOST_WIDE_INT, unsigned int); ++extern bool loongarch_store_by_pieces_p (unsigned HOST_WIDE_INT, unsigned int); ++extern bool loongarch_expand_block_move (rtx, rtx, rtx); ++ ++extern void loongarch_init_cumulative_args (CUMULATIVE_ARGS *, tree); ++extern bool loongarch_pad_reg_upward (machine_mode, tree); ++ ++extern bool loongarch_expand_ext_as_unaligned_load (rtx, rtx, HOST_WIDE_INT, ++ HOST_WIDE_INT, bool); ++extern bool loongarch_expand_ins_as_unaligned_store (rtx, rtx, HOST_WIDE_INT, ++ HOST_WIDE_INT); ++extern bool loongarch_mem_fits_mode_p (machine_mode mode, rtx x); ++extern HOST_WIDE_INT loongarch_debugger_offset (rtx, HOST_WIDE_INT); ++ ++extern void loongarch_push_asm_switch (struct loongarch_asm_switch *); ++extern void loongarch_pop_asm_switch 
(struct loongarch_asm_switch *); ++extern void loongarch_output_external (FILE *, tree, const char *); ++extern void loongarch_output_ascii (FILE *, const char *, size_t); ++extern void loongarch_output_aligned_decl_common (FILE *, tree, const char *, ++ unsigned HOST_WIDE_INT, ++ unsigned int); ++extern void loongarch_declare_common_object (FILE *, const char *, ++ const char *, unsigned HOST_WIDE_INT, ++ unsigned int, bool); ++extern void loongarch_declare_object (FILE *, const char *, const char *, ++ const char *, ...) ATTRIBUTE_PRINTF_4; ++extern void loongarch_declare_object_name (FILE *, const char *, tree); ++extern void loongarch_finish_declare_object (FILE *, tree, int, int); ++extern void loongarch_set_text_contents_type (FILE *, const char *, ++ unsigned long, bool); ++ ++extern bool loongarch_small_data_pattern_p (rtx); ++extern rtx loongarch_rewrite_small_data (rtx); ++extern HOST_WIDE_INT loongarch_initial_elimination_offset (int, int); ++extern rtx loongarch_return_addr (int, rtx); ++extern bool loongarch_must_initialize_gp_p (void); ++extern void loongarch_emit_save_slot_move (rtx, rtx, rtx); ++extern void loongarch_expand_prologue (void); ++extern void loongarch_expand_epilogue (bool); ++extern bool loongarch_can_use_return_insn (void); ++ ++extern enum reg_class loongarch_secondary_reload_class (enum reg_class, ++ machine_mode, ++ rtx, bool); ++extern int loongarch_class_max_nregs (enum reg_class, machine_mode); ++ ++extern machine_mode loongarch_hard_regno_caller_save_mode (unsigned int, ++ unsigned int, ++ machine_mode); ++extern int loongarch_adjust_insn_length (rtx_insn *, int); ++extern const char *loongarch_output_conditional_branch (rtx_insn *, rtx *, ++ const char *, const char *); ++extern const char *loongarch_output_order_conditional_branch (rtx_insn *, rtx *, ++ bool); ++extern const char *loongarch_output_equal_conditional_branch (rtx_insn *, rtx *, ++ bool); ++extern const char *loongarch_output_division (const char *, rtx *); 
++extern const char *loongarch_output_probe_stack_range (rtx, rtx); ++extern bool loongarch_hard_regno_rename_ok (unsigned int, unsigned int); ++extern bool loongarch_linked_madd_p (rtx_insn *, rtx_insn *); ++extern bool loongarch_store_data_bypass_p (rtx_insn *, rtx_insn *); ++extern int loongarch_dspalu_bypass_p (rtx, rtx); ++extern rtx loongarch_prefetch_cookie (rtx, rtx); ++ ++extern bool loongarch_global_symbol_p (const_rtx); ++extern bool loongarch_global_symbol_noweak_p (const_rtx); ++extern bool loongarch_weak_symbol_p (const_rtx); ++extern bool loongarch_symbol_binds_local_p (const_rtx); ++ ++extern const char *current_section_name (void); ++extern unsigned int current_section_flags (void); ++extern bool loongarch_use_ins_ext_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT); ++ ++extern bool and_operands_ok (machine_mode, rtx, rtx); ++extern bool loongarch_fmadd_bypass (rtx_insn *, rtx_insn *); ++ ++union loongarch_gen_fn_ptrs ++{ ++ rtx (*fn_8) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx); ++ rtx (*fn_7) (rtx, rtx, rtx, rtx, rtx, rtx, rtx); ++ rtx (*fn_6) (rtx, rtx, rtx, rtx, rtx, rtx); ++ rtx (*fn_5) (rtx, rtx, rtx, rtx, rtx); ++ rtx (*fn_4) (rtx, rtx, rtx, rtx); ++}; ++ ++extern void loongarch_expand_atomic_qihi (union loongarch_gen_fn_ptrs, ++ rtx, rtx, rtx, rtx, rtx); ++ ++extern bool loongarch_signed_immediate_p (unsigned HOST_WIDE_INT, int, int); ++extern bool loongarch_unsigned_immediate_p (unsigned HOST_WIDE_INT, int, int); ++extern bool loongarch_load_store_pair_p (bool, rtx *); ++extern bool loongarch_movep_target_p (rtx, rtx); ++extern bool loongarch_12bit_offset_address_p (rtx, machine_mode); ++extern bool loongarch_14bit_shifted_offset_address_p (rtx, machine_mode); ++extern bool loongarch_9bit_offset_address_p (rtx, machine_mode); ++extern rtx loongarch_expand_thread_pointer (rtx); ++ ++extern bool loongarch_eh_uses (unsigned int); ++extern bool loongarch_epilogue_uses (unsigned int); ++extern int loongarch_trampoline_code_size (void); ++extern bool 
loongarch_load_store_bonding_p (rtx *, machine_mode, bool); ++extern bool loongarch_split_symbol_type (enum loongarch_symbol_type); ++ ++typedef rtx (*mulsidi3_gen_fn) (rtx, rtx, rtx); ++ ++extern void loongarch_register_frame_header_opt (void); ++ ++extern void loongarch_declare_function_name(FILE *, const char *, tree); ++/* Routines implemented in loongarch-d.c */ ++extern void loongarch_d_target_versions (void); ++ ++/* Routines implemented in loongarch-c.c. */ ++void loongarch_cpu_cpp_builtins (cpp_reader *); ++ ++extern void loongarch_init_builtins (void); ++extern void loongarch_atomic_assign_expand_fenv (tree *, tree *, tree *); ++extern tree loongarch_builtin_decl (unsigned int, bool); ++extern rtx loongarch_expand_builtin (tree, rtx, rtx subtarget ATTRIBUTE_UNUSED, ++ machine_mode, int); ++extern tree loongarch_build_builtin_va_list (void); ++ ++#endif /* ! GCC_LARCH_PROTOS_H */ +diff -uNr gcc-10.3.0.org/gcc/config/loongarch/loongarch-tables.opt gcc-10.3.0/gcc/config/loongarch/loongarch-tables.opt +--- gcc-10.3.0.org/gcc/config/loongarch/loongarch-tables.opt 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/config/loongarch/loongarch-tables.opt 2022-03-23 17:40:29.350280095 +0800 +@@ -0,0 +1,50 @@ ++; -*- buffer-read-only: t -*- ++; Generated automatically by genopt.sh from loongarch-cpus.def. ++ ++; Copyright (C) 2011-2018 Free Software Foundation, Inc. ++; ++; This file is part of GCC. ++; ++; GCC is free software; you can redistribute it and/or modify it under ++; the terms of the GNU General Public License as published by the Free ++; Software Foundation; either version 3, or (at your option) any later ++; version. ++; ++; GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++; WARRANTY; without even the implied warranty of MERCHANTABILITY or ++; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++; for more details. 
++; ++; You should have received a copy of the GNU General Public License ++; along with GCC; see the file COPYING3. If not see ++; <http://www.gnu.org/licenses/>. ++ ++Enum ++Name(loongarch_arch_opt_value) Type(int) ++Known LARCH CPUs (for use with the -march= and -mtune= options): ++ ++Enum ++Name(loongarch_loongarch_opt_value) Type(int) ++Known LARCH ISA levels (for use with the -loongarch option): ++ ++EnumValue ++Enum(loongarch_arch_opt_value) String(from-abi) Value(LARCH_ARCH_OPTION_FROM_ABI) ++ ++EnumValue ++Enum(loongarch_arch_opt_value) String(native) Value(LARCH_ARCH_OPTION_NATIVE) DriverOnly ++ ++EnumValue ++Enum(loongarch_arch_opt_value) String(loongarch) Value(0) Canonical ++ ++EnumValue ++Enum(loongarch_loongarch_opt_value) String() Value(0) ++ ++EnumValue ++Enum(loongarch_arch_opt_value) String(loongarch64) Value(1) Canonical ++ ++EnumValue ++Enum(loongarch_loongarch_opt_value) String(64) Value(1) ++ ++EnumValue ++Enum(loongarch_arch_opt_value) String(gs464v) Value(2) Canonical ++ +diff -uNr gcc-10.3.0.org/gcc/config/loongarch/lvzintrin.h gcc-10.3.0/gcc/config/loongarch/lvzintrin.h +--- gcc-10.3.0.org/gcc/config/loongarch/lvzintrin.h 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/config/loongarch/lvzintrin.h 2022-03-23 17:40:29.350280095 +0800 +@@ -0,0 +1,87 @@ ++/* Intrinsics for LoongArch vz operations. ++ ++ Copyright (C) 2019 Free Software Foundation, Inc. ++ Contributed by xuchenghua@loongson.cn. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details.
++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++#ifndef _GCC_LOONGARCH_LVZ_H ++#define _GCC_LOONGARCH_LVZ_H ++ ++#define __lvz_gcsrrd __builtin_lvz_gcsrrd ++#define __lvz_gcsrwr __builtin_lvz_gcsrwr ++#define __lvz_gcsrxchg __builtin_lvz_gcsrxchg ++#define __lvz_dgcsrrd __builtin_lvz_dgcsrrd ++#define __lvz_dgcsrwr __builtin_lvz_dgcsrwr ++#define __lvz_dgcsrxchg __builtin_lvz_dgcsrxchg ++ ++extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__builtin_lvz_gtlbsrch (void) ++{ ++ __asm__ volatile ("gtlbsrch\n\t"); ++} ++#define __lvz_gtlbsrch __builtin_lvz_gtlbsrch ++ ++extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__builtin_lvz_gtlbrd (void) ++{ ++ __asm__ volatile ("gtlbrd\n\t"); ++} ++#define __lvz_gtlbrd __builtin_lvz_gtlbrd ++ ++extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__builtin_lvz_gtlbwr (void) ++{ ++ __asm__ volatile ("gtlbwr\n\t"); ++} ++#define __lvz_gtlbwr __builtin_lvz_gtlbwr ++ ++extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__builtin_lvz_gtlbfill (void) ++{ ++ __asm__ volatile ("gtlbfill\n\t"); ++} ++#define __lvz_gtlbfill __builtin_lvz_gtlbfill ++ ++extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__builtin_lvz_gtlbclr (void) ++{ ++ __asm__ volatile ("gtlbclr\n\t"); ++} ++#define __lvz_gtlbclr __builtin_lvz_gtlbclr ++ ++extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__builtin_lvz_gtlbflush (void) ++{ ++ __asm__ volatile
("gtlbflush\n\t"); ++} ++#define __lvz_gtlbflush __builtin_lvz_gtlbflush ++ ++extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__builtin_lvz_hvcl (void) ++{ ++ __asm__ volatile ("hvcl\n\t"); ++} ++#define __lvz_hvcl __builtin_lvz_hvcl ++ ++ ++#endif /*_GCC_LOONGARCH_LVZ_H */ +diff -uNr gcc-10.3.0.org/gcc/config/loongarch/lvz.md gcc-10.3.0/gcc/config/loongarch/lvz.md +--- gcc-10.3.0.org/gcc/config/loongarch/lvz.md 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/config/loongarch/lvz.md 2022-03-23 17:40:29.350280095 +0800 +@@ -0,0 +1,60 @@ ++;; Machine Description for LoongArch vz instructions. ++;; Copyright (C) 1989-2014 Free Software Foundation, Inc. ++;; Contributed by xuchenghua@loongson.cn ++ ++;; This file is part of GCC. ++ ++;; GCC is free software; you can redistribute it and/or modify ++;; it under the terms of the GNU General Public License as published by ++;; the Free Software Foundation; either version 3, or (at your option) ++;; any later version. ++ ++;; GCC is distributed in the hope that it will be useful, ++;; but WITHOUT ANY WARRANTY; without even the implied warranty of ++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++;; GNU General Public License for more details. ++ ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; . ++ ++(define_c_enum "unspec" [ ++ ;; lvz ++ UNSPEC_LVZ_GCSRXCHG ++ UNSPEC_LVZ_GCSRRD ++ UNSPEC_LVZ_GCSRWR ++ ++]) ++ ++(define_insn "lvz_

gcsrxchg" ++ [(set (match_operand:GPR 0 "register_operand" "=d") ++ (unspec_volatile:GPR ++ [(match_operand:GPR 1 "register_operand" "0") ++ (match_operand:GPR 2 "register_operand" "q") ++ (match_operand 3 "const_uimm14_operand")] ++ UNSPEC_LVZ_GCSRXCHG))] ++ "ISA_HAS_LVZ" ++ "gcsrxchg\t%0,%2,%3" ++ [(set_attr "type" "load") ++ (set_attr "mode" "")]) ++ ++(define_insn "lvz_

gcsrrd" ++ [(set (match_operand:GPR 0 "register_operand" "=d") ++ (unspec_volatile:GPR [(match_operand 1 "const_uimm14_operand")] ++ UNSPEC_LVZ_GCSRRD))] ++ "ISA_HAS_LVZ" ++ "gcsrrd\t%0,%1" ++ [(set_attr "type" "load") ++ (set_attr "mode" "")]) ++ ++(define_insn "lvz_

gcsrwr" ++ [(set (match_operand:GPR 0 "register_operand" "=d") ++ (unspec_volatile:GPR ++ [(match_operand:GPR 1 "register_operand" "0") ++ (match_operand 2 "const_uimm14_operand")] ++ UNSPEC_LVZ_GCSRWR))] ++ "ISA_HAS_LVZ" ++ "gcsrwr\t%0,%2" ++ [(set_attr "type" "store") ++ (set_attr "mode" "")]) ++ +diff -uNr gcc-10.3.0.org/gcc/config/loongarch/predicates.md gcc-10.3.0/gcc/config/loongarch/predicates.md +--- gcc-10.3.0.org/gcc/config/loongarch/predicates.md 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/config/loongarch/predicates.md 2022-03-23 17:40:29.350280095 +0800 +@@ -0,0 +1,561 @@ ++;; Predicate definitions for LARCH. ++;; Copyright (C) 2004-2018 Free Software Foundation, Inc. ++;; ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify ++;; it under the terms of the GNU General Public License as published by ++;; the Free Software Foundation; either version 3, or (at your option) ++;; any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, ++;; but WITHOUT ANY WARRANTY; without even the implied warranty of ++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++;; GNU General Public License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; . 
++ ++(define_predicate "const_uns_arith_operand" ++ (and (match_code "const_int") ++ (match_test "SMALL_OPERAND_UNSIGNED (INTVAL (op))"))) ++ ++(define_predicate "uns_arith_operand" ++ (ior (match_operand 0 "const_uns_arith_operand") ++ (match_operand 0 "register_operand"))) ++ ++(define_predicate "const_lu32i_operand" ++ (and (match_code "const_int") ++ (match_test "LU32I_OPERAND (INTVAL (op))"))) ++ ++(define_predicate "const_lu52i_operand" ++ (and (match_code "const_int") ++ (match_test "LU52I_OPERAND (INTVAL (op))"))) ++ ++(define_predicate "const_arith_operand" ++ (and (match_code "const_int") ++ (match_test "IMM12_OPERAND (INTVAL (op))"))) ++ ++(define_predicate "const_imm16_operand" ++ (and (match_code "const_int") ++ (match_test "IMM16_OPERAND (INTVAL (op))"))) ++ ++(define_predicate "arith_operand" ++ (ior (match_operand 0 "const_arith_operand") ++ (match_operand 0 "register_operand"))) ++ ++(define_predicate "const_immlsa_operand" ++ (and (match_code "const_int") ++ (match_test "IN_RANGE (INTVAL (op), 1, 4)"))) ++ ++(define_predicate "const_uimm3_operand" ++ (and (match_code "const_int") ++ (match_test "IN_RANGE (INTVAL (op), 0, 7)"))) ++ ++(define_predicate "const_uimm4_operand" ++ (and (match_code "const_int") ++ (match_test "IN_RANGE (INTVAL (op), 0, 15)"))) ++ ++(define_predicate "const_uimm5_operand" ++ (and (match_code "const_int") ++ (match_test "IN_RANGE (INTVAL (op), 0, 31)"))) ++ ++(define_predicate "const_uimm6_operand" ++ (and (match_code "const_int") ++ (match_test "UIMM6_OPERAND (INTVAL (op))"))) ++ ++(define_predicate "const_uimm7_operand" ++ (and (match_code "const_int") ++ (match_test "IN_RANGE (INTVAL (op), 0, 127)"))) ++ ++(define_predicate "const_uimm8_operand" ++ (and (match_code "const_int") ++ (match_test "IN_RANGE (INTVAL (op), 0, 255)"))) ++ ++(define_predicate "const_uimm14_operand" ++ (and (match_code "const_int") ++ (match_test "IN_RANGE (INTVAL (op), 0, 16383)"))) ++ ++(define_predicate "const_uimm15_operand" ++ (and 
(match_code "const_int") ++ (match_test "IN_RANGE (INTVAL (op), 0, 32767)"))) ++ ++(define_predicate "const_imm5_operand" ++ (and (match_code "const_int") ++ (match_test "IN_RANGE (INTVAL (op), -16, 15)"))) ++ ++(define_predicate "const_imm10_operand" ++ (and (match_code "const_int") ++ (match_test "IMM10_OPERAND (INTVAL (op))"))) ++ ++(define_predicate "const_imm12_operand" ++ (and (match_code "const_int") ++ (match_test "IMM12_OPERAND (INTVAL (op))"))) ++ ++(define_predicate "reg_imm10_operand" ++ (ior (match_operand 0 "const_imm10_operand") ++ (match_operand 0 "register_operand"))) ++ ++(define_predicate "aq8b_operand" ++ (and (match_code "const_int") ++ (match_test "loongarch_signed_immediate_p (INTVAL (op), 8, 0)"))) ++ ++(define_predicate "aq8h_operand" ++ (and (match_code "const_int") ++ (match_test "loongarch_signed_immediate_p (INTVAL (op), 8, 1)"))) ++ ++(define_predicate "aq8w_operand" ++ (and (match_code "const_int") ++ (match_test "loongarch_signed_immediate_p (INTVAL (op), 8, 2)"))) ++ ++(define_predicate "aq8d_operand" ++ (and (match_code "const_int") ++ (match_test "loongarch_signed_immediate_p (INTVAL (op), 8, 3)"))) ++ ++(define_predicate "aq10b_operand" ++ (and (match_code "const_int") ++ (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 0)"))) ++ ++(define_predicate "aq10h_operand" ++ (and (match_code "const_int") ++ (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 1)"))) ++ ++(define_predicate "aq10w_operand" ++ (and (match_code "const_int") ++ (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 2)"))) ++ ++(define_predicate "aq10d_operand" ++ (and (match_code "const_int") ++ (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 3)"))) ++ ++(define_predicate "aq12b_operand" ++ (and (match_code "const_int") ++ (match_test "loongarch_signed_immediate_p (INTVAL (op), 12, 0)"))) ++ ++(define_predicate "aq12h_operand" ++ (and (match_code "const_int") ++ (match_test "loongarch_signed_immediate_p (INTVAL (op), 11, 
1)"))) ++ ++(define_predicate "aq12w_operand" ++ (and (match_code "const_int") ++ (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 2)"))) ++ ++(define_predicate "aq12d_operand" ++ (and (match_code "const_int") ++ (match_test "loongarch_signed_immediate_p (INTVAL (op), 9, 3)"))) ++ ++(define_predicate "sle_operand" ++ (and (match_code "const_int") ++ (match_test "SMALL_OPERAND (INTVAL (op) + 1)"))) ++ ++(define_predicate "sleu_operand" ++ (and (match_operand 0 "sle_operand") ++ (match_test "INTVAL (op) + 1 != 0"))) ++ ++(define_predicate "const_0_operand" ++ (and (match_code "const_int,const_double,const_vector") ++ (match_test "op == CONST0_RTX (GET_MODE (op))"))) ++ ++(define_predicate "const_m1_operand" ++ (and (match_code "const_int,const_double,const_vector") ++ (match_test "op == CONSTM1_RTX (GET_MODE (op))"))) ++ ++(define_predicate "reg_or_m1_operand" ++ (ior (match_operand 0 "const_m1_operand") ++ (match_operand 0 "register_operand"))) ++ ++(define_predicate "reg_or_0_operand" ++ (ior (match_operand 0 "const_0_operand") ++ (match_operand 0 "register_operand"))) ++ ++(define_predicate "const_1_operand" ++ (and (match_code "const_int,const_double,const_vector") ++ (match_test "op == CONST1_RTX (GET_MODE (op))"))) ++ ++(define_predicate "reg_or_1_operand" ++ (ior (match_operand 0 "const_1_operand") ++ (match_operand 0 "register_operand"))) ++ ++;; These are used in vec_merge, hence accept bitmask as const_int. 
++(define_predicate "const_exp_2_operand" ++ (and (match_code "const_int") ++ (match_test "IN_RANGE (exact_log2 (INTVAL (op)), 0, 1)"))) ++ ++(define_predicate "const_exp_4_operand" ++ (and (match_code "const_int") ++ (match_test "IN_RANGE (exact_log2 (INTVAL (op)), 0, 3)"))) ++ ++(define_predicate "const_exp_8_operand" ++ (and (match_code "const_int") ++ (match_test "IN_RANGE (exact_log2 (INTVAL (op)), 0, 7)"))) ++ ++(define_predicate "const_exp_16_operand" ++ (and (match_code "const_int") ++ (match_test "IN_RANGE (exact_log2 (INTVAL (op)), 0, 15)"))) ++ ++(define_predicate "const_exp_32_operand" ++ (and (match_code "const_int") ++ (match_test "IN_RANGE (exact_log2 (INTVAL (op)), 0, 31)"))) ++ ++;; This is used for indexing into vectors, and hence only accepts const_int. ++(define_predicate "const_0_or_1_operand" ++ (and (match_code "const_int") ++ (match_test "IN_RANGE (INTVAL (op), 0, 1)"))) ++ ++(define_predicate "const_2_or_3_operand" ++ (and (match_code "const_int") ++ (match_test "IN_RANGE (INTVAL (op), 2, 3)"))) ++ ++(define_predicate "const_0_to_3_operand" ++ (and (match_code "const_int") ++ (match_test "IN_RANGE (INTVAL (op), 0, 3)"))) ++ ++(define_predicate "const_0_to_7_operand" ++ (and (match_code "const_int") ++ (match_test "IN_RANGE (INTVAL (op), 0, 7)"))) ++ ++(define_predicate "const_4_to_7_operand" ++ (and (match_code "const_int") ++ (match_test "IN_RANGE (INTVAL (op), 4, 7)"))) ++ ++(define_predicate "const_8_to_15_operand" ++ (and (match_code "const_int") ++ (match_test "IN_RANGE (INTVAL (op), 8, 15)"))) ++ ++(define_predicate "const_16_to_31_operand" ++ (and (match_code "const_int") ++ (match_test "IN_RANGE (INTVAL (op), 16, 31)"))) ++ ++(define_predicate "qi_mask_operand" ++ (and (match_code "const_int") ++ (match_test "UINTVAL (op) == 0xff"))) ++ ++(define_predicate "hi_mask_operand" ++ (and (match_code "const_int") ++ (match_test "UINTVAL (op) == 0xffff"))) ++ ++(define_predicate "shift_mask_operand" ++ (and (match_code "const_int") ++ (ior
(match_test "UINTVAL (op) == 0x3fffffffc") ++ (match_test "UINTVAL (op) == 0x1fffffffe") ++ (match_test "UINTVAL (op) == 0x7fffffff8") ++ (match_test "UINTVAL (op) == 0xffffffff0")))) ++ ++ ++ ++(define_predicate "si_mask_operand" ++ (and (match_code "const_int") ++ (match_test "UINTVAL (op) == 0xffffffff"))) ++ ++(define_predicate "and_load_operand" ++ (ior (match_operand 0 "qi_mask_operand") ++ (match_operand 0 "hi_mask_operand") ++ (match_operand 0 "si_mask_operand"))) ++ ++(define_predicate "low_bitmask_operand" ++ (and (match_code "const_int") ++ (match_test "low_bitmask_len (mode, INTVAL (op)) > 12"))) ++ ++(define_predicate "and_reg_operand" ++ (ior (match_operand 0 "register_operand") ++ (match_operand 0 "const_uns_arith_operand") ++ (match_operand 0 "low_bitmask_operand") ++ (match_operand 0 "si_mask_operand"))) ++ ++(define_predicate "and_operand" ++ (ior (match_operand 0 "and_load_operand") ++ (match_operand 0 "and_reg_operand"))) ++ ++(define_predicate "d_operand" ++ (and (match_code "reg") ++ (match_test "GP_REG_P (REGNO (op))"))) ++ ++(define_predicate "db4_operand" ++ (and (match_code "const_int") ++ (match_test "loongarch_unsigned_immediate_p (INTVAL (op) + 1, 4, 0)"))) ++ ++(define_predicate "db7_operand" ++ (and (match_code "const_int") ++ (match_test "loongarch_unsigned_immediate_p (INTVAL (op) + 1, 7, 0)"))) ++ ++(define_predicate "db8_operand" ++ (and (match_code "const_int") ++ (match_test "loongarch_unsigned_immediate_p (INTVAL (op) + 1, 8, 0)"))) ++ ++(define_predicate "ib3_operand" ++ (and (match_code "const_int") ++ (match_test "loongarch_unsigned_immediate_p (INTVAL (op) - 1, 3, 0)"))) ++ ++(define_predicate "sb4_operand" ++ (and (match_code "const_int") ++ (match_test "loongarch_signed_immediate_p (INTVAL (op), 4, 0)"))) ++ ++(define_predicate "sb5_operand" ++ (and (match_code "const_int") ++ (match_test "loongarch_signed_immediate_p (INTVAL (op), 5, 0)"))) ++ ++(define_predicate "sb8_operand" ++ (and (match_code "const_int") ++ 
(match_test "loongarch_signed_immediate_p (INTVAL (op), 8, 0)"))) ++ ++(define_predicate "sd8_operand" ++ (and (match_code "const_int") ++ (match_test "loongarch_signed_immediate_p (INTVAL (op), 8, 3)"))) ++ ++(define_predicate "ub4_operand" ++ (and (match_code "const_int") ++ (match_test "loongarch_unsigned_immediate_p (INTVAL (op), 4, 0)"))) ++ ++(define_predicate "ub8_operand" ++ (and (match_code "const_int") ++ (match_test "loongarch_unsigned_immediate_p (INTVAL (op), 8, 0)"))) ++ ++(define_predicate "uh4_operand" ++ (and (match_code "const_int") ++ (match_test "loongarch_unsigned_immediate_p (INTVAL (op), 4, 1)"))) ++ ++(define_predicate "uw4_operand" ++ (and (match_code "const_int") ++ (match_test "loongarch_unsigned_immediate_p (INTVAL (op), 4, 2)"))) ++ ++(define_predicate "uw5_operand" ++ (and (match_code "const_int") ++ (match_test "loongarch_unsigned_immediate_p (INTVAL (op), 5, 2)"))) ++ ++(define_predicate "uw6_operand" ++ (and (match_code "const_int") ++ (match_test "loongarch_unsigned_immediate_p (INTVAL (op), 6, 2)"))) ++ ++(define_predicate "uw8_operand" ++ (and (match_code "const_int") ++ (match_test "loongarch_unsigned_immediate_p (INTVAL (op), 8, 2)"))) ++ ++(define_predicate "addiur2_operand" ++ (and (match_code "const_int") ++ (ior (match_test "INTVAL (op) == -1") ++ (match_test "INTVAL (op) == 1") ++ (match_test "INTVAL (op) == 4") ++ (match_test "INTVAL (op) == 8") ++ (match_test "INTVAL (op) == 12") ++ (match_test "INTVAL (op) == 16") ++ (match_test "INTVAL (op) == 20") ++ (match_test "INTVAL (op) == 24")))) ++ ++(define_predicate "addiusp_operand" ++ (and (match_code "const_int") ++ (ior (match_test "(IN_RANGE (INTVAL (op), 2, 257))") ++ (match_test "(IN_RANGE (INTVAL (op), -258, -3))")))) ++ ++(define_predicate "andi16_operand" ++ (and (match_code "const_int") ++ (ior (match_test "IN_RANGE (INTVAL (op), 1, 4)") ++ (match_test "IN_RANGE (INTVAL (op), 7, 8)") ++ (match_test "IN_RANGE (INTVAL (op), 15, 16)") ++ (match_test "IN_RANGE (INTVAL 
(op), 31, 32)") ++ (match_test "IN_RANGE (INTVAL (op), 63, 64)") ++ (match_test "INTVAL (op) == 255") ++ (match_test "INTVAL (op) == 32768") ++ (match_test "INTVAL (op) == 65535")))) ++ ++(define_predicate "movep_src_register" ++ (and (match_code "reg") ++ (ior (match_test ("IN_RANGE (REGNO (op), 2, 3)")) ++ (match_test ("IN_RANGE (REGNO (op), 16, 20)"))))) ++ ++(define_predicate "movep_src_operand" ++ (ior (match_operand 0 "const_0_operand") ++ (match_operand 0 "movep_src_register"))) ++ ++(define_predicate "fcc_reload_operand" ++ (and (match_code "reg,subreg") ++ (match_test "ST_REG_P (true_regnum (op))"))) ++ ++(define_predicate "muldiv_target_operand" ++ (match_operand 0 "register_operand")) ++ ++(define_predicate "const_call_insn_operand" ++ (match_code "const,symbol_ref,label_ref") ++{ ++ enum loongarch_symbol_type symbol_type; ++ ++ if (!loongarch_symbolic_constant_p (op, SYMBOL_CONTEXT_CALL, &symbol_type)) ++ return false; ++ ++ switch (symbol_type) ++ { ++ case SYMBOL_GOT_DISP: ++ /* Without explicit relocs, there is no special syntax for ++ loading the address of a call destination into a register. ++ Using "la $25,foo; jal $25" would prevent the lazy binding ++ of "foo", so keep the address of global symbols with the ++ jal macro. 
*/ ++ return 1; ++ ++ default: ++ return false; ++ } ++}) ++ ++(define_predicate "call_insn_operand" ++ (ior (match_operand 0 "const_call_insn_operand") ++ (match_operand 0 "register_operand"))) ++ ++(define_predicate "is_const_call_local_symbol" ++ (and (match_operand 0 "const_call_insn_operand") ++ (ior (match_test "loongarch_global_symbol_p (op) == 0") ++ (match_test "loongarch_symbol_binds_local_p (op) != 0")) ++ (match_test "CONSTANT_P (op)"))) ++ ++(define_predicate "is_const_call_weak_symbol" ++ (and (match_operand 0 "const_call_insn_operand") ++ (not (match_operand 0 "is_const_call_local_symbol")) ++ (match_test "loongarch_weak_symbol_p (op) != 0") ++ (match_test "CONSTANT_P (op)"))) ++ ++(define_predicate "is_const_call_plt_symbol" ++ (and (match_operand 0 "const_call_insn_operand") ++ (match_test "flag_plt != 0") ++ (match_test "loongarch_global_symbol_noweak_p (op) != 0") ++ (match_test "CONSTANT_P (op)"))) ++ ++(define_predicate "is_const_call_global_noplt_symbol" ++ (and (match_operand 0 "const_call_insn_operand") ++ (match_test "flag_plt == 0") ++ (match_test "loongarch_global_symbol_noweak_p (op) != 0") ++ (match_test "CONSTANT_P (op)"))) ++ ++;; A legitimate CONST_INT operand that takes more than one instruction ++;; to load. ++(define_predicate "splittable_const_int_operand" ++ (match_code "const_int") ++{ ++ ++ /* Don't handle multi-word moves this way; we don't want to introduce ++ the individual word-mode moves until after reload. */ ++ if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) ++ return false; ++ ++ /* Otherwise check whether the constant can be loaded in a single ++ instruction. 
*/ ++// return !LUI_INT (op) && !SMALL_INT (op) && !SMALL_INT_UNSIGNED (op); ++ return !LUI_INT (op) && !SMALL_INT (op) && !SMALL_INT_UNSIGNED (op) ++ && !LU52I_INT (op); ++}) ++ ++(define_predicate "move_operand" ++ (match_operand 0 "general_operand") ++{ ++ enum loongarch_symbol_type symbol_type; ++ ++ /* The thinking here is as follows: ++ ++ (1) The move expanders should split complex load sequences into ++ individual instructions. Those individual instructions can ++ then be optimized by all rtl passes. ++ ++ (2) The target of pre-reload load sequences should not be used ++ to store temporary results. If the target register is only ++ assigned one value, reload can rematerialize that value ++ on demand, rather than spill it to the stack. ++ ++ (3) If we allowed pre-reload passes like combine and cse to recreate ++ complex load sequences, we would want to be able to split the ++ sequences before reload as well, so that the pre-reload scheduler ++ can see the individual instructions. This falls foul of (2); ++ the splitter would be forced to reuse the target register for ++ intermediate results. ++ ++ (4) We want to define complex load splitters for combine. These ++ splitters can request a temporary scratch register, which avoids ++ the problem in (2). They allow things like: ++ ++ (set (reg T1) (high SYM)) ++ (set (reg T2) (low (reg T1) SYM)) ++ (set (reg X) (plus (reg T2) (const_int OFFSET))) ++ ++ to be combined into: ++ ++ (set (reg T3) (high SYM+OFFSET)) ++ (set (reg X) (lo_sum (reg T3) SYM+OFFSET)) ++ ++ if T2 is only used this once. 
*/ ++ switch (GET_CODE (op)) ++ { ++ case CONST_INT: ++ return !splittable_const_int_operand (op, mode); ++ ++ case CONST: ++ case SYMBOL_REF: ++ case LABEL_REF: ++ return (loongarch_symbolic_constant_p (op, SYMBOL_CONTEXT_LEA, &symbol_type)); ++ default: ++ return true; ++ } ++}) ++ ++(define_predicate "consttable_operand" ++ (match_test "CONSTANT_P (op)")) ++ ++(define_predicate "symbolic_operand" ++ (match_code "const,symbol_ref,label_ref") ++{ ++ enum loongarch_symbol_type type; ++ return loongarch_symbolic_constant_p (op, SYMBOL_CONTEXT_LEA, &type); ++}) ++ ++(define_predicate "force_to_mem_operand" ++ (match_code "const,symbol_ref,label_ref") ++{ ++ enum loongarch_symbol_type symbol_type; ++ return (loongarch_symbolic_constant_p (op, SYMBOL_CONTEXT_LEA, &symbol_type) ++ && loongarch_use_pcrel_pool_p[(int) symbol_type]); ++}) ++ ++(define_predicate "got_disp_operand" ++ (match_code "const,symbol_ref,label_ref") ++{ ++ enum loongarch_symbol_type type; ++ return (loongarch_symbolic_constant_p (op, SYMBOL_CONTEXT_LEA, &type) ++ && type == SYMBOL_GOT_DISP); ++}) ++ ++(define_predicate "symbol_ref_operand" ++ (match_code "symbol_ref")) ++ ++(define_predicate "stack_operand" ++ (and (match_code "mem") ++ (match_test "loongarch_stack_address_p (XEXP (op, 0), GET_MODE (op))"))) ++ ++ ++ ++(define_predicate "equality_operator" ++ (match_code "eq,ne")) ++ ++(define_predicate "extend_operator" ++ (match_code "zero_extend,sign_extend")) ++ ++(define_predicate "trap_comparison_operator" ++ (match_code "eq,ne,lt,ltu,ge,geu")) ++ ++(define_predicate "order_operator" ++ (match_code "lt,ltu,le,leu,ge,geu,gt,gtu")) ++ ++;; For NE, cstore uses sltu instructions in which the first operand is $0. 
++ ++(define_predicate "loongarch_cstore_operator" ++ (ior (match_code "eq,gt,gtu,ge,geu,lt,ltu,le,leu") ++ (match_code "ne"))) ++ ++(define_predicate "small_data_pattern" ++ (and (match_code "set,parallel,unspec,unspec_volatile,prefetch") ++ (match_test "loongarch_small_data_pattern_p (op)"))) ++ ++(define_predicate "mem_noofs_operand" ++ (and (match_code "mem") ++ (match_code "reg" "0"))) ++ ++;; Return 1 if the operand is in non-volatile memory. ++(define_predicate "non_volatile_mem_operand" ++ (and (match_operand 0 "memory_operand") ++ (not (match_test "MEM_VOLATILE_P (op)")))) +diff -uNr gcc-10.3.0.org/gcc/config/loongarch/rtems.h gcc-10.3.0/gcc/config/loongarch/rtems.h +--- gcc-10.3.0.org/gcc/config/loongarch/rtems.h 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/config/loongarch/rtems.h 2022-03-23 17:40:29.350280095 +0800 +@@ -0,0 +1,39 @@ ++/* Definitions for rtems targeting a LARCH using ELF. ++ Copyright (C) 1996-2018 Free Software Foundation, Inc. ++ Contributed by Joel Sherrill (joel@OARcorp.com). ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . 
*/ ++ ++/* Specify predefined symbols in preprocessor. */ ++ ++#define TARGET_OS_CPP_BUILTINS() \ ++do { \ ++ builtin_define ("__rtems__"); \ ++ builtin_define ("__USE_INIT_FINI__"); \ ++ builtin_assert ("system=rtems"); \ ++} while (0) ++ ++/* No sdata. ++ * The RTEMS BSPs expect -G0 ++ */ ++#undef LARCH_DEFAULT_GVALUE ++#define LARCH_DEFAULT_GVALUE 0 +diff -uNr gcc-10.3.0.org/gcc/config/loongarch/sde.opt gcc-10.3.0/gcc/config/loongarch/sde.opt +--- gcc-10.3.0.org/gcc/config/loongarch/sde.opt 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/config/loongarch/sde.opt 2022-03-23 17:40:29.350280095 +0800 +@@ -0,0 +1,28 @@ ++; LARCH SDE options. ++; ++; Copyright (C) 2010-2018 Free Software Foundation, Inc. ++; ++; This file is part of GCC. ++; ++; GCC is free software; you can redistribute it and/or modify it under ++; the terms of the GNU General Public License as published by the Free ++; Software Foundation; either version 3, or (at your option) any later ++; version. ++; ++; GCC is distributed in the hope that it will be useful, but WITHOUT ++; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++; License for more details. ++; ++; You should have received a copy of the GNU General Public License ++; along with GCC; see the file COPYING3. If not see ++; . ++ ++; -mcode-xonly is a traditional alias for -mcode-readable=pcrel and ++; -mno-data-in-code is a traditional alias for -mcode-readable=no. ++ ++mno-data-in-code ++Target RejectNegative Alias(mcode-readable=, no) ++ ++mcode-xonly ++Target RejectNegative Alias(mcode-readable=, pcrel) +diff -uNr gcc-10.3.0.org/gcc/config/loongarch/sync.md gcc-10.3.0/gcc/config/loongarch/sync.md +--- gcc-10.3.0.org/gcc/config/loongarch/sync.md 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/config/loongarch/sync.md 2022-03-23 17:40:29.351280087 +0800 +@@ -0,0 +1,555 @@ ++;; Machine description for LARCH atomic operations. 
++;; Copyright (C) 2011-2018 Free Software Foundation, Inc. ++;; Contributed by Andrew Waterman (andrew@sifive.com). ++;; Based on LARCH target for GNU compiler. ++ ++;; This file is part of GCC. ++ ++;; GCC is free software; you can redistribute it and/or modify ++;; it under the terms of the GNU General Public License as published by ++;; the Free Software Foundation; either version 3, or (at your option) ++;; any later version. ++ ++;; GCC is distributed in the hope that it will be useful, ++;; but WITHOUT ANY WARRANTY; without even the implied warranty of ++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++;; GNU General Public License for more details. ++ ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; <http://www.gnu.org/licenses/>. ++ ++(define_c_enum "unspec" [ ++ UNSPEC_COMPARE_AND_SWAP ++ UNSPEC_COMPARE_AND_SWAP_ADD ++ UNSPEC_COMPARE_AND_SWAP_SUB ++ UNSPEC_COMPARE_AND_SWAP_AND ++ UNSPEC_COMPARE_AND_SWAP_XOR ++ UNSPEC_COMPARE_AND_SWAP_OR ++ UNSPEC_COMPARE_AND_SWAP_NAND ++ UNSPEC_SYNC_OLD_OP ++ UNSPEC_SYNC_EXCHANGE ++ UNSPEC_ATOMIC_STORE ++ UNSPEC_MEMORY_BARRIER ++]) ++ ++(define_code_iterator any_atomic [plus ior xor and]) ++(define_code_attr atomic_optab ++ [(plus "add") (ior "or") (xor "xor") (and "and")]) ++ ++;; This attribute gives the format suffix for atomic memory operations. ++(define_mode_attr amo [(SI "w") (DI "d")]) ++ ++;; <amop> expands to the name of the atomic operand that implements a particular code. ++(define_code_attr amop [(ior "or") ++ (xor "xor") ++ (and "and") ++ (plus "add")]) ++;; Memory barriers. 
++ ++(define_expand "mem_thread_fence" ++ [(match_operand:SI 0 "const_int_operand" "")] ;; model ++ "" ++{ ++ if (INTVAL (operands[0]) != MEMMODEL_RELAXED) ++ { ++ rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); ++ MEM_VOLATILE_P (mem) = 1; ++ emit_insn (gen_mem_thread_fence_1 (mem, operands[0])); ++ } ++ DONE; ++}) ++ ++;; Until the LARCH memory model (hence its mapping from C++) is finalized, ++;; conservatively emit a full FENCE. ++(define_insn "mem_thread_fence_1" ++ [(set (match_operand:BLK 0 "" "") ++ (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER)) ++ (match_operand:SI 1 "const_int_operand" "")] ;; model ++ "" ++ "dbar\t0") ++ ++;; Atomic memory operations. ++ ++;; Implement atomic stores with amoswap. Fall back to fences for atomic loads. ++(define_insn "atomic_store" ++ [(set (match_operand:GPR 0 "memory_operand" "+ZB") ++ (unspec_volatile:GPR ++ [(match_operand:GPR 1 "reg_or_0_operand" "rJ") ++ (match_operand:SI 2 "const_int_operand")] ;; model ++ UNSPEC_ATOMIC_STORE))] ++ "" ++ "amswap%A2.\t$zero,%z1,%0" ++ [(set (attr "length") (const_int 8))]) ++ ++(define_insn "atomic_" ++ [(set (match_operand:GPR 0 "memory_operand" "+ZB") ++ (unspec_volatile:GPR ++ [(any_atomic:GPR (match_dup 0) ++ (match_operand:GPR 1 "reg_or_0_operand" "rJ")) ++ (match_operand:SI 2 "const_int_operand")] ;; model ++ UNSPEC_SYNC_OLD_OP))] ++ "" ++ "am%A2.\t$zero,%z1,%0" ++ [(set (attr "length") (const_int 8))]) ++ ++(define_insn "atomic_fetch_" ++ [(set (match_operand:GPR 0 "register_operand" "=&r") ++ (match_operand:GPR 1 "memory_operand" "+ZB")) ++ (set (match_dup 1) ++ (unspec_volatile:GPR ++ [(any_atomic:GPR (match_dup 1) ++ (match_operand:GPR 2 "reg_or_0_operand" "rJ")) ++ (match_operand:SI 3 "const_int_operand")] ;; model ++ UNSPEC_SYNC_OLD_OP))] ++ "" ++ "am%A3.\t%0,%z2,%1" ++ [(set (attr "length") (const_int 8))]) ++ ++(define_insn "atomic_exchange" ++ [(set (match_operand:GPR 0 "register_operand" "=&r") ++ (unspec_volatile:GPR ++ [(match_operand:GPR 1 
"memory_operand" "+ZB") ++ (match_operand:SI 3 "const_int_operand")] ;; model ++ UNSPEC_SYNC_EXCHANGE)) ++ (set (match_dup 1) ++ (match_operand:GPR 2 "register_operand" "r"))] ++ "" ++ "amswap%A3.\t%0,%z2,%1" ++ [(set (attr "length") (const_int 8))]) ++ ++(define_insn "atomic_cas_value_strong" ++ [(set (match_operand:GPR 0 "register_operand" "=&r") ++ (match_operand:GPR 1 "memory_operand" "+ZC")) ++ (set (match_dup 1) ++ (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ++ (match_operand:GPR 3 "reg_or_0_operand" "rJ") ++ (match_operand:SI 4 "const_int_operand") ;; mod_s ++ (match_operand:SI 5 "const_int_operand")] ;; mod_f ++ UNSPEC_COMPARE_AND_SWAP)) ++ (clobber (match_scratch:GPR 6 "=&r"))] ++ "" ++{ ++ if (FIX_LOONGSON3_LLSC) ++ return "%G5\n\t1:\n\tll.\t%0,%1\n\tbne\t%0,%z2,2f\n\tor%i3\t%6,$zero,%3\n\tsc.\t%6,%1\n\tbeq\t$zero,%6,1b\n\t2:\n\tdbar\t0"; ++ else ++ return "%G5\n\t1:\n\tll.\t%0,%1\n\tbne\t%0,%z2,2f\n\tor%i3\t%6,$zero,%3\n\tsc.\t%6,%1\n\tbeq\t$zero,%6,1b\n\t2:"; ++ ++} ++ [(set (attr "length") (const_int 20))]) ++ ++(define_expand "atomic_compare_and_swap" ++ [(match_operand:SI 0 "register_operand" "") ;; bool output ++ (match_operand:GPR 1 "register_operand" "") ;; val output ++ (match_operand:GPR 2 "memory_operand" "") ;; memory ++ (match_operand:GPR 3 "reg_or_0_operand" "") ;; expected value ++ (match_operand:GPR 4 "reg_or_0_operand" "") ;; desired value ++ (match_operand:SI 5 "const_int_operand" "") ;; is_weak ++ (match_operand:SI 6 "const_int_operand" "") ;; mod_s ++ (match_operand:SI 7 "const_int_operand" "")] ;; mod_f ++ "" ++{ ++ emit_insn (gen_atomic_cas_value_strong (operands[1], operands[2], ++ operands[3], operands[4], ++ operands[6], operands[7])); ++ ++ rtx compare = operands[1]; ++ if (operands[3] != const0_rtx) ++ { ++ rtx difference = gen_rtx_MINUS (mode, operands[1], operands[3]); ++ compare = gen_reg_rtx (mode); ++ emit_insn (gen_rtx_SET (compare, difference)); ++ } ++ ++ if (word_mode != mode) ++ { ++ rtx reg = 
gen_reg_rtx (word_mode); ++ emit_insn (gen_rtx_SET (reg, gen_rtx_SIGN_EXTEND (word_mode, compare))); ++ compare = reg; ++ } ++ ++ emit_insn (gen_rtx_SET (operands[0], gen_rtx_EQ (SImode, compare, const0_rtx))); ++ DONE; ++}) ++ ++(define_expand "atomic_test_and_set" ++ [(match_operand:QI 0 "register_operand" "") ;; bool output ++ (match_operand:QI 1 "memory_operand" "+ZB") ;; memory ++ (match_operand:SI 2 "const_int_operand" "")] ;; model ++ "" ++{ ++ /* We have no QImode atomics, so use the address LSBs to form a mask, ++ then use an aligned SImode atomic. */ ++ rtx result = operands[0]; ++ rtx mem = operands[1]; ++ rtx model = operands[2]; ++ rtx addr = force_reg (Pmode, XEXP (mem, 0)); ++ rtx tmp_reg = gen_reg_rtx (Pmode); ++ rtx zero_reg = gen_rtx_REG (Pmode, 0); ++ ++ rtx aligned_addr = gen_reg_rtx (Pmode); ++ emit_move_insn (tmp_reg, gen_rtx_PLUS (Pmode, zero_reg, GEN_INT (-4))); ++ emit_move_insn (aligned_addr, gen_rtx_AND (Pmode, addr, tmp_reg)); ++ ++ rtx aligned_mem = change_address (mem, SImode, aligned_addr); ++ set_mem_alias_set (aligned_mem, 0); ++ ++ rtx offset = gen_reg_rtx (SImode); ++ emit_move_insn (offset, gen_rtx_AND (SImode, gen_lowpart (SImode, addr), ++ GEN_INT (3))); ++ ++ rtx tmp = gen_reg_rtx (SImode); ++ emit_move_insn (tmp, GEN_INT (1)); ++ ++ rtx shmt = gen_reg_rtx (SImode); ++ emit_move_insn (shmt, gen_rtx_ASHIFT (SImode, offset, GEN_INT (3))); ++ ++ rtx word = gen_reg_rtx (SImode); ++ emit_move_insn (word, gen_rtx_ASHIFT (SImode, tmp, shmt)); ++ ++ tmp = gen_reg_rtx (SImode); ++ emit_insn (gen_atomic_fetch_orsi (tmp, aligned_mem, word, model)); ++ ++ emit_move_insn (gen_lowpart (SImode, result), ++ gen_rtx_LSHIFTRT (SImode, tmp, shmt)); ++ DONE; ++}) ++ ++ ++ ++(define_insn "atomic_cas_value_cmp_and_7_" ++ [(set (match_operand:GPR 0 "register_operand" "=&r") ++ (match_operand:GPR 1 "memory_operand" "+ZC")) ++ (set (match_dup 1) ++ (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ++ (match_operand:GPR 3 
"reg_or_0_operand" "rJ") ++ (match_operand:GPR 4 "reg_or_0_operand" "rJ") ++ (match_operand:GPR 5 "reg_or_0_operand" "rJ") ++ (match_operand:SI 6 "const_int_operand")] ;; model ++ UNSPEC_COMPARE_AND_SWAP)) ++ (clobber (match_scratch:GPR 7 "=&r"))] ++ "" ++{ ++ if (FIX_LOONGSON3_LLSC) ++ return "%G6\n\t1:\n\tll.\t%0,%1\n\tand\t%7,%0,%2\n\tbne\t%7,%z4,2f\n\tand\t%7,%0,%z3\n\tor%i5\t%7,%7,%5\n\tsc.\t%7,%1\n\tbeq\t$zero,%7,1b\n\t2:\n\tdbar\t0"; ++ else ++ return "%G6\n\t1:\n\tll.\t%0,%1\n\tand\t%7,%0,%2\n\tbne\t%7,%z4,2f\n\tand\t%7,%0,%z3\n\tor%i5\t%7,%7,%5\n\tsc.\t%7,%1\n\tbeq\t$zero,%7,1b\n\t2:"; ++} ++ [(set (attr "length") (const_int 20))]) ++ ++ ++(define_expand "atomic_compare_and_swap" ++ [(match_operand:SI 0 "register_operand" "") ;; bool output ++ (match_operand:SHORT 1 "register_operand" "") ;; val output ++ (match_operand:SHORT 2 "memory_operand" "") ;; memory ++ (match_operand:SHORT 3 "reg_or_0_operand" "") ;; expected value ++ (match_operand:SHORT 4 "reg_or_0_operand" "") ;; desired value ++ (match_operand:SI 5 "const_int_operand" "") ;; is_weak ++ (match_operand:SI 6 "const_int_operand" "") ;; mod_s ++ (match_operand:SI 7 "const_int_operand" "")] ;; mod_f ++ "" ++{ ++ union loongarch_gen_fn_ptrs generator; ++ generator.fn_7 = gen_atomic_cas_value_cmp_and_7_si; ++ loongarch_expand_atomic_qihi (generator, ++ operands[1], ++ operands[2], ++ operands[3], ++ operands[4], ++ operands[7]); ++ ++ rtx compare = operands[1]; ++ if (operands[3] != const0_rtx) ++ { ++ machine_mode mode = GET_MODE (operands[3]); ++ rtx op1 = convert_modes (SImode, mode, operands[1], true); ++ rtx op3 = convert_modes (SImode, mode, operands[3], true); ++ rtx difference = gen_rtx_MINUS (SImode, op1, op3); ++ compare = gen_reg_rtx (SImode); ++ emit_insn (gen_rtx_SET (compare, difference)); ++ } ++ ++ if (word_mode != mode) ++ { ++ rtx reg = gen_reg_rtx (word_mode); ++ emit_insn (gen_rtx_SET (reg, gen_rtx_SIGN_EXTEND (word_mode, compare))); ++ compare = reg; ++ } ++ ++ emit_insn 
(gen_rtx_SET (operands[0], gen_rtx_EQ (SImode, compare, const0_rtx))); ++ DONE; ++}) ++ ++ ++ ++ ++(define_insn "atomic_cas_value_add_7_" ++ [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res ++ (match_operand:GPR 1 "memory_operand" "+ZC")) ++ (set (match_dup 1) ++ (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask ++ (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask ++ (match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val ++ (match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val ++ (match_operand:SI 6 "const_int_operand")] ;; model ++ UNSPEC_COMPARE_AND_SWAP_ADD)) ++ (clobber (match_scratch:GPR 7 "=&r")) ++ (clobber (match_scratch:GPR 8 "=&r"))] ++ "" ++ "%G6\n\t1:\n\tll.\t%0,%1\n\tand\t%7,%0,%3\n\tadd.w\t%8,%0,%z5\n\tand\t%8,%8,%z2\n\tor%i8\t%7,%7,%8\n\tsc.\t%7,%1\n\tbeq\t$zero,%7,1b" ++ [(set (attr "length") (const_int 20))]) ++ ++ ++ ++(define_insn "atomic_cas_value_sub_7_" ++ [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res ++ (match_operand:GPR 1 "memory_operand" "+ZC")) ++ (set (match_dup 1) ++ (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask ++ (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask ++ (match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val ++ (match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val ++ (match_operand:SI 6 "const_int_operand")] ;; model ++ UNSPEC_COMPARE_AND_SWAP_SUB)) ++ (clobber (match_scratch:GPR 7 "=&r")) ++ (clobber (match_scratch:GPR 8 "=&r"))] ++ "" ++ "%G6\n\t1:\n\tll.\t%0,%1\n\tand\t%7,%0,%3\n\tsub.w\t%8,%0,%z5\n\tand\t%8,%8,%z2\n\tor%i8\t%7,%7,%8\n\tsc.\t%7,%1\n\tbeq\t$zero,%7,1b" ++ [(set (attr "length") (const_int 20))]) ++ ++ ++ ++(define_insn "atomic_cas_value_and_7_" ++ [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res ++ (match_operand:GPR 1 "memory_operand" "+ZC")) ++ (set (match_dup 1) ++ (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask ++ (match_operand:GPR 3 
"reg_or_0_operand" "rJ") ;; inverted_mask ++ (match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val ++ (match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val ++ (match_operand:SI 6 "const_int_operand")] ;; model ++ UNSPEC_COMPARE_AND_SWAP_AND)) ++ (clobber (match_scratch:GPR 7 "=&r")) ++ (clobber (match_scratch:GPR 8 "=&r"))] ++ "" ++ "%G6\n\t1:\n\tll.\t%0,%1\n\tand\t%7,%0,%3\n\tand\t%8,%0,%z5\n\tand\t%8,%8,%z2\n\tor%i8\t%7,%7,%8\n\tsc.\t%7,%1\n\tbeq\t$zero,%7,1b" ++ [(set (attr "length") (const_int 20))]) ++ ++(define_insn "atomic_cas_value_xor_7_" ++ [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res ++ (match_operand:GPR 1 "memory_operand" "+ZC")) ++ (set (match_dup 1) ++ (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask ++ (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask ++ (match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val ++ (match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val ++ (match_operand:SI 6 "const_int_operand")] ;; model ++ UNSPEC_COMPARE_AND_SWAP_XOR)) ++ (clobber (match_scratch:GPR 7 "=&r")) ++ (clobber (match_scratch:GPR 8 "=&r"))] ++ "" ++ "%G6\n\t1:\n\tll.\t%0,%1\n\tand\t%7,%0,%3\n\txor\t%8,%0,%z5\n\tand\t%8,%8,%z2\n\tor%i8\t%7,%7,%8\n\tsc.\t%7,%1\n\tbeq\t$zero,%7,1b" ++ [(set (attr "length") (const_int 20))]) ++ ++(define_insn "atomic_cas_value_or_7_" ++ [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res ++ (match_operand:GPR 1 "memory_operand" "+ZC")) ++ (set (match_dup 1) ++ (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask ++ (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask ++ (match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val ++ (match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val ++ (match_operand:SI 6 "const_int_operand")] ;; model ++ UNSPEC_COMPARE_AND_SWAP_OR)) ++ (clobber (match_scratch:GPR 7 "=&r")) ++ (clobber (match_scratch:GPR 8 "=&r"))] ++ "" ++ 
"%G6\n\t1:\n\tll.\t%0,%1\n\tand\t%7,%0,%3\n\tor\t%8,%0,%z5\n\tand\t%8,%8,%z2\n\tor%i8\t%7,%7,%8\n\tsc.\t%7,%1\n\tbeq\t$zero,%7,1b" ++ [(set (attr "length") (const_int 20))]) ++ ++(define_insn "atomic_cas_value_nand_7_" ++ [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res ++ (match_operand:GPR 1 "memory_operand" "+ZC")) ++ (set (match_dup 1) ++ (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask ++ (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask ++ (match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val ++ (match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val ++ (match_operand:SI 6 "const_int_operand")] ;; model ++ UNSPEC_COMPARE_AND_SWAP_NAND)) ++ (clobber (match_scratch:GPR 7 "=&r")) ++ (clobber (match_scratch:GPR 8 "=&r"))] ++ "" ++ "%G6\n\t1:\n\tll.\t%0,%1\n\tand\t%7,%0,%3\n\tand\t%8,%0,%z5\n\txor\t%8,%8,%z2\n\tor%i8\t%7,%7,%8\n\tsc.\t%7,%1\n\tbeq\t$zero,%7,1b" ++ [(set (attr "length") (const_int 20))]) ++ ++;;(define_insn "atomic_cas_value_llsc_6_" ++;; [(set (match_operand:GPR 0 "register_operand" "=&r") ++;; (match_operand:GPR 1 "memory_operand" "+ZB")) ++;; (set (match_dup 1) ++;; (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ++;; (match_operand:GPR 3 "reg_or_0_operand" "rJ") ++;; (match_operand:GPR 4 "reg_or_0_operand" "rJ") ++;; (match_operand:SI 5 "const_int_operand")] ;; model ++;; UNSPEC_COMPARE_AND_SWAP)) ++;; (clobber (match_scratch:GPR 6 "=&r"))] ++;; "" ++;; "%G5\n\t1:\n\tll.\t%0,%1\n\tand\t%6,%0,%2\n\tbne\t%6,%z4,2f\n\tand\t%6,%0,%z3\n\tsc.\t%6,%1\n\tbeq\t$zero,%6,1b\n\t2:" ++;; [(set (attr "length") (const_int 20))]) ++;; ++ ++ ++(define_expand "atomic_exchange" ++ [(set (match_operand:SHORT 0 "register_operand") ++ (unspec_volatile:SHORT ++ [(match_operand:SHORT 1 "memory_operand") ++ (match_operand:SI 3 "const_int_operand")] ;; model ++ UNSPEC_SYNC_EXCHANGE)) ++ (set (match_dup 1) ++ (match_operand:SHORT 2 "register_operand"))] ++ "" ++{ ++ union loongarch_gen_fn_ptrs 
generator; ++ generator.fn_7 = gen_atomic_cas_value_cmp_and_7_si; ++ loongarch_expand_atomic_qihi (generator, ++ operands[0], ++ operands[1], ++ operands[1], ++ operands[2], ++ operands[3]); ++ DONE; ++}) ++ ++ ++(define_expand "atomic_fetch_add" ++ [(set (match_operand:SHORT 0 "register_operand" "=&r") ++ (match_operand:SHORT 1 "memory_operand" "+ZB")) ++ (set (match_dup 1) ++ (unspec_volatile:SHORT ++ [(plus:SHORT (match_dup 1) ++ (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) ++ (match_operand:SI 3 "const_int_operand")] ;; model ++ UNSPEC_SYNC_OLD_OP))] ++ "" ++{ ++ union loongarch_gen_fn_ptrs generator; ++ generator.fn_7 = gen_atomic_cas_value_add_7_si; ++ loongarch_expand_atomic_qihi (generator, ++ operands[0], ++ operands[1], ++ operands[1], ++ operands[2], ++ operands[3]); ++ DONE; ++}) ++ ++(define_expand "atomic_fetch_sub" ++ [(set (match_operand:SHORT 0 "register_operand" "=&r") ++ (match_operand:SHORT 1 "memory_operand" "+ZB")) ++ (set (match_dup 1) ++ (unspec_volatile:SHORT ++ [(minus:SHORT (match_dup 1) ++ (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) ++ (match_operand:SI 3 "const_int_operand")] ;; model ++ UNSPEC_SYNC_OLD_OP))] ++ "" ++{ ++ union loongarch_gen_fn_ptrs generator; ++ generator.fn_7 = gen_atomic_cas_value_sub_7_si; ++ loongarch_expand_atomic_qihi (generator, ++ operands[0], ++ operands[1], ++ operands[1], ++ operands[2], ++ operands[3]); ++ DONE; ++}) ++ ++(define_expand "atomic_fetch_and" ++ [(set (match_operand:SHORT 0 "register_operand" "=&r") ++ (match_operand:SHORT 1 "memory_operand" "+ZB")) ++ (set (match_dup 1) ++ (unspec_volatile:SHORT ++ [(and:SHORT (match_dup 1) ++ (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) ++ (match_operand:SI 3 "const_int_operand")] ;; model ++ UNSPEC_SYNC_OLD_OP))] ++ "" ++{ ++ union loongarch_gen_fn_ptrs generator; ++ generator.fn_7 = gen_atomic_cas_value_and_7_si; ++ loongarch_expand_atomic_qihi (generator, ++ operands[0], ++ operands[1], ++ operands[1], ++ operands[2], ++ operands[3]); ++ DONE; 
++}) ++ ++(define_expand "atomic_fetch_xor" ++ [(set (match_operand:SHORT 0 "register_operand" "=&r") ++ (match_operand:SHORT 1 "memory_operand" "+ZB")) ++ (set (match_dup 1) ++ (unspec_volatile:SHORT ++ [(xor:SHORT (match_dup 1) ++ (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) ++ (match_operand:SI 3 "const_int_operand")] ;; model ++ UNSPEC_SYNC_OLD_OP))] ++ "" ++{ ++ union loongarch_gen_fn_ptrs generator; ++ generator.fn_7 = gen_atomic_cas_value_xor_7_si; ++ loongarch_expand_atomic_qihi (generator, ++ operands[0], ++ operands[1], ++ operands[1], ++ operands[2], ++ operands[3]); ++ DONE; ++}) ++ ++(define_expand "atomic_fetch_or" ++ [(set (match_operand:SHORT 0 "register_operand" "=&r") ++ (match_operand:SHORT 1 "memory_operand" "+ZB")) ++ (set (match_dup 1) ++ (unspec_volatile:SHORT ++ [(ior:SHORT (match_dup 1) ++ (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) ++ (match_operand:SI 3 "const_int_operand")] ;; model ++ UNSPEC_SYNC_OLD_OP))] ++ "" ++{ ++ union loongarch_gen_fn_ptrs generator; ++ generator.fn_7 = gen_atomic_cas_value_or_7_si; ++ loongarch_expand_atomic_qihi (generator, ++ operands[0], ++ operands[1], ++ operands[1], ++ operands[2], ++ operands[3]); ++ DONE; ++}) ++ ++(define_expand "atomic_fetch_nand" ++ [(set (match_operand:SHORT 0 "register_operand" "=&r") ++ (match_operand:SHORT 1 "memory_operand" "+ZB")) ++ (set (match_dup 1) ++ (unspec_volatile:SHORT ++ [(not:SHORT (and:SHORT (match_dup 1) ++ (match_operand:SHORT 2 "reg_or_0_operand" "rJ"))) ++ (match_operand:SI 3 "const_int_operand")] ;; model ++ UNSPEC_SYNC_OLD_OP))] ++ "" ++{ ++ union loongarch_gen_fn_ptrs generator; ++ generator.fn_7 = gen_atomic_cas_value_nand_7_si; ++ loongarch_expand_atomic_qihi (generator, ++ operands[0], ++ operands[1], ++ operands[1], ++ operands[2], ++ operands[3]); ++ DONE; ++}) ++ +diff -uNr gcc-10.3.0.org/gcc/config/loongarch/t-linux gcc-10.3.0/gcc/config/loongarch/t-linux +--- gcc-10.3.0.org/gcc/config/loongarch/t-linux 1970-01-01 08:00:00.000000000 +0800 ++++ 
gcc-10.3.0/gcc/config/loongarch/t-linux 2022-03-23 17:40:29.351280087 +0800 +@@ -0,0 +1,23 @@ ++# Copyright (C) 2003-2018 Free Software Foundation, Inc. ++# ++# This file is part of GCC. ++# ++# GCC is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3, or (at your option) ++# any later version. ++# ++# GCC is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# . ++ ++MULTILIB_OSDIRNAMES := ../lib64$(call if_multiarch,:loongarch64-linux-gnu) ++MULTIARCH_DIRNAME := $(call if_multiarch,loongarch64-linux-gnu) ++ ++# haven't supported lp32 yet ++MULTILIB_EXCEPTIONS = mabi=lp32 +diff -uNr gcc-10.3.0.org/gcc/config/loongarch/t-loongarch gcc-10.3.0/gcc/config/loongarch/t-loongarch +--- gcc-10.3.0.org/gcc/config/loongarch/t-loongarch 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/config/loongarch/t-loongarch 2022-03-23 17:40:29.351280087 +0800 +@@ -0,0 +1,46 @@ ++# Copyright (C) 2002-2018 Free Software Foundation, Inc. ++# ++# This file is part of GCC. ++# ++# GCC is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3, or (at your option) ++# any later version. ++# ++# GCC is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. 
If not see ++# . ++ ++$(srcdir)/config/loongarch/loongarch-tables.opt: $(srcdir)/config/loongarch/genopt.sh \ ++ $(srcdir)/config/loongarch/loongarch-cpus.def ++ $(SHELL) $(srcdir)/config/loongarch/genopt.sh $(srcdir)/config/loongarch > \ ++ $(srcdir)/config/loongarch/loongarch-tables.opt ++ ++frame-header-opt.o: $(srcdir)/config/loongarch/frame-header-opt.c ++ $(COMPILE) $< ++ $(POSTCOMPILE) ++ ++loongarch-d.o: $(srcdir)/config/loongarch/loongarch-d.c ++ $(COMPILE) $< ++ $(POSTCOMPILE) ++ ++loongarch-c.o: $(srcdir)/config/loongarch/loongarch-c.c $(CONFIG_H) $(SYSTEM_H) \ ++ coretypes.h $(TM_H) $(TREE_H) output.h $(C_COMMON_H) $(TARGET_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ++ $(srcdir)/config/loongarch/loongarch-c.c ++ ++loongarch-builtins.o: $(srcdir)/config/loongarch/loongarch-builtins.c $(CONFIG_H) \ ++ $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(TREE_H) $(RECOG_H) langhooks.h \ ++ $(DIAGNOSTIC_CORE_H) $(OPTABS_H) $(srcdir)/config/loongarch/loongarch-ftypes.def \ ++ $(srcdir)/config/loongarch/loongarch-modes.def ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ++ $(srcdir)/config/loongarch/loongarch-builtins.c ++ ++comma=, ++MULTILIB_OPTIONS = $(subst $(comma),/, $(patsubst %, mabi=%, $(subst $(comma),$(comma)mabi=,$(TM_MULTILIB_CONFIG)))) ++MULTILIB_DIRNAMES = $(subst $(comma), ,$(TM_MULTILIB_CONFIG)) +diff -uNr gcc-10.3.0.org/gcc/config/loongarch/x-native gcc-10.3.0/gcc/config/loongarch/x-native +--- gcc-10.3.0.org/gcc/config/loongarch/x-native 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/config/loongarch/x-native 2022-03-23 17:40:29.351280087 +0800 +@@ -0,0 +1,3 @@ ++driver-native.o : $(srcdir)/config/loongarch/driver-native.c \ ++ $(CONFIG_H) $(SYSTEM_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< +diff -uNr gcc-10.3.0.org/gcc/config.gcc gcc-10.3.0/gcc/config.gcc +--- gcc-10.3.0.org/gcc/config.gcc 2021-04-08 19:56:28.033740243 +0800 ++++ gcc-10.3.0/gcc/config.gcc 
2022-03-23 17:40:29.352280079 +0800 +@@ -477,6 +477,14 @@ + extra_objs="frame-header-opt.o" + extra_options="${extra_options} g.opt fused-madd.opt mips/mips-tables.opt" + ;; ++loongarch*-*-*) ++ cpu_type=loongarch ++ d_target_objs="loongarch-d.o" ++ extra_headers="lvzintrin.h larchintrin.h" ++ extra_objs="frame-header-opt.o loongarch-c.o loongarch-builtins.o" ++ extra_options="${extra_options} g.opt fused-madd.opt loongarch/loongarch-tables.opt" ++ ++ ;; + nds32*) + cpu_type=nds32 + extra_headers="nds32_intrinsic.h nds32_isr.h nds32_init.inc" +@@ -2575,6 +2583,55 @@ + tmake_file="${tmake_file} mips/t-linux64" + fi + ;; ++loongarch*-*-linux*) ++ case ${with_abi} in ++ "") ++ echo "not specify ABI, default is lp64 for loongarch64" ++ with_abi=lp64 # for default ++ ;; ++ lpx32) ++ ;; ++ lp32) ++ ;; ++ lp64) ++ ;; ++ *) ++ echo "Unknown ABI used in --with-abi=$with_abi" ++ exit 1 ++ esac ++ ++ enable_multilib="yes" ++ loongarch_multilibs="${with_multilib_list}" ++ if test "$loongarch_multilibs" = "default"; then ++ loongarch_multilibs="${with_abi}" ++ fi ++ loongarch_multilibs=`echo $loongarch_multilibs | sed -e 's/,/ /g'` ++ for loongarch_multilib in ${loongarch_multilibs}; do ++ case ${loongarch_multilib} in ++ lp64 | lpx32 | lp32 ) ++ TM_MULTILIB_CONFIG="${TM_MULTILIB_CONFIG},${loongarch_multilib}" ++ ;; ++ *) ++ echo "--with-multilib-list=${loongarch_multilib} not supported." 
++ exit 1 ++ esac ++ done ++ TM_MULTILIB_CONFIG=`echo $TM_MULTILIB_CONFIG | sed 's/^,//'` ++ ++ if test `for one_abi in ${loongarch_multilibs}; do if [ x\$one_abi = x$with_abi ]; then echo 1; exit 0; fi; done; echo 0;` = "0"; then ++ echo "--with-abi=${with_abi} must be one of --with-multilib-list=${with_multilib_list}" ++ exit 1 ++ fi ++ ++ tm_file="dbxelf.h elfos.h gnu-user.h linux.h linux-android.h glibc-stdint.h ${tm_file} loongarch/gnu-user.h loongarch/linux.h loongarch/linux-common.h" ++ extra_options="${extra_options} linux-android.opt" ++ tmake_file="${tmake_file} loongarch/t-linux" ++ gnu_ld=yes ++ gas=yes ++ # Force .init_array support. The configure script cannot always ++ # automatically detect that GAS supports it, yet we require it. ++ gcc_cv_initfini_array=yes ++ ;; + mips*-mti-elf*) + tm_file="elfos.h newlib-stdint.h ${tm_file} mips/elf.h mips/n32-elf.h mips/sde.h mips/mti-elf.h" + tmake_file="mips/t-mti-elf" +@@ -2629,6 +2686,31 @@ + ;; + esac + ;; ++loongarch*-sde-elf*) ++ tm_file="elfos.h newlib-stdint.h ${tm_file} loongarch/elf.h loongarch/sde.h" ++# tmake_file="loongarch/t-sde" ++ extra_options="${extra_options} loongarch/sde.opt" ++ case "${with_newlib}" in ++ yes) ++ # newlib / libgloss. ++ ;; ++ *) ++ # MIPS toolkit libraries. ++ tm_file="$tm_file loongarch/sdemtk.h" ++ tmake_file="$tmake_file loongarch/t-sdemtk" ++ case ${enable_threads} in ++ "" | yes | loongarchsde) ++ thread_file='loongarchsde' ++ ;; ++ esac ++ ;; ++ esac ++ case ${target} in ++ loongarch*) ++ tm_defines="LARCH_ISA_DEFAULT=65 LARCH_ABI_DEFAULT=ABILP64" ++ ;; ++ esac ++ ;; + mipsisa32-*-elf* | mipsisa32el-*-elf* | \ + mipsisa32r2-*-elf* | mipsisa32r2el-*-elf* | \ + mipsisa32r6-*-elf* | mipsisa32r6el-*-elf* | \ +@@ -4092,6 +4174,9 @@ + # The kernel emulates LL and SC where necessary. 
+ with_llsc=yes + ;; ++ loongarch*-*-linux*) ++ with_fix-loongson3-llsc=yes ++ ;; + mips64r5900-*-* | mips64r5900el-*-* | mipsr5900-*-* | mipsr5900el-*-*) + # The R5900 doesn't support LL(D) and SC(D). + with_llsc=no +@@ -4854,6 +4939,55 @@ + esac + ;; + ++ loongarch*-*-*) ++ supported_defaults="abi arch arch_32 arch_64 float fpu fp_32 tune tune_32 tune_64 fix-loongson3-llsc" ++ ++ case ${with_float} in ++ "" | soft | hard) ++ # OK ++ ;; ++ *) ++ echo "Unknown floating point type used in --with-float=$with_float" 1>&2 ++ exit 1 ++ ;; ++ esac ++ ++ case ${with_fpu} in ++ "" | single | double) ++ # OK ++ ;; ++ *) ++ echo "Unknown fpu type used in --with-fpu=$with_fpu" 1>&2 ++ exit 1 ++ ;; ++ esac ++ ++ case ${with_fp_32} in ++ "" | 32 | xx | 64) ++ # OK ++ ;; ++ *) ++ echo "Unknown FP mode used in --with-fp-32=$with_fp_32" 1>&2 ++ exit 1 ++ ;; ++ esac ++ ++ case ${with_fix_loongson3_llsc} in ++ yes) ++ with_fix_loongson3_llsc=fix-loongson3-llsc ++ ;; ++ no) ++ with_fix_loongson3_llsc=no-fix-loongson3-llsc ++ ;; ++ "") ++ ;; ++ *) ++ echo "Unknown fix-loongson3-llsc type used in --with-fix-loongson3-llsc" 1>&2 ++ exit 1 ++ ;; ++ esac ++ ;; ++ + nds32*-*-*) + supported_defaults="arch cpu nds32_lib float fpu_config" + +@@ -5301,6 +5435,29 @@ + tmake_file="mips/t-mips $tmake_file" + ;; + ++ loongarch*-*-*) ++ case ${target} in ++ loongarch*-*-*) ++ tm_defines="TARGET_ENDIAN_DEFAULT=0 $tm_defines" ++ ;; ++ esac ++ if test x$with_arch != x; then ++ default_loongarch_arch=$with_arch ++ fi ++ if test x$with_abi != x; then ++ default_loongarch_abi=$with_abi ++ fi ++ case ${default_loongarch_arch} in ++ loongarch | loongarch64) tm_defines="$tm_defines LARCH_ISA_DEFAULT=65" ;; ++ esac ++ case ${default_loongarch_abi} in ++ lp64) tm_defines="$tm_defines LARCH_ABI_DEFAULT=ABILP64" ;; ++ lp32) tm_defines="$tm_defines LARCH_ABI_DEFAULT=ABILP32" ;; ++ lpx32) tm_defines="$tm_defines LARCH_ABI_DEFAULT=ABILPX32" ;; ++ esac ++ tmake_file="loongarch/t-loongarch $tmake_file" ++ ;; ++ + 
powerpc*-*-* | rs6000-*-*) + # FIXME: The PowerPC port uses the value set at compile time, + # although it's only cosmetic. +diff -uNr gcc-10.3.0.org/gcc/config.host gcc-10.3.0/gcc/config.host +--- gcc-10.3.0.org/gcc/config.host 2022-03-23 17:25:14.305350343 +0800 ++++ gcc-10.3.0/gcc/config.host 2022-03-23 17:40:29.352280079 +0800 +@@ -146,6 +146,14 @@ + ;; + esac + ;; ++ loongarch*-*-linux*) ++ case ${target} in ++ loongarch*-*-linux*) ++ host_extra_gcc_objs="driver-native.o" ++ host_xmake_file="${host_xmake_file} loongarch/x-native" ++ ;; ++ esac ++ ;; + rs6000-*-* \ + | powerpc*-*-* ) + case ${target} in +diff -uNr gcc-10.3.0.org/gcc/config.host.orig gcc-10.3.0/gcc/config.host.orig +--- gcc-10.3.0.org/gcc/config.host.orig 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/config.host.orig 2022-03-23 17:40:29.353280071 +0800 +@@ -0,0 +1,301 @@ ++# GCC host-specific configuration file. ++# Copyright (C) 1997-2020 Free Software Foundation, Inc. ++ ++#This file is part of GCC. ++ ++#GCC is free software; you can redistribute it and/or modify it under ++#the terms of the GNU General Public License as published by the Free ++#Software Foundation; either version 3, or (at your option) any later ++#version. ++ ++#GCC is distributed in the hope that it will be useful, but WITHOUT ++#ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++#FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++#for more details. ++ ++#You should have received a copy of the GNU General Public License ++#along with GCC; see the file COPYING3. If not see ++#. ++ ++# This is the GCC host-specific configuration file ++# where a configuration type is mapped to different system-specific ++# definitions and files. This is invoked by the autoconf-generated ++# configure script. Putting it in a separate shell file lets us skip ++# running autoconf when modifying host-specific information. ++ ++# This file switches on the shell variable ${host}. 
As much of this as ++# is reasonable should be replaced with autoconf tests in the future. ++ ++# This file sets the following shell variables for use by the ++# autoconf-generated configure script: ++# ++# host_xm_file List of files to include when compiling for the ++# host machine. ++# ++# host_xm_defines List of macros to define when compiling for the ++# host machine. ++# ++# host_xmake_file List of host-specific makefile-fragments. ++# ++# host_exeext Set to the suffix, if the host machine requires ++# executables to have a file name suffix. ++# ++# host_extra_objs List of extra host-dependent objects that should ++# be linked into the compiler proper. ++# ++# host_extra_gcc_objs List of extra host-dependent objects that should ++# be linked into the gcc driver. ++# ++# out_host_hook_obj An object file that provides the host hooks. ++# ++# host_can_use_collect2 Set to yes normally; to no if the host cannot ++# link or otherwise use collect2 ++# ++# use_long_long_for_widest_fast_int Set this to 'yes' if 'long long' ++# (or '__int64') is wider than 'long' but still ++# efficeiently supported by the host hardware. ++# Only affects compile speed. Default is 'no'. ++# ++# host_lto_plugin_soname Set this to the name to which the LTO linker ++# plugin gets compiled on this host, if it is ++# different from the default "liblto_plugin.so". ++# ++# host_bolt_plugin_soname Set this to the name to which the BOLT ++# plugin gets compiled on this host, if it is ++# different from the default "libbolt_plugin.so". ++# ++# When setting any of these variables, check to see if a corresponding ++# variable is present in config.build; if so, you will likely want to ++# set it in both places. ++ ++# Default settings. 
++host_xm_file= ++host_xm_defines= ++host_xmake_file= ++host_exeext= ++host_extra_objs= ++host_extra_gcc_objs= ++out_host_hook_obj=host-default.o ++host_can_use_collect2=yes ++use_long_long_for_widest_fast_int=no ++host_lto_plugin_soname=liblto_plugin.so ++host_bolt_plugin_soname=libbolt_plugin.so ++ ++# Unsupported hosts list. Generally, only include hosts known to fail here, ++# since we allow hosts not listed to be supported generically. ++case ${host} in ++ i[34567]86-sequent-sysv \ ++ | i[34567]86-sequent-sysv[123]* \ ++ | i[34567]86-go32-* \ ++ | i[34567]86-*-go32* \ ++ | vax-*-vms*) ++ echo "*** Configuration for host ${host} not supported" 1>&2 ++ exit 1 ++ ;; ++esac ++ ++# Common parts for widely ported systems. ++case ${host} in ++ *-darwin*) ++ # Generic darwin host support. ++ out_host_hook_obj=host-darwin.o ++ host_xmake_file="${host_xmake_file} x-darwin" ++ ;; ++esac ++ ++case ${host} in ++ aarch64*-*-freebsd* | aarch64*-*-linux* | aarch64*-*-fuchsia*) ++ case ${target} in ++ aarch64*-*-*) ++ host_extra_gcc_objs="driver-aarch64.o" ++ host_xmake_file="${host_xmake_file} aarch64/x-aarch64" ++ ;; ++ esac ++ ;; ++ arm*-*-freebsd* | arm*-*-netbsd* | arm*-*-linux* | arm*-*-fuchsia*) ++ case ${target} in ++ arm*-*-*) ++ host_extra_gcc_objs="driver-arm.o" ++ host_xmake_file="${host_xmake_file} arm/x-arm" ++ ;; ++ esac ++ ;; ++ alpha*-*-linux*) ++ case ${target} in ++ alpha*-*-linux*) ++ host_extra_gcc_objs="driver-alpha.o" ++ host_xmake_file="${host_xmake_file} alpha/x-alpha" ++ ;; ++ esac ++ ;; ++ i[34567]86-*-* \ ++ | x86_64-*-* ) ++ case ${target} in ++ i[34567]86-*-* \ ++ | x86_64-*-* ) ++ host_extra_gcc_objs="driver-i386.o" ++ host_xmake_file="${host_xmake_file} i386/x-i386" ++ ;; ++ esac ++ ;; ++ mips*-*-linux*) ++ case ${target} in ++ mips*-*-linux*) ++ host_extra_gcc_objs="driver-native.o" ++ host_xmake_file="${host_xmake_file} mips/x-native" ++ ;; ++ esac ++ ;; ++ rs6000-*-* \ ++ | powerpc*-*-* ) ++ case ${target} in ++ rs6000-*-* \ ++ | powerpc*-*-* 
) ++ host_extra_gcc_objs="driver-rs6000.o" ++ host_xmake_file="${host_xmake_file} rs6000/x-rs6000" ++ ;; ++ esac ++ case ${host} in ++ *-*-linux* | *-*-freebsd*) ++ if test "${GCC}:${ac_cv_sizeof_long}" = yes:4; then ++ # On powerpc*-*-linux* use -Wl,--relax to link cc1, ++ # if ld is new enough, otherwise force -O1 in CFLAGS. ++ host_ppc_relax_xmake_file= ++ host_ld_ver=`${CC} -Wl,--version 2>/dev/null | sed 1q` ++ if echo "$host_ld_ver" | grep GNU > /dev/null; then ++ host_ld_date=`echo $host_ld_ver \ ++ | sed -n 's,^.*\([2-9][0-9][0-9][0-9]\)[-]*\([01][0-9]\)[-]*\([0-3][0-9]\).*$,\1\2\3,p'` ++ if test 0"$host_ld_date" -gt 20080806; then ++ host_ppc_relax_xmake_file=rs6000/x-linux-relax ++ fi ++ fi ++ if test -z "${host_ppc_relax_xmake_file}"; then ++ host_ppc_relax_xmake_file=x-cflags-O1 ++ fi ++ host_xmake_file="${host_xmake_file} ${host_ppc_relax_xmake_file}" ++ fi ++ ;; ++ esac ++ ;; ++ s390-*-* | s390x-*-*) ++ case ${target} in ++ s390-*-* | s390x-*-*) ++ host_extra_gcc_objs="driver-native.o" ++ host_xmake_file="${host_xmake_file} s390/x-native" ++ ;; ++ esac ++ ;; ++ sparc*-*-solaris2*) ++ case ${target} in ++ sparc*-*-solaris2*) ++ host_extra_gcc_objs="driver-sparc.o" ++ host_xmake_file="${host_xmake_file} sparc/x-sparc" ++ ;; ++ esac ++ ;; ++ sparc*-*-linux*) ++ case ${target} in ++ sparc*-*-linux*) ++ host_extra_gcc_objs="driver-sparc.o" ++ host_xmake_file="${host_xmake_file} sparc/x-sparc" ++ ;; ++ esac ++ ;; ++esac ++ ++# Machine-specific settings. 
++case ${host} in ++ hppa1.0-*-hpux10* | hppa1.1-*-hpux10* | hppa2*-*-hpux10*) ++ out_host_hook_obj=host-hpux.o ++ host_xmake_file="${host_xmake_file} x-hpux" ++ ;; ++ hppa1.0-*-hpux11* | hppa1.1-*-hpux11* | hppa2*-*-hpux11* | \ ++ hppa*64*-*-hpux11*) ++ out_host_hook_obj=host-hpux.o ++ host_xmake_file="${host_xmake_file} x-hpux" ++ ;; ++ hppa*-*-linux*) ++ out_host_hook_obj=host-hpux.o ++ host_xmake_file="${host_xmake_file} x-hpux" ++ ;; ++ i[34567]86-*-solaris2* | x86_64-*-solaris2*) ++ out_host_hook_obj=host-solaris.o ++ host_xmake_file="${host_xmake_file} x-solaris" ++ ;; ++ i[34567]86-pc-msdosdjgpp*) ++ host_xm_file=i386/xm-djgpp.h ++ host_exeext=.exe ++ # Shorten $target_noncanonical for 8.3 filename conventions. ++ case ${target} in ++ *pc-msdosdjgpp*) ++ target_noncanonical=djgpp ++ ;; ++ esac ++ ;; ++ i[34567]86-*-cygwin* | x86_64-*-cygwin*) ++ host_xm_file=i386/xm-cygwin.h ++ out_host_hook_obj=host-cygwin.o ++ host_xmake_file="${host_xmake_file} i386/x-cygwin" ++ host_exeext=.exe ++ host_lto_plugin_soname=cyglto_plugin-0.dll ++ ;; ++ i[34567]86-*-mingw32*) ++ host_xm_file=i386/xm-mingw32.h ++ host_xmake_file="${host_xmake_file} i386/x-mingw32" ++ host_exeext=.exe ++ out_host_hook_obj=host-mingw32.o ++ host_extra_gcc_objs="${host_extra_gcc_objs} driver-mingw32.o" ++ host_lto_plugin_soname=liblto_plugin-0.dll ++ ;; ++ x86_64-*-mingw*) ++ use_long_long_for_widest_fast_int=yes ++ host_xm_file=i386/xm-mingw32.h ++ host_xmake_file="${host_xmake_file} i386/x-mingw32" ++ host_exeext=.exe ++ out_host_hook_obj=host-mingw32.o ++ host_extra_gcc_objs="${host_extra_gcc_objs} driver-mingw32.o" ++ host_lto_plugin_soname=liblto_plugin-0.dll ++ ;; ++ i[34567]86-*-darwin* | x86_64-*-darwin*) ++ out_host_hook_obj="${out_host_hook_obj} host-i386-darwin.o" ++ host_xmake_file="${host_xmake_file} i386/x-darwin" ++ ;; ++ powerpc-*-darwin*) ++ out_host_hook_obj="${out_host_hook_obj} host-ppc-darwin.o" ++ host_xmake_file="${host_xmake_file} rs6000/x-darwin" ++ ;; ++ 
powerpc64-*-darwin*) ++ out_host_hook_obj="${out_host_hook_obj} host-ppc64-darwin.o" ++ host_xmake_file="${host_xmake_file} rs6000/x-darwin64" ++ ;; ++ rs6000-ibm-aix* | powerpc-ibm-aix*) ++ host_xmake_file="${host_xmake_file} rs6000/x-aix" ++ ;; ++ *-*-solaris2*) ++ out_host_hook_obj=host-solaris.o ++ host_xmake_file="${host_xmake_file} x-solaris" ++ ;; ++ *-*-linux*) ++ out_host_hook_obj=host-linux.o ++ host_xmake_file="${host_xmake_file} x-linux" ++ ;; ++ *-*-openbsd*) ++ out_host_hook_obj=host-openbsd.o ++ host_xmake_file="${host_xmake_file} x-openbsd" ++ ;; ++ *-*-netbsd*) ++ out_host_hook_obj=host-netbsd.o ++ host_xmake_file="${host_xmake_file} x-netbsd" ++ ;; ++ ia64-*-hpux*) ++ use_long_long_for_widest_fast_int=yes ++ out_host_hook_obj=host-hpux.o ++ host_xmake_file="${host_xmake_file} x-hpux" ++ ;; ++ *-*-*vms*) ++ host_xm_file="vms/xm-vms.h" ++ host_xmake_file=vms/x-vms ++ host_exeext=.exe ++ host_can_use_collect2=no ++ ;; ++esac +diff -uNr gcc-10.3.0.org/gcc/configure gcc-10.3.0/gcc/configure +--- gcc-10.3.0.org/gcc/configure 2022-03-23 17:25:14.308350310 +0800 ++++ gcc-10.3.0/gcc/configure 2022-03-23 17:40:29.356280047 +0800 +@@ -7658,6 +7658,9 @@ + mips*-*-*) + enable_fixed_point=yes + ;; ++ loongarch*-*-*) ++ enable_fixed_point=yes ++ ;; + *) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: fixed-point is not supported for this target, ignored" >&5 + $as_echo "$as_me: WARNING: fixed-point is not supported for this target, ignored" >&2;} +@@ -24878,6 +24881,23 @@ + tls_first_minor=16 + tls_as_opt='-32 --fatal-warnings' + ;; ++ loongarch*-*-*) ++ conftest_s=' ++ .section .tdata,"awT",@progbits ++x: ++ .word 2 ++ .text ++ addiu $4, $28, %tlsgd(x) ++ addiu $4, $28, %tlsldm(x) ++ lui $4, %dtprel_hi(x) ++ addiu $4, $4, %dtprel_lo(x) ++ lw $4, %gottprel(x)($28) ++ lui $4, %tprel_hi(x) ++ addiu $4, $4, %tprel_lo(x)' ++ tls_first_major=2 ++ tls_first_minor=16 ++ tls_as_opt='-32 --fatal-warnings' ++ ;; + m68k-*-*) + conftest_s=' + .section 
.tdata,"awT",@progbits +@@ -28106,6 +28126,12 @@ + fi + + ;; ++ loongarch*-*-*) ++ { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for .dtprelword support" >&5 ++$as_echo_n "checking assembler for .dtprelword support... " >&6; } ++ ++$as_echo "#define HAVE_AS_DTPRELWORD 1" >>confdefs.h ++ ;; + s390*-*-*) + { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for .gnu_attribute support" >&5 + $as_echo_n "checking assembler for .gnu_attribute support... " >&6; } +@@ -28289,7 +28315,7 @@ + # Linux on IA64 might be able to use the Intel assembler. + + case "$target" in +- mips*-*-* | *-*-hpux* ) ++ mips*-*-* | loongarch*-*-* | *-*-hpux* ) + if test x$gas_flag = xyes \ + || test x"$host" != x"$build" \ + || test ! -x "$gcc_cv_as" \ +@@ -28310,7 +28336,7 @@ + # version to the per-target configury. + case "$cpu_type" in + aarch64 | alpha | arc | arm | avr | bfin | cris | csky | i386 | m32c | m68k \ +- | microblaze | mips | nds32 | nios2 | pa | riscv | rs6000 | score | sparc \ ++ | microblaze | mips | loongarch | nds32 | nios2 | pa | riscv | rs6000 | score | sparc \ + | tilegx | tilepro | visium | xstormy16 | xtensa) + insn="nop" + ;; +diff -uNr gcc-10.3.0.org/gcc/configure.ac gcc-10.3.0/gcc/configure.ac +--- gcc-10.3.0.org/gcc/configure.ac 2022-03-23 17:25:14.308350310 +0800 ++++ gcc-10.3.0/gcc/configure.ac 2022-03-23 17:40:29.358280031 +0800 +@@ -868,6 +868,9 @@ + mips*-*-*) + enable_fixed_point=yes + ;; ++ loongarch*-*-*) ++ enable_fixed_point=yes ++ ;; + *) + AC_MSG_WARN([fixed-point is not supported for this target, ignored]) + enable_fixed_point=no +@@ -3639,6 +3642,23 @@ + tls_first_minor=16 + tls_as_opt='-32 --fatal-warnings' + ;; ++ loongarch*-*-*) ++ conftest_s=' ++ .section .tdata,"awT",@progbits ++x: ++ .word 2 ++ .text ++ addiu $4, $28, %tlsgd(x) ++ addiu $4, $28, %tlsldm(x) ++ lui $4, %dtprel_hi(x) ++ addiu $4, $4, %dtprel_lo(x) ++ lw $4, %gottprel(x)($28) ++ lui $4, %tprel_hi(x) ++ addiu $4, $4, %tprel_lo(x)' ++ tls_first_major=2 ++ 
tls_first_minor=16 ++ tls_as_opt='-32 --fatal-warnings' ++ ;; + m68k-*-*) + conftest_s=' + .section .tdata,"awT",@progbits +@@ -5088,6 +5108,160 @@ + [AC_DEFINE(HAVE_AS_RISCV_ATTRIBUTE, 1, + [Define if your assembler supports .attribute.])]) + ;; ++ loongarch*-*-*) ++ gcc_GAS_CHECK_FEATURE([explicit relocation support], ++ gcc_cv_as_loongarch_explicit_relocs, [2,14,0],, ++[ lw $4,%gp_rel(foo)($4)],, ++ [if test x$target_cpu_default = x ++ then target_cpu_default=MASK_EXPLICIT_RELOCS ++ else target_cpu_default="($target_cpu_default)|MASK_EXPLICIT_RELOCS" ++ fi]) ++ ++ gcc_GAS_CHECK_FEATURE([-mno-shared support], ++ gcc_cv_as_loongarch_no_shared, [2,16,0], [-mno-shared], [nop],, ++ [AC_DEFINE(HAVE_AS_NO_SHARED, 1, ++ [Define if the assembler understands -mno-shared.])]) ++ ++ gcc_GAS_CHECK_FEATURE([.gnu_attribute support], ++ gcc_cv_as_loongarch_gnu_attribute, [2,18,0],, ++ [.gnu_attribute 4,1],, ++ [AC_DEFINE(HAVE_AS_GNU_ATTRIBUTE, 1, ++ [Define if your assembler supports .gnu_attribute.])]) ++ ++ gcc_GAS_CHECK_FEATURE([.module support], ++ gcc_cv_as_loongarch_dot_module,,[-32], ++ [.module mips2 ++ .module fp=xx],, ++ [AC_DEFINE(HAVE_AS_DOT_MODULE, 1, ++ [Define if your assembler supports .module.])]) ++ if test x$gcc_cv_as_loongarch_dot_module = xno \ ++ && test x$with_fp_32 != x; then ++ AC_MSG_ERROR( ++ [Requesting --with-fp-32= requires assembler support for .module.]) ++ fi ++ ++ gcc_GAS_CHECK_FEATURE([.micromips support], ++ gcc_cv_as_micromips_support,,[--fatal-warnings], ++ [.set micromips],, ++ [AC_DEFINE(HAVE_GAS_MICROMIPS, 1, ++ [Define if your assembler supports the .set micromips directive])]) ++ ++ gcc_GAS_CHECK_FEATURE([.dtprelword support], ++ gcc_cv_as_loongarch_dtprelword, [2,18,0],, ++ [.section .tdata,"awT",@progbits ++x: ++ .word 2 ++ .text ++ .dtprelword x+0x8000],, ++ [AC_DEFINE(HAVE_AS_DTPRELWORD, 1, ++ [Define if your assembler supports .dtprelword.])]) ++ ++ gcc_GAS_CHECK_FEATURE([DSPR1 mult with four accumulators support], ++ 
gcc_cv_as_loongarch_dspr1_mult,,, ++[ .set mips32r2 ++ .set nodspr2 ++ .set dsp ++ madd $ac3,$4,$5 ++ maddu $ac3,$4,$5 ++ msub $ac3,$4,$5 ++ msubu $ac3,$4,$5 ++ mult $ac3,$4,$5 ++ multu $ac3,$4,$5],, ++ [AC_DEFINE(HAVE_AS_DSPR1_MULT, 1, ++ [Define if your assembler supports DSPR1 mult.])]) ++ ++ AC_MSG_CHECKING(assembler and linker for explicit JALR relocation) ++ gcc_cv_as_ld_jalr_reloc=no ++ if test $gcc_cv_as_loongarch_explicit_relocs = yes; then ++ if test $in_tree_ld = yes ; then ++ if test "$gcc_cv_gld_major_version" -eq 2 -a "$gcc_cv_gld_minor_version" -ge 20 -o "$gcc_cv_gld_major_version" -gt 2 \ ++ && test $in_tree_ld_is_elf = yes; then ++ gcc_cv_as_ld_jalr_reloc=yes ++ fi ++ elif test x$gcc_cv_as != x -a x$gcc_cv_ld != x -a x$gcc_cv_objdump != x; then ++ echo ' .ent x' > conftest.s ++ echo 'x: lw $2,%got_disp(y)($3)' >> conftest.s ++ echo ' lw $25,%call16(y)($28)' >> conftest.s ++ echo ' .reloc 1f,R_LOONGISA_JALR,y' >> conftest.s ++ echo '1: jalr $25' >> conftest.s ++ echo ' .reloc 1f,R_LOONGISA_JALR,x' >> conftest.s ++ echo '1: jalr $25' >> conftest.s ++ echo ' .end x' >> conftest.s ++ if $gcc_cv_as -o conftest.o conftest.s >/dev/null 2>&AS_MESSAGE_LOG_FD \ ++ && $gcc_cv_ld -shared -o conftest.so conftest.o >/dev/null 2>&AS_MESSAGE_LOG_FD; then ++ if $gcc_cv_objdump -d conftest.so | grep jalr >/dev/null 2>&1 \ ++ && $gcc_cv_objdump -d conftest.so | grep "bal.*" >/dev/null 2>&1; then ++ gcc_cv_as_ld_jalr_reloc=yes ++ fi ++ fi ++ rm -f conftest.* ++ fi ++ fi ++ if test $gcc_cv_as_ld_jalr_reloc = yes; then ++ if test x$target_cpu_default = x; then ++ target_cpu_default=MASK_RELAX_PIC_CALLS ++ else ++ target_cpu_default="($target_cpu_default)|MASK_RELAX_PIC_CALLS" ++ fi ++ fi ++ AC_MSG_RESULT($gcc_cv_as_ld_jalr_reloc) ++ ++ AC_CACHE_CHECK([linker for .eh_frame personality relaxation], ++ [gcc_cv_ld_loongarch_personality_relaxation], ++ [gcc_cv_ld_loongarch_personality_relaxation=no ++ if test $in_tree_ld = yes ; then ++ if test "$gcc_cv_gld_major_version" 
-eq 2 \ ++ -a "$gcc_cv_gld_minor_version" -ge 21 \ ++ -o "$gcc_cv_gld_major_version" -gt 2; then ++ gcc_cv_ld_loongarch_personality_relaxation=yes ++ fi ++ elif test x$gcc_cv_as != x \ ++ -a x$gcc_cv_ld != x \ ++ -a x$gcc_cv_readelf != x ; then ++ cat > conftest.s < /dev/null 2>&1 \ ++ && $gcc_cv_ld -o conftest conftest.o -shared > /dev/null 2>&1; then ++ if $gcc_cv_readelf -d conftest 2>&1 \ ++ | grep TEXTREL > /dev/null 2>&1; then ++ : ++ elif $gcc_cv_readelf --relocs conftest 2>&1 \ ++ | grep 'R_LOONGISA_REL32 *$' > /dev/null 2>&1; then ++ : ++ else ++ gcc_cv_ld_loongarch_personality_relaxation=yes ++ fi ++ fi ++ fi ++ rm -f conftest.s conftest.o conftest]) ++ if test x$gcc_cv_ld_loongarch_personality_relaxation = xyes; then ++ AC_DEFINE(HAVE_LD_PERSONALITY_RELAXATION, 1, ++ [Define if your linker can relax absolute .eh_frame personality ++pointers into PC-relative form.]) ++ fi ++ ++ gcc_GAS_CHECK_FEATURE([-mnan= support], ++ gcc_cv_as_loongarch_nan,, ++ [-mnan=2008],,, ++ [AC_DEFINE(HAVE_AS_NAN, 1, ++ [Define if the assembler understands -mnan=.])]) ++ if test x$gcc_cv_as_loongarch_nan = xno \ ++ && test x$with_nan != x; then ++ AC_MSG_ERROR( ++ [Requesting --with-nan= requires assembler support for -mnan=]) ++ fi ++ ;; + s390*-*-*) + gcc_GAS_CHECK_FEATURE([.gnu_attribute support], + gcc_cv_as_s390_gnu_attribute, [2,18,0],, +@@ -5125,7 +5299,7 @@ + # Linux on IA64 might be able to use the Intel assembler. + + case "$target" in +- mips*-*-* | *-*-hpux* ) ++ mips*-*-* | loongarch*-*-* | *-*-hpux* ) + if test x$gas_flag = xyes \ + || test x"$host" != x"$build" \ + || test ! -x "$gcc_cv_as" \ +diff -uNr gcc-10.3.0.org/gcc/doc/install.texi gcc-10.3.0/gcc/doc/install.texi +--- gcc-10.3.0.org/gcc/doc/install.texi 2021-04-08 19:56:28.301743479 +0800 ++++ gcc-10.3.0/gcc/doc/install.texi 2022-03-23 17:40:29.358280031 +0800 +@@ -1113,8 +1113,9 @@ + @itemx --without-multilib-list + Specify what multilibs to build. 
@var{list} is a comma separated list of + values, possibly consisting of a single value. Currently only implemented +-for aarch64*-*-*, arm*-*-*, riscv*-*-*, sh*-*-* and x86-64-*-linux*. The +-accepted values and meaning for each target is given below. ++for aarch64*-*-*, arm*-*-*, loongarch64-*-*, riscv*-*-*, sh*-*-* and ++x86-64-*-linux*. The accepted values and meaning for each target is given ++below. + + @table @code + @item aarch64*-*-* +@@ -1201,6 +1202,14 @@ + @code{-mfloat-abi=hard} + @end multitable + ++@item loongarch*-*-* ++@var{list} is a comma-separated list of the following ABI identifiers: ++@code{lp64d[/base]} @code{lp64f[/base]} @code{lp64s[/base]}, where the ++@code{/base} suffix may be omitted, to enable their respective run-time ++libraries. If @var{list} is empty, @code{default} ++or @option{--with-multilib-list} is not specified, then the default ABI ++as specified by @option{--with-abi} or implied by @option{--target} is selected. ++ + @item riscv*-*-* + @var{list} is a single ABI name. The target architecture must be either + @code{rv32gc} or @code{rv64gc}. This will build a single multilib for the +@@ -4302,6 +4311,34 @@ + + @html +


++@end html ++@anchor{loongarch} ++@heading LoongArch ++LoongArch processor. ++The following LoongArch targets are available: ++@table @code ++@item loongarch64-linux-gnu* ++LoongArch processor running GNU/Linux. This target triplet may be coupled ++with a small set of possible suffixes to identify their default ABI type: ++@table @code ++@item f64 ++Uses @code{lp64d/base} ABI by default. ++@item f32 ++Uses @code{lp64f/base} ABI by default. ++@item sf ++Uses @code{lp64s/base} ABI by default. ++@end table ++ ++@item loongarch64-linux-gnu ++Same as @code{loongarch64-linux-gnuf64}, but may be used with ++@option{--with-abi=*} to configure the default ABI type. ++@end table ++ ++More information about LoongArch can be found at ++@uref{https://github.com/loongson/LoongArch-Documentation}. ++ ++@html ++
+ @end html + @anchor{m32c-x-elf} + @heading m32c-*-elf +diff -uNr gcc-10.3.0.org/gcc/doc/invoke.texi gcc-10.3.0/gcc/doc/invoke.texi +--- gcc-10.3.0.org/gcc/doc/invoke.texi 2022-03-23 17:25:14.248350986 +0800 ++++ gcc-10.3.0/gcc/doc/invoke.texi 2022-03-23 17:40:29.362279999 +0800 +@@ -941,6 +941,16 @@ + @gccoptlist{-mbarrel-shift-enabled -mdivide-enabled -mmultiply-enabled @gol + -msign-extend-enabled -muser-enabled} + ++@emph{LoongArch Options} ++@gccoptlist{-march=@var{cpu-type} -mtune=@var{cpu-type} -mabi=@var{base-abi-type} @gol ++-mfpu=@var{fpu-type} -msoft-float -msingle-float -mdouble-float @gol ++-mbranch-cost=@var{n} -mcheck-zero-division -mno-check-zero-division @gol ++-mcond-move-int -mno-cond-move-int @gol ++-mcond-move-float -mno-cond-move-float @gol ++-mmemcpy -mno-memcpy -mstrict-align @gol ++-mmax-inline-memcpy-size=@var{n} @gol ++-mlra -mcmodel=@var{code-model}} ++ + @emph{M32R/D Options} + @gccoptlist{-m32r2 -m32rx -m32r @gol + -mdebug @gol +@@ -16751,6 +16761,7 @@ + * HPPA Options:: + * IA-64 Options:: + * LM32 Options:: ++* LoongArch Options:: + * M32C Options:: + * M32R/D Options:: + * M680x0 Options:: +@@ -22138,6 +22149,195 @@ + + @end table + ++@node LoongArch Options ++@subsection LoongArch Options ++@cindex LoongArch Options ++ ++These command-line options are defined for LoongArch targets: ++ ++@table @gcctabopt ++@item -march=@var{cpu-type} ++@opindex march ++Generate instructions for the machine type @var{cpu-type}. In contrast to ++@option{-mtune=@var{cpu-type}}, which merely tunes the generated code ++for the specified @var{cpu-type}, @option{-march=@var{cpu-type}} allows GCC ++to generate code that may not run at all on processors other than the one ++indicated. Specifying @option{-march=@var{cpu-type}} implies ++@option{-mtune=@var{cpu-type}}, except where noted otherwise.
++ ++The choices for @var{cpu-type} are: ++ ++@table @samp ++@item native ++This selects the CPU to generate code for at compilation time by determining ++the processor type of the compiling machine. Using @option{-march=native} ++enables all instruction subsets supported by the local machine (hence ++the result might not run on different machines). Using @option{-mtune=native} ++produces code optimized for the local machine under the constraints ++of the selected instruction set. ++@item loongarch64 ++A generic CPU with 64-bit extensions. ++@item la464 ++LoongArch LA464 CPU with LBT, LSX, LASX, LVZ. ++@end table ++ ++ ++@item -mtune=@var{cpu-type} ++@opindex mtune ++Optimize the output for the given processor, specified by microarchitecture ++name. ++ ++@item -mabi=@var{base-abi-type} ++@opindex mabi ++Generate code for the specified calling convention. @gol ++Set base ABI to one of: @gol ++@table @samp ++@item lp64d ++Uses 64-bit general purpose registers and 32/64-bit floating-point ++registers for parameter passing. Data model is LP64, where int ++is 32 bits, while long int and pointers are 64 bits. ++@item lp64f ++Uses 64-bit general purpose registers and 32-bit floating-point ++registers for parameter passing. Data model is LP64, where int ++is 32 bits, while long int and pointers are 64 bits. ++@item lp64s ++Uses 64-bit general purpose registers and no floating-point ++registers for parameter passing. Data model is LP64, where int ++is 32 bits, while long int and pointers are 64 bits. ++@end table ++ ++ ++@item -mfpu=@var{fpu-type} ++@opindex mfpu ++Generate code for the specified FPU type: @gol ++@table @samp ++@item 64 ++Allow the use of hardware floating-point instructions for 32-bit ++and 64-bit operations. ++@item 32 ++Allow the use of hardware floating-point instructions for 32-bit ++operations. ++@item none ++@itemx 0 ++Prevent the use of hardware floating-point instructions.
++@end table ++ ++ ++@item -msoft-float ++@opindex msoft-float ++Force @option{-mfpu=none} and prevent the use of floating-point ++registers for parameter passing. This option may change the target ++ABI. ++ ++@item -msingle-float ++@opindex msingle-float ++Force @option{-mfpu=32} and allow the use of 32-bit floating-point ++registers for parameter passing. This option may change the target ++ABI. ++ ++@item -mdouble-float ++@opindex mdouble-float ++Force @option{-mfpu=64} and allow the use of 32/64-bit floating-point ++registers for parameter passing. This option may change the target ++ABI. ++ ++ ++@item -mbranch-cost=@var{n} ++@opindex mbranch-cost ++Set the cost of branches to roughly @var{n} instructions. ++ ++@item -mcheck-zero-division ++@itemx -mno-check-zero-division ++@opindex mcheck-zero-division ++Trap (do not trap) on integer division by zero. The default is ++@option{-mcheck-zero-division}. ++ ++ ++@item -mcond-move-int ++@itemx -mno-cond-move-int ++@opindex mcond-move-int ++Conditional moves for integral data are enabled (disabled). The default is ++@option{-mcond-move-int}; see @option{-mcond-move-float} for floating-point moves. ++ ++@item -mmemcpy ++@itemx -mno-memcpy ++@opindex mmemcpy ++Force (do not force) the use of memcpy for non-trivial block moves. The default ++is @option{-mno-memcpy}, which allows GCC to inline most constant-sized copies. ++ ++ ++@item -mlra ++@opindex mlra ++Use the new LRA register allocator. By default, the LRA is used. ++ ++@item -mstrict-align ++@itemx -mno-strict-align ++@opindex mstrict-align ++Avoid or allow generating memory accesses that may not be aligned on a natural ++object boundary as described in the architecture specification. ++ ++@item -msmall-data-limit=@var{number} ++@opindex msmall-data-limit ++Put global and static data smaller than @var{number} bytes into a special section (on some targets). ++Default value is 0. ++ ++@item -mmax-inline-memcpy-size=@var{n} ++@opindex mmax-inline-memcpy-size ++Set the max size @var{n} of memcpy to inline, default @var{n} is 1024.
++ ++@item -mcmodel=@var{code-model} ++Default code model is normal. ++Set the code model to one of: ++@table @samp ++@item tiny-static ++@itemize @bullet ++@item ++local symbol and global strong symbol: The data section must be within +/-2MiB addressing space. ++The text section must be within +/-128MiB addressing space. ++@item ++global weak symbol: The got table must be within +/-2GiB addressing space. ++@end itemize ++ ++@item tiny ++@itemize @bullet ++@item ++local symbol: The data section must be within +/-2MiB addressing space. ++The text section must be within +/-128MiB ++addressing space. ++@item ++global symbol: The got table must be within +/-2GiB addressing space. ++@end itemize ++ ++@item normal ++@itemize @bullet ++@item ++local symbol: The data section must be within +/-2GiB addressing space. ++The text section must be within +/-128MiB addressing space. ++@item ++global symbol: The got table must be within +/-2GiB addressing space. ++@end itemize ++ ++@item large ++@itemize @bullet ++@item ++local symbol: The data section must be within +/-2GiB addressing space. ++The text section must be within +/-128GiB addressing space. ++@item ++global symbol: The got table must be within +/-2GiB addressing space. ++@end itemize ++ ++@item extreme(Not implemented yet) ++@itemize @bullet ++@item ++local symbol: The data and text section must be within +/-8EiB addressing space. ++@item ++global symbol: The data got table must be within +/-8EiB addressing space. ++@end itemize ++@end table ++@end table ++ ++ ++ + @node M32C Options + @subsection M32C Options + @cindex M32C options +diff -uNr gcc-10.3.0.org/gcc/doc/md.texi gcc-10.3.0/gcc/doc/md.texi +--- gcc-10.3.0.org/gcc/doc/md.texi 2021-04-08 19:56:28.305743528 +0800 ++++ gcc-10.3.0/gcc/doc/md.texi 2022-03-23 17:40:29.363279991 +0800 +@@ -2739,6 +2739,61 @@ + $r1h + @end table + ++@item LoongArch---@file{config/loongarch/constraints.md} ++@table @code ++@item a ++A constant call global and noplt address. 
++@item c ++A constant call local address. ++@item e ++A register that is used for a function call. ++@item f ++A floating-point register (if available). ++@item h ++A constant call plt address. ++@item j ++A register that is used for a sibling call. ++@item l ++A signed 16-bit constant. ++@item m ++A memory operand whose address is formed by a base register and offset ++that is suitable for use in instructions with the same addressing mode ++as @code{st.w} and @code{ld.w}. ++@item q ++A general-purpose register except for $r0 and $r1 for csr instructions. ++@item t ++A constant call weak address. ++@item u ++A signed 52-bit constant whose low 32 bits are zero (for logic instructions). ++@item v ++A signed 64-bit constant whose low 44 bits are zero (for logic instructions). ++@item z ++A floating-point condition code register. ++@item G ++Floating-point zero. ++@item I ++A signed 12-bit constant (for arithmetic instructions). ++@item J ++Integer zero. ++@item K ++An unsigned 12-bit constant (for logic instructions). ++@item Q ++A 12-bit constant used for logical operations. ++@item W ++A memory address based on a member of @code{BASE_REG_CLASS}. This is ++true for all references. ++@item Yd ++A constant @code{move_operand} that can be safely loaded using ++@code{la}. ++@item ZB ++An address that is held in a general-purpose register. ++The offset is zero. ++@item ZC ++A memory operand whose address is formed by a base register and offset ++that is suitable for use in instructions with the same addressing mode ++as @code{ll.w} and @code{sc.w}. 
++@end table ++ + @item MicroBlaze---@file{config/microblaze/constraints.md} + @table @code + @item d +diff -uNr gcc-10.3.0.org/gcc/testsuite/gcc.dg/20020312-2.c gcc-10.3.0/gcc/testsuite/gcc.dg/20020312-2.c +--- gcc-10.3.0.org/gcc/testsuite/gcc.dg/20020312-2.c 2021-04-08 19:56:28.969751546 +0800 ++++ gcc-10.3.0/gcc/testsuite/gcc.dg/20020312-2.c 2022-03-23 17:40:29.364279983 +0800 +@@ -37,6 +37,8 @@ + /* PIC register is r1, but is used even without -fpic. */ + #elif defined(__lm32__) + /* No pic register. */ ++#elif defined(__loongarch__) ++/* No pic register. */ + #elif defined(__M32R__) + /* No pic register. */ + #elif defined(__m68k__) +diff -uNr gcc-10.3.0.org/gcc/testsuite/gcc.dg/loop-8.c gcc-10.3.0/gcc/testsuite/gcc.dg/loop-8.c +--- gcc-10.3.0.org/gcc/testsuite/gcc.dg/loop-8.c 2021-04-08 19:56:29.045752463 +0800 ++++ gcc-10.3.0/gcc/testsuite/gcc.dg/loop-8.c 2022-03-23 17:40:29.364279983 +0800 +@@ -1,6 +1,6 @@ + /* { dg-do compile } */ + /* { dg-options "-O1 -fdump-rtl-loop2_invariant" } */ +-/* { dg-skip-if "unexpected IV" { "hppa*-*-* mips*-*-* visium-*-* powerpc*-*-* riscv*-*-*" } } */ ++/* { dg-skip-if "unexpected IV" { "hppa*-*-* mips*-*-* visium-*-* powerpc*-*-* riscv*-*-* loongarch*-*-*"} } */ + /* Load immediate on condition is available from z13 on and prevents moving + the load out of the loop, so always run this test with -march=zEC12 that + does not have load immediate on condition. 
*/ +diff -uNr gcc-10.3.0.org/gcc/testsuite/gcc.dg/stack-usage-1.c gcc-10.3.0/gcc/testsuite/gcc.dg/stack-usage-1.c +--- gcc-10.3.0.org/gcc/testsuite/gcc.dg/stack-usage-1.c 2021-04-08 19:56:29.085752947 +0800 ++++ gcc-10.3.0/gcc/testsuite/gcc.dg/stack-usage-1.c 2022-03-23 17:40:29.364279983 +0800 +@@ -103,6 +103,9 @@ + #define SIZE 252 + #elif defined (__csky__) + # define SIZE 252 ++#elif defined (__loongarch__) ++# define SIZE 256 - 16 ++ + #else + # define SIZE 256 + #endif +diff -uNr gcc-10.3.0.org/gcc/testsuite/gcc.dg/torture/stackalign/builtin-apply-2.c gcc-10.3.0/gcc/testsuite/gcc.dg/torture/stackalign/builtin-apply-2.c +--- gcc-10.3.0.org/gcc/testsuite/gcc.dg/torture/stackalign/builtin-apply-2.c 2021-04-08 19:56:29.109753236 +0800 ++++ gcc-10.3.0/gcc/testsuite/gcc.dg/torture/stackalign/builtin-apply-2.c 2022-03-23 17:40:29.364279983 +0800 +@@ -9,7 +9,7 @@ + /* arm_hf_eabi: Variadic funcs use Base AAPCS. Normal funcs use VFP variant. + avr: Variadic funcs don't pass arguments in registers, while normal funcs + do. */ +-/* { dg-skip-if "Variadic funcs use different argument passing from normal funcs" { arm_hf_eabi || { avr-*-* riscv*-*-* or1k*-*-* msp430-*-* amdgcn-*-* pru-*-* } } } */ ++/* { dg-skip-if "Variadic funcs use different argument passing from normal funcs" { arm_hf_eabi || { avr-*-* riscv*-*-* or1k*-*-* msp430-*-* amdgcn-*-* pru-*-* loongarch*-*-*} } } */ + /* { dg-skip-if "Variadic funcs have all args on stack. Normal funcs have args in registers." 
{ nds32*-*-* } { v850*-*-* } } */ + /* { dg-require-effective-target untyped_assembly } */ + +diff -uNr gcc-10.3.0.org/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-3.c gcc-10.3.0/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-3.c +--- gcc-10.3.0.org/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-3.c 2021-04-08 19:56:29.133753527 +0800 ++++ gcc-10.3.0/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-3.c 2022-03-23 17:40:29.364279983 +0800 +@@ -5,7 +5,7 @@ + + When the condition is true, we distribute "(int) (a + b)" as + "(int) a + (int) b", otherwise we keep the original. */ +-/* { dg-do compile { target { ! mips64 } } } */ ++/* { dg-do compile { target { { ! mips64 } && { ! loongarch64*-*-* } } } } */ + /* { dg-options "-O -fno-tree-forwprop -fno-tree-ccp -fwrapv -fdump-tree-fre1-details" } */ + + /* From PR14844. */ +diff -uNr gcc-10.3.0.org/gcc/testsuite/gcc.target/loongarch/loongarch.exp gcc-10.3.0/gcc/testsuite/gcc.target/loongarch/loongarch.exp +--- gcc-10.3.0.org/gcc/testsuite/gcc.target/loongarch/loongarch.exp 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/testsuite/gcc.target/loongarch/loongarch.exp 2022-03-23 17:40:29.364279983 +0800 +@@ -0,0 +1,40 @@ ++# Copyright (C) 2021 Free Software Foundation, Inc. ++ ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# <http://www.gnu.org/licenses/>. ++ ++# GCC testsuite that uses the `dg.exp' driver. ++ ++# Exit immediately if this isn't a LoongArch target. 
++if ![istarget loongarch*-*-*] then { ++ return ++} ++ ++# Load support procs. ++load_lib gcc-dg.exp ++ ++# If a testcase doesn't have special options, use these. ++global DEFAULT_CFLAGS ++if ![info exists DEFAULT_CFLAGS] then { ++ set DEFAULT_CFLAGS " " ++} ++ ++# Initialize `dg'. ++dg-init ++ ++# Main loop. ++dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\]]] \ ++ "" $DEFAULT_CFLAGS ++# All done. ++dg-finish +diff -uNr gcc-10.3.0.org/gcc/testsuite/gcc.target/loongarch/tst-asm-const.c gcc-10.3.0/gcc/testsuite/gcc.target/loongarch/tst-asm-const.c +--- gcc-10.3.0.org/gcc/testsuite/gcc.target/loongarch/tst-asm-const.c 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/gcc/testsuite/gcc.target/loongarch/tst-asm-const.c 2022-03-23 17:40:29.364279983 +0800 +@@ -0,0 +1,16 @@ ++/* Test asm const. */ ++/* { dg-do compile } */ ++/* { dg-final { scan-assembler-times "foo:.*\\.long 1061109567.*\\.long 52" 1 } } */ ++int foo () ++{ ++ __asm__ volatile ( ++ "foo:" ++ "\n\t" ++ ".long %a0\n\t" ++ ".long %a1\n\t" ++ : ++ :"i"(0x3f3f3f3f), "i"(52) ++ : ++ ); ++} ++ +diff -uNr gcc-10.3.0.org/gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C gcc-10.3.0/gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C +--- gcc-10.3.0.org/gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C 2021-04-08 19:56:28.721748552 +0800 ++++ gcc-10.3.0/gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C 2022-03-23 17:40:29.364279983 +0800 +@@ -1,6 +1,6 @@ + // PR c++/49673: check that test_data goes into .rodata + // { dg-do compile { target c++11 } } +-// { dg-additional-options -G0 { target { { alpha*-*-* frv*-*-* ia64-*-* lm32*-*-* m32r*-*-* microblaze*-*-* mips*-*-* nios2-*-* powerpc*-*-* rs6000*-*-* } && { ! { *-*-darwin* *-*-aix* alpha*-*-*vms* } } } } } ++// { dg-additional-options -G0 { target { { alpha*-*-* frv*-*-* ia64-*-* lm32*-*-* m32r*-*-* microblaze*-*-* mips*-*-* loongarch*-*-* nios2-*-* powerpc*-*-* rs6000*-*-* } && { ! 
{ *-*-darwin* *-*-aix* alpha*-*-*vms* } } } } } + // { dg-final { scan-assembler "\\.rdata" { target mips*-*-* } } } + // { dg-final { scan-assembler "rodata" { target { { *-*-linux-gnu *-*-gnu* *-*-elf } && { ! { mips*-*-* riscv*-*-* } } } } } } + +diff -uNr gcc-10.3.0.org/gcc/testsuite/g++.old-deja/g++.abi/ptrmem.C gcc-10.3.0/gcc/testsuite/g++.old-deja/g++.abi/ptrmem.C +--- gcc-10.3.0.org/gcc/testsuite/g++.old-deja/g++.abi/ptrmem.C 2021-04-08 19:56:28.877750435 +0800 ++++ gcc-10.3.0/gcc/testsuite/g++.old-deja/g++.abi/ptrmem.C 2022-03-23 17:40:29.365279975 +0800 +@@ -7,7 +7,7 @@ + function. However, some platforms use all bits to encode a + function pointer. Such platforms use the lowest bit of the delta, + that is shifted left by one bit. */ +-#if defined __MN10300__ || defined __SH5__ || defined __arm__ || defined __thumb__ || defined __mips__ || defined __aarch64__ || defined __PRU__ ++#if defined __MN10300__ || defined __SH5__ || defined __arm__ || defined __thumb__ || defined __mips__ || defined __aarch64__ || defined __PRU__ || defined __loongarch__ + #define ADJUST_PTRFN(func, virt) ((void (*)())(func)) + #define ADJUST_DELTA(delta, virt) (((delta) << 1) + !!(virt)) + #else +diff -uNr gcc-10.3.0.org/gcc/testsuite/g++.old-deja/g++.pt/ptrmem6.C gcc-10.3.0/gcc/testsuite/g++.old-deja/g++.pt/ptrmem6.C +--- gcc-10.3.0.org/gcc/testsuite/g++.old-deja/g++.pt/ptrmem6.C 2021-04-08 19:56:28.913750871 +0800 ++++ gcc-10.3.0/gcc/testsuite/g++.old-deja/g++.pt/ptrmem6.C 2022-03-23 17:40:29.365279975 +0800 +@@ -25,7 +25,7 @@ + h<&B::j>(); // { dg-error "" } + g<(void (A::*)()) &A::f>(); // { dg-error "" "" { xfail c++11 } } + h<(int A::*) &A::i>(); // { dg-error "" "" { xfail c++11 } } +- g<(void (A::*)()) &B::f>(); // { dg-error "" "" { xfail { c++11 && { aarch64*-*-* arm*-*-* mips*-*-* } } } } ++ g<(void (A::*)()) &B::f>(); // { dg-error "" "" { xfail { c++11 && { aarch64*-*-* arm*-*-* mips*-*-* loongarch*-*-* } } } } + h<(int A::*) &B::j>(); // { dg-error "" } + g<(void 
(A::*)()) 0>(); // { dg-error "" "" { target { ! c++11 } } } + h<(int A::*) 0>(); // { dg-error "" "" { target { ! c++11 } } } +diff -uNr gcc-10.3.0.org/gcc/testsuite/go.test/go-test.exp gcc-10.3.0/gcc/testsuite/go.test/go-test.exp +--- gcc-10.3.0.org/gcc/testsuite/go.test/go-test.exp 2021-04-08 19:56:29.633759564 +0800 ++++ gcc-10.3.0/gcc/testsuite/go.test/go-test.exp 2022-03-23 17:40:29.365279975 +0800 +@@ -251,6 +251,9 @@ + "riscv64-*-*" { + set goarch "riscv64" + } ++ "loongarch64-*-*" { ++ set goarch "loongarch64" ++ } + "s390*-*-*" { + if [check_effective_target_ilp32] { + set goarch "s390" +diff -uNr gcc-10.3.0.org/gcc/testsuite/lib/target-supports.exp gcc-10.3.0/gcc/testsuite/lib/target-supports.exp +--- gcc-10.3.0.org/gcc/testsuite/lib/target-supports.exp 2021-04-08 19:56:29.661759901 +0800 ++++ gcc-10.3.0/gcc/testsuite/lib/target-supports.exp 2022-03-23 17:40:29.366279967 +0800 +@@ -287,6 +287,10 @@ + return 1 + } + ++ if { [ string first "loongarch" $target_cpu ] >= 0 } { ++ return 1 ++ } ++ + # All AIX targets should support it + + if { [istarget *-*-aix*] } { +@@ -707,6 +711,7 @@ + || [istarget m68k-*-elf] + || [istarget m68k-*-uclinux*] + || [istarget mips*-*-elf*] ++ || [istarget loongarch*-*-elf*] + || [istarget mmix-*-*] + || [istarget mn10300-*-elf*] + || [istarget moxie-*-elf*] +@@ -1277,6 +1282,14 @@ + }] + } + ++ if { [istarget loongarch*-*-*] } { ++ return [check_no_compiler_messages hard_float assembly { ++ #if (defined __loongarch_soft_float) ++ #error __loongarch_soft_float ++ #endif ++ }] ++ } ++ + # This proc is actually checking the availabilty of FPU + # support for doubles, so on the RX we must fail if the + # 64-bit double multilib has been selected. 
+@@ -6380,6 +6393,7 @@ + expr { [istarget i?86-*-*] || [istarget x86_64-*-*] + || [istarget powerpc*-*-*] + || [istarget mips-sde-elf] ++ || [istarget loongarch-sde-elf] + || [istarget mipsisa64*-*-*] + || [istarget ia64-*-*] + || [istarget aarch64*-*-*] +@@ -7700,6 +7714,7 @@ + || [istarget crisv32-*-*] || [istarget cris-*-*] + || ([istarget sparc*-*-*] && [check_effective_target_sparc_v9]) + || ([istarget arc*-*-*] && [check_effective_target_arc_atomic]) ++ || [istarget loongarch*-*-*] + || [check_effective_target_mips_llsc] }}] + } + +@@ -9626,6 +9641,7 @@ + || [istarget frv*-*-*] + || [istarget i?86-*-*] || [istarget x86_64-*-*] + || [istarget mips*-*-*] ++ || [istarget loongarch*-*-*] + || [istarget s390*-*-*] + || [istarget riscv*-*-*] + || [istarget sh*-*-*] } { +diff -uNr gcc-10.3.0.org/libgcc/config/loongarch/crtfastmath.c gcc-10.3.0/libgcc/config/loongarch/crtfastmath.c +--- gcc-10.3.0.org/libgcc/config/loongarch/crtfastmath.c 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/libgcc/config/loongarch/crtfastmath.c 2022-03-23 17:40:29.366279967 +0800 +@@ -0,0 +1,53 @@ ++/* Copyright (C) 2010-2018 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. 
++ ++ You should have received a copy of the GNU General Public License ++ and a copy of the GCC Runtime Library Exception along with this ++ program; see the files COPYING3 and COPYING.RUNTIME respectively. ++ If not, see <http://www.gnu.org/licenses/>. */ ++ ++#ifdef __loongarch_hard_float ++ ++/* Flush denormalized numbers to zero. */ ++#define _FPU_FLUSH_TZ 0x1000000 ++ ++/* Rounding control. */ ++#define _FPU_RC_NEAREST 0x0 /* RECOMMENDED */ ++#define _FPU_RC_ZERO 0x1 ++#define _FPU_RC_UP 0x2 ++#define _FPU_RC_DOWN 0x3 ++ ++/* Enable interrupts for IEEE exceptions. */ ++#define _FPU_IEEE 0x00000F80 ++ ++/* Access FCSR0: movfcsr2gr reads it into a GPR, movgr2fcsr writes it from a GPR. */ ++#define _FPU_GETCW(cw) __asm__ volatile ("movfcsr2gr %0,$r0" : "=r" (cw)) ++#define _FPU_SETCW(cw) __asm__ volatile ("movgr2fcsr $r0,%0" : : "r" (cw)) ++ ++static void __attribute__((constructor)) ++set_fast_math (void) ++{ ++ unsigned int fcr; ++ ++ /* Flush to zero, round to nearest, IEEE exceptions disabled. */ ++ fcr = _FPU_FLUSH_TZ | _FPU_RC_NEAREST; ++ ++ _FPU_SETCW(fcr); ++} ++ ++#endif /* __loongarch_hard_float */ +diff -uNr gcc-10.3.0.org/libgcc/config/loongarch/crti.S gcc-10.3.0/libgcc/config/loongarch/crti.S +--- gcc-10.3.0.org/libgcc/config/loongarch/crti.S 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/libgcc/config/loongarch/crti.S 2022-03-23 17:40:29.366279967 +0800 +@@ -0,0 +1,43 @@ ++/* Copyright (C) 2001-2018 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. 
++ ++Under Section 7 of GPL version 3, you are granted additional ++permissions described in the GCC Runtime Library Exception, version ++3.1, as published by the Free Software Foundation. ++ ++You should have received a copy of the GNU General Public License and ++a copy of the GCC Runtime Library Exception along with this program; ++see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++. */ ++ ++/* 4 slots for argument spill area. 1 for cpreturn, 1 for stack. ++ Return spill offset of 40 and 20. Aligned to 16 bytes for n32. */ ++ ++ .section .init,"ax",@progbits ++ .globl _init ++ .type _init,@function ++_init: ++ addi.d $r3,$r3,-48 ++ st.d $r1,$r3,40 ++ addi.d $r3,$r3,48 ++ jirl $r0,$r1,0 ++ ++ .section .fini,"ax",@progbits ++ .globl _fini ++ .type _fini,@function ++_fini: ++ addi.d $r3,$r3,-48 ++ st.d $r1,$r3,40 ++ addi.d $r3,$r3,48 ++ jirl $r0,$r1,0 +diff -uNr gcc-10.3.0.org/libgcc/config/loongarch/crtn.S gcc-10.3.0/libgcc/config/loongarch/crtn.S +--- gcc-10.3.0.org/libgcc/config/loongarch/crtn.S 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/libgcc/config/loongarch/crtn.S 2022-03-23 17:40:29.366279967 +0800 +@@ -0,0 +1,39 @@ ++/* Copyright (C) 2001-2018 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++Under Section 7 of GPL version 3, you are granted additional ++permissions described in the GCC Runtime Library Exception, version ++3.1, as published by the Free Software Foundation. 
++ ++You should have received a copy of the GNU General Public License and ++a copy of the GCC Runtime Library Exception along with this program; ++see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++. */ ++ ++/* 4 slots for argument spill area. 1 for cpreturn, 1 for stack. ++ Return spill offset of 40 and 20. Aligned to 16 bytes for n32. */ ++ ++ ++ .section .init,"ax",@progbits ++init: ++ ld.d $r1,$r3,40 ++ addi.d $r3,$r3,48 ++ jirl $r0,$r1,0 ++ ++ .section .fini,"ax",@progbits ++fini: ++ ld.d $r1,$r3,40 ++ addi.d $r3,$r3,48 ++ jirl $r0,$r1,0 ++ +diff -uNr gcc-10.3.0.org/libgcc/config/loongarch/gthr-loongnixsde.h gcc-10.3.0/libgcc/config/loongarch/gthr-loongnixsde.h +--- gcc-10.3.0.org/libgcc/config/loongarch/gthr-loongnixsde.h 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/libgcc/config/loongarch/gthr-loongnixsde.h 2022-03-23 17:40:29.366279967 +0800 +@@ -0,0 +1,237 @@ ++/* LARCH SDE threads compatibility routines for libgcc2 and libobjc. */ ++/* Compile this one with gcc. */ ++/* Copyright (C) 2006-2018 Free Software Foundation, Inc. ++ Contributed by Nigel Stephens ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++Under Section 7 of GPL version 3, you are granted additional ++permissions described in the GCC Runtime Library Exception, version ++3.1, as published by the Free Software Foundation. ++ ++You should have received a copy of the GNU General Public License and ++a copy of the GCC Runtime Library Exception along with this program; ++see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see ++. */ ++ ++#ifndef GCC_GTHR_LARCHSDE_H ++#define GCC_GTHR_LARCHSDE_H ++ ++/* LARCH SDE threading API specific definitions. ++ Easy, since the interface is pretty much one-to-one. */ ++ ++#define __GTHREADS 1 ++ ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++typedef __sdethread_key_t __gthread_key_t; ++typedef __sdethread_once_t __gthread_once_t; ++typedef __sdethread_mutex_t __gthread_mutex_t; ++ ++typedef struct { ++ long depth; ++ __sdethread_t owner; ++ __sdethread_mutex_t actual; ++} __gthread_recursive_mutex_t; ++ ++#define __GTHREAD_MUTEX_INIT __SDETHREAD_MUTEX_INITIALIZER("gthr") ++#define __GTHREAD_ONCE_INIT __SDETHREAD_ONCE_INIT ++static inline int ++__gthread_recursive_mutex_init_function(__gthread_recursive_mutex_t *__mutex); ++#define __GTHREAD_RECURSIVE_MUTEX_INIT_FUNCTION __gthread_recursive_mutex_init_function ++ ++#if SUPPORTS_WEAK && GTHREAD_USE_WEAK ++# define __gthrw(name) \ ++ static __typeof(name) __gthrw_ ## name __attribute__ ((__weakref__(#name))); ++# define __gthrw_(name) __gthrw_ ## name ++#else ++# define __gthrw(name) ++# define __gthrw_(name) name ++#endif ++ ++__gthrw(__sdethread_once) ++__gthrw(__sdethread_key_create) ++__gthrw(__sdethread_key_delete) ++__gthrw(__sdethread_getspecific) ++__gthrw(__sdethread_setspecific) ++ ++__gthrw(__sdethread_self) ++ ++__gthrw(__sdethread_mutex_lock) ++__gthrw(__sdethread_mutex_trylock) ++__gthrw(__sdethread_mutex_unlock) ++ ++__gthrw(__sdethread_mutex_init) ++ ++__gthrw(__sdethread_threading) ++ ++#if SUPPORTS_WEAK && GTHREAD_USE_WEAK ++ ++static inline int ++__gthread_active_p (void) ++{ ++ return !!(void *)&__sdethread_threading; ++} ++ ++#else /* not SUPPORTS_WEAK */ ++ ++static inline int ++__gthread_active_p (void) ++{ ++ return 1; ++} ++ ++#endif /* SUPPORTS_WEAK */ ++ ++static inline int ++__gthread_once (__gthread_once_t *__once, void (*__func) (void)) ++{ ++ if (__gthread_active_p ()) ++ return __gthrw_(__sdethread_once) (__once, __func); ++ else 
++ return -1; ++} ++ ++static inline int ++__gthread_key_create (__gthread_key_t *__key, void (*__dtor) (void *)) ++{ ++ return __gthrw_(__sdethread_key_create) (__key, __dtor); ++} ++ ++static inline int ++__gthread_key_delete (__gthread_key_t __key) ++{ ++ return __gthrw_(__sdethread_key_delete) (__key); ++} ++ ++static inline void * ++__gthread_getspecific (__gthread_key_t __key) ++{ ++ return __gthrw_(__sdethread_getspecific) (__key); ++} ++ ++static inline int ++__gthread_setspecific (__gthread_key_t __key, const void *__ptr) ++{ ++ return __gthrw_(__sdethread_setspecific) (__key, __ptr); ++} ++ ++static inline int ++__gthread_mutex_destroy (__gthread_mutex_t * UNUSED(__mutex)) ++{ ++ return 0; ++} ++ ++static inline int ++__gthread_mutex_lock (__gthread_mutex_t *__mutex) ++{ ++ if (__gthread_active_p ()) ++ return __gthrw_(__sdethread_mutex_lock) (__mutex); ++ else ++ return 0; ++} ++ ++static inline int ++__gthread_mutex_trylock (__gthread_mutex_t *__mutex) ++{ ++ if (__gthread_active_p ()) ++ return __gthrw_(__sdethread_mutex_trylock) (__mutex); ++ else ++ return 0; ++} ++ ++static inline int ++__gthread_mutex_unlock (__gthread_mutex_t *__mutex) ++{ ++ if (__gthread_active_p ()) ++ return __gthrw_(__sdethread_mutex_unlock) (__mutex); ++ else ++ return 0; ++} ++ ++static inline int ++__gthread_recursive_mutex_init_function (__gthread_recursive_mutex_t *__mutex) ++{ ++ __mutex->depth = 0; ++ __mutex->owner = __gthrw_(__sdethread_self) (); ++ return __gthrw_(__sdethread_mutex_init) (&__mutex->actual, NULL); ++} ++ ++static inline int ++__gthread_recursive_mutex_lock (__gthread_recursive_mutex_t *__mutex) ++{ ++ if (__gthread_active_p ()) ++ { ++ __sdethread_t __me = __gthrw_(__sdethread_self) (); ++ ++ if (__mutex->owner != __me) ++ { ++ __gthrw_(__sdethread_mutex_lock) (&__mutex->actual); ++ __mutex->owner = __me; ++ } ++ ++ __mutex->depth++; ++ } ++ return 0; ++} ++ ++static inline int ++__gthread_recursive_mutex_trylock (__gthread_recursive_mutex_t 
*__mutex) ++{ ++ if (__gthread_active_p ()) ++ { ++ __sdethread_t __me = __gthrw_(__sdethread_self) (); ++ ++ if (__mutex->owner != __me) ++ { ++ if (__gthrw_(__sdethread_mutex_trylock) (&__mutex->actual)) ++ return 1; ++ __mutex->owner = __me; ++ } ++ ++ __mutex->depth++; ++ } ++ return 0; ++} ++ ++static inline int ++__gthread_recursive_mutex_unlock (__gthread_recursive_mutex_t *__mutex) ++{ ++ if (__gthread_active_p ()) ++ { ++ if (--__mutex->depth == 0) ++ { ++ __mutex->owner = (__sdethread_t) 0; ++ __gthrw_(__sdethread_mutex_unlock) (&__mutex->actual); ++ } ++ } ++ return 0; ++} ++ ++static inline int ++__gthread_recursive_mutex_destroy (__gthread_recursive_mutex_t ++ * UNUSED(__mutex)) ++{ ++ return 0; ++} ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* ! GCC_GTHR_LARCHSDE_H */ +diff -uNr gcc-10.3.0.org/libgcc/config/loongarch/linux-unwind.h gcc-10.3.0/libgcc/config/loongarch/linux-unwind.h +--- gcc-10.3.0.org/libgcc/config/loongarch/linux-unwind.h 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/libgcc/config/loongarch/linux-unwind.h 2022-03-23 17:40:29.367279959 +0800 +@@ -0,0 +1,91 @@ ++/* DWARF2 EH unwinding support for LARCH Linux. ++ Copyright (C) 2004-2018 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++Under Section 7 of GPL version 3, you are granted additional ++permissions described in the GCC Runtime Library Exception, version ++3.1, as published by the Free Software Foundation. 
++ ++You should have received a copy of the GNU General Public License and ++a copy of the GCC Runtime Library Exception along with this program; ++see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++. */ ++ ++#ifndef inhibit_libc ++/* Do code reading to identify a signal frame, and set the frame ++ state data appropriately. See unwind-dw2.c for the structs. */ ++ ++#include ++#include ++ ++/* The third parameter to the signal handler points to something with ++ * this structure defined in asm/ucontext.h, but the name clashes with ++ * struct ucontext from sys/ucontext.h so this private copy is used. */ ++typedef struct _sig_ucontext { ++ unsigned long uc_flags; ++ struct _sig_ucontext *uc_link; ++ stack_t uc_stack; ++ struct sigcontext uc_mcontext; ++ sigset_t uc_sigmask; ++} _sig_ucontext_t; ++ ++#define MD_FALLBACK_FRAME_STATE_FOR loongarch_fallback_frame_state ++ ++static _Unwind_Reason_Code ++loongarch_fallback_frame_state (struct _Unwind_Context *context, ++ _Unwind_FrameState *fs) ++{ ++ u_int32_t *pc = (u_int32_t *) context->ra; ++ struct sigcontext *sc; ++ _Unwind_Ptr new_cfa; ++ int i; ++ ++ /* 03822c0b dli a7, 0x8b (sigreturn) */ ++ /* 002b0000 syscall 0 */ ++ if (pc[1] != 0x002b0000) ++ return _URC_END_OF_STACK; ++ if (pc[0] == 0x03822c0b) ++ { ++ struct rt_sigframe { ++ u_int32_t ass[4]; /* Argument save space for o32. 
*/ ++ u_int32_t trampoline[2]; ++ siginfo_t info; ++ _sig_ucontext_t uc; ++ } *rt_ = context->cfa; ++ sc = &rt_->uc.uc_mcontext; ++ } ++ else ++ return _URC_END_OF_STACK; ++ ++ new_cfa = (_Unwind_Ptr) sc; ++ fs->regs.cfa_how = CFA_REG_OFFSET; ++ fs->regs.cfa_reg = __LIBGCC_STACK_POINTER_REGNUM__; ++ fs->regs.cfa_offset = new_cfa - (_Unwind_Ptr) context->cfa; ++ ++ for (i = 0; i < 32; i++) { ++ fs->regs.reg[i].how = REG_SAVED_OFFSET; ++ fs->regs.reg[i].loc.offset ++ = (_Unwind_Ptr)&(sc->sc_regs[i]) - new_cfa; ++ } ++ ++ fs->signal_frame = 1; ++ fs->regs.reg[__LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__].how ++ = REG_SAVED_VAL_OFFSET; ++ fs->regs.reg[__LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__].loc.offset ++ = (_Unwind_Ptr)(sc->sc_pc) - new_cfa; ++ fs->retaddr_column = __LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__; ++ ++ return _URC_NO_REASON; ++} ++#endif +diff -uNr gcc-10.3.0.org/libgcc/config/loongarch/sfp-machine.h gcc-10.3.0/libgcc/config/loongarch/sfp-machine.h +--- gcc-10.3.0.org/libgcc/config/loongarch/sfp-machine.h 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/libgcc/config/loongarch/sfp-machine.h 2022-03-23 17:40:29.367279959 +0800 +@@ -0,0 +1,148 @@ ++/* softfp machine description for LARCH. ++ Copyright (C) 2009-2018 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++Under Section 7 of GPL version 3, you are granted additional ++permissions described in the GCC Runtime Library Exception, version ++3.1, as published by the Free Software Foundation. 
++ ++You should have received a copy of the GNU General Public License and ++a copy of the GCC Runtime Library Exception along with this program; ++see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++. */ ++ ++#ifdef __loongarch64 ++#define _FP_W_TYPE_SIZE 64 ++#define _FP_W_TYPE unsigned long long ++#define _FP_WS_TYPE signed long long ++#define _FP_I_TYPE long long ++ ++typedef int TItype __attribute__ ((mode (TI))); ++typedef unsigned int UTItype __attribute__ ((mode (TI))); ++#define TI_BITS (__CHAR_BIT__ * (int) sizeof (TItype)) ++ ++#define _FP_MUL_MEAT_S(R,X,Y) \ ++ _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm) ++#define _FP_MUL_MEAT_D(R,X,Y) \ ++ _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) ++#define _FP_MUL_MEAT_Q(R,X,Y) \ ++ _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) ++ ++#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_udiv_norm(S,R,X,Y) ++#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_1_udiv_norm(D,R,X,Y) ++#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_2_udiv(Q,R,X,Y) ++ ++# define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) ++# define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1) ++# define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1 ++#else ++#define _FP_W_TYPE_SIZE 32 ++#define _FP_W_TYPE unsigned int ++#define _FP_WS_TYPE signed int ++#define _FP_I_TYPE int ++ ++#define _FP_MUL_MEAT_S(R,X,Y) \ ++ _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm) ++#define _FP_MUL_MEAT_D(R,X,Y) \ ++ _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) ++#define _FP_MUL_MEAT_Q(R,X,Y) \ ++ _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) ++ ++#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_udiv_norm(S,R,X,Y) ++#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y) ++#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y) ++ ++# define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) ++# define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1 ++# define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1 ++#endif ++ ++/* The type of the result 
of a floating point comparison. This must ++ match __libgcc_cmp_return__ in GCC for the target. */ ++typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__))); ++#define CMPtype __gcc_CMPtype ++ ++#define _FP_NANSIGN_S 0 ++#define _FP_NANSIGN_D 0 ++#define _FP_NANSIGN_Q 0 ++ ++#define _FP_KEEPNANFRACP 1 ++# define _FP_QNANNEGATEDP 0 ++ ++/* NaN payloads should be preserved for NAN2008. */ ++# define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ ++ do \ ++ { \ ++ R##_s = X##_s; \ ++ _FP_FRAC_COPY_##wc (R, X); \ ++ R##_c = FP_CLS_NAN; \ ++ } \ ++ while (0) ++ ++#ifdef __loongarch_hard_float ++#define FP_EX_INVALID 0x100000 ++#define FP_EX_DIVZERO 0x080000 ++#define FP_EX_OVERFLOW 0x040000 ++#define FP_EX_UNDERFLOW 0x020000 ++#define FP_EX_INEXACT 0x010000 ++#define FP_EX_ALL \ ++ (FP_EX_INVALID | FP_EX_DIVZERO | FP_EX_OVERFLOW | FP_EX_UNDERFLOW \ ++ | FP_EX_INEXACT) ++ ++#define FP_EX_ENABLE_SHIFT 16 ++#define FP_EX_CAUSE_SHIFT 8 ++ ++#define FP_RND_NEAREST 0x000 ++#define FP_RND_ZERO 0x100 ++#define FP_RND_PINF 0x200 ++#define FP_RND_MINF 0x300 ++#define FP_RND_MASK 0x300 ++ ++#define _FP_DECL_EX \ ++ unsigned long int _fcsr __attribute__ ((unused)) = FP_RND_NEAREST ++ ++#define FP_INIT_ROUNDMODE \ ++ do { \ ++ _fcsr = __builtin_loongarch_movfcsr2gr (0); \ ++ } while (0) ++ ++#define FP_ROUNDMODE (_fcsr & FP_RND_MASK) ++ ++#define FP_TRAPPING_EXCEPTIONS ((_fcsr << FP_EX_ENABLE_SHIFT) & FP_EX_ALL) ++ ++#define FP_HANDLE_EXCEPTIONS \ ++ do { \ ++ _fcsr &= ~(FP_EX_ALL << FP_EX_CAUSE_SHIFT); \ ++ _fcsr |= _fex | (_fex << FP_EX_CAUSE_SHIFT); \ ++ __builtin_loongarch_movgr2fcsr (0, _fcsr); \ ++ } while (0) ++ ++#else ++#define FP_EX_INVALID (1 << 4) ++#define FP_EX_DIVZERO (1 << 3) ++#define FP_EX_OVERFLOW (1 << 2) ++#define FP_EX_UNDERFLOW (1 << 1) ++#define FP_EX_INEXACT (1 << 0) ++#endif ++ ++#define _FP_TININESS_AFTER_ROUNDING 1 ++ ++#define __LITTLE_ENDIAN 1234 ++ ++# define __BYTE_ORDER __LITTLE_ENDIAN ++ ++/* Define ALIASNAME as a strong alias for NAME. 
*/ ++# define strong_alias(name, aliasname) _strong_alias(name, aliasname) ++# define _strong_alias(name, aliasname) \ ++ extern __typeof (name) aliasname __attribute__ ((alias (#name))); +diff -uNr gcc-10.3.0.org/libgcc/config/loongarch/t-crtstuff gcc-10.3.0/libgcc/config/loongarch/t-crtstuff +--- gcc-10.3.0.org/libgcc/config/loongarch/t-crtstuff 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/libgcc/config/loongarch/t-crtstuff 2022-03-23 17:40:29.367279959 +0800 +@@ -0,0 +1,2 @@ ++# Don't let CTOR_LIST end up in sdata section. ++CRTSTUFF_T_CFLAGS = -G 0 +diff -uNr gcc-10.3.0.org/libgcc/config/loongarch/t-elf gcc-10.3.0/libgcc/config/loongarch/t-elf +--- gcc-10.3.0.org/libgcc/config/loongarch/t-elf 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/libgcc/config/loongarch/t-elf 2022-03-23 17:40:29.367279959 +0800 +@@ -0,0 +1,3 @@ ++# We must build libgcc2.a with -G 0, in case the user wants to link ++# without the $gp register. ++HOST_LIBGCC2_CFLAGS += -G 0 +diff -uNr gcc-10.3.0.org/libgcc/config/loongarch/t-loongarch gcc-10.3.0/libgcc/config/loongarch/t-loongarch +--- gcc-10.3.0.org/libgcc/config/loongarch/t-loongarch 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/libgcc/config/loongarch/t-loongarch 2022-03-23 17:40:29.367279959 +0800 +@@ -0,0 +1,9 @@ ++LIB2_SIDITI_CONV_FUNCS = yes ++ ++softfp_float_modes := ++softfp_int_modes := si di ++softfp_extensions := ++softfp_truncations := ++softfp_exclude_libgcc2 := n ++ ++LIB2ADD_ST += $(srcdir)/config/loongarch/lib2funcs.c +diff -uNr gcc-10.3.0.org/libgcc/config/loongarch/t-loongarch64 gcc-10.3.0/libgcc/config/loongarch/t-loongarch64 +--- gcc-10.3.0.org/libgcc/config/loongarch/t-loongarch64 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/libgcc/config/loongarch/t-loongarch64 2022-03-23 17:40:29.367279959 +0800 +@@ -0,0 +1 @@ ++softfp_int_modes += ti +diff -uNr gcc-10.3.0.org/libgcc/config/loongarch/t-sdemtk gcc-10.3.0/libgcc/config/loongarch/t-sdemtk +--- gcc-10.3.0.org/libgcc/config/loongarch/t-sdemtk 
1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/libgcc/config/loongarch/t-sdemtk 2022-03-23 17:40:29.367279959 +0800 +@@ -0,0 +1,3 @@ ++# Don't build FPBIT and DPBIT; we'll be using the SDE soft-float library. ++FPBIT = ++DPBIT = +diff -uNr gcc-10.3.0.org/libgcc/config/loongarch/t-softfp-tf gcc-10.3.0/libgcc/config/loongarch/t-softfp-tf +--- gcc-10.3.0.org/libgcc/config/loongarch/t-softfp-tf 1970-01-01 08:00:00.000000000 +0800 ++++ gcc-10.3.0/libgcc/config/loongarch/t-softfp-tf 2022-03-23 17:40:29.367279959 +0800 +@@ -0,0 +1,3 @@ ++softfp_float_modes += tf ++softfp_extensions += sftf dftf ++softfp_truncations += tfsf tfdf +diff -uNr gcc-10.3.0.org/libgcc/config.host gcc-10.3.0/libgcc/config.host +--- gcc-10.3.0.org/libgcc/config.host 2021-04-08 19:56:29.805761640 +0800 ++++ gcc-10.3.0/libgcc/config.host 2022-03-23 17:40:29.368279951 +0800 +@@ -166,6 +166,23 @@ + fi + tmake_file="${tmake_file} t-softfp" + ;; ++loongarch*-*-*) ++ # All LoongArch targets provide a full set of FP routines. ++ cpu_type=loongarch ++ tmake_file="loongarch/t-loongarch" ++ if test "${libgcc_cv_loongarch_hard_float}" = yes; then ++ tmake_file="${tmake_file} t-hardfp-sfdf t-hardfp" ++ else ++ tmake_file="${tmake_file} t-softfp-sfdf" ++ fi ++ if test "${ac_cv_sizeof_long_double}" = 16; then ++ tmake_file="${tmake_file} loongarch/t-softfp-tf" ++ fi ++ if test "${host_address}" = 64; then ++ tmake_file="${tmake_file} loongarch/t-loongarch64" ++ fi ++ tmake_file="${tmake_file} t-softfp" ++ ;; + nds32*-*) + cpu_type=nds32 + ;; +@@ -998,6 +1015,16 @@ + esac + md_unwind_header=mips/linux-unwind.h + ;; ++loongarch*-*-linux*) # Linux LoongArch.
++ extra_parts="$extra_parts crtfastmath.o" ++ tmake_file="${tmake_file} t-crtfm" ++ case ${host} in ++ *) ++ tmake_file="${tmake_file} t-slibgcc-libgcc" ++ ;; ++ esac ++ md_unwind_header=loongarch/linux-unwind.h ++ ;; + mips*-sde-elf*) + tmake_file="$tmake_file mips/t-crtstuff mips/t-mips16" + case "${with_newlib}" in +@@ -1010,6 +1037,19 @@ + ;; + esac + extra_parts="$extra_parts crti.o crtn.o" ++ ;; ++loongarch*-sde-elf*) ++ tmake_file="$tmake_file loongarch/t-crtstuff" ++ case "${with_newlib}" in ++ yes) ++ # newlib / libgloss. ++ ;; ++ *) ++ # MIPS toolkit libraries. ++ tmake_file="$tmake_file loongarch/t-sdemtk" ++ ;; ++ esac ++ extra_parts="$extra_parts crti.o crtn.o" + ;; + mipsisa32-*-elf* | mipsisa32el-*-elf* | \ + mipsisa32r2-*-elf* | mipsisa32r2el-*-elf* | \ +diff -uNr gcc-10.3.0.org/libgcc/configure gcc-10.3.0/libgcc/configure +--- gcc-10.3.0.org/libgcc/configure 2021-04-08 19:56:29.861762316 +0800 ++++ gcc-10.3.0/libgcc/configure 2022-03-28 11:40:19.015621645 +0800 +@@ -2412,6 +2412,9 @@ + # sets the default TLS model and affects inlining. + PICFLAG=-fPIC + ;; ++ loongarch*-*-*) ++ PICFLAG=-fpic ++ ;; + mips-sgi-irix6*) + # PIC is the default. + ;; +@@ -5062,7 +5065,7 @@ + # word size rather than the address size. + cat > conftest.c < conftest.c <