From 88ee34f8067825f2b67ed3a225feccec3116b8a1 Mon Sep 17 00:00:00 2001 From: Xing Li Date: Wed, 19 Jul 2023 15:37:59 +0800 Subject: [PATCH] Sync code to gcc 8.3 vec.36 Fix some bug: LoongArch NOOP truncation gcc8 do_spec_2 --- 0001-Sync-to-gcc-8-vec-36.patch | 30492 ++++++++++++++++ ...ocessing-to-allow-in-function-argume.patch | 220 + ...-NOOP_TRUNCATION-and-fix-extendsidi2.patch | 101 + gcc.spec | 21 +- 4 files changed, 30830 insertions(+), 4 deletions(-) create mode 100644 0001-Sync-to-gcc-8-vec-36.patch create mode 100644 Improve-specs-processing-to-allow-in-function-argume.patch create mode 100644 LoongArch-Remove-NOOP_TRUNCATION-and-fix-extendsidi2.patch diff --git a/0001-Sync-to-gcc-8-vec-36.patch b/0001-Sync-to-gcc-8-vec-36.patch new file mode 100644 index 0000000..e41b234 --- /dev/null +++ b/0001-Sync-to-gcc-8-vec-36.patch @@ -0,0 +1,30492 @@ +From 474c84c016b0c36c9aace9a41d6d9df8107cf3e8 Mon Sep 17 00:00:00 2001 +From: Lixing +Date: Wed, 19 Jul 2023 10:47:27 +0800 +Subject: [PATCH] Sync to gcc-8-vec-36 + +--- + .../config/loongarch/loongarch-common.c | 41 +- + gcc/config.gcc | 589 +- + gcc/config.host | 12 - + gcc/config/loongarch/constraints.md | 371 +- + gcc/config/loongarch/driver-native.c | 82 - + gcc/config/loongarch/elf.h | 56 +- + gcc/config/loongarch/frame-header-opt.c | 292 - + gcc/config/loongarch/generic.md | 21 +- + gcc/config/loongarch/genopt.sh | 110 - + gcc/config/loongarch/genopts/genstr.sh | 104 + + .../loongarch/genopts/loongarch-strings | 68 + + gcc/config/loongarch/genopts/loongarch.opt.in | 242 + + gcc/config/loongarch/gnu-user.h | 135 +- + gcc/config/loongarch/la464.md | 132 + + gcc/config/loongarch/larchintrin.h | 495 +- + gcc/config/loongarch/lasx.md | 684 +- + gcc/config/loongarch/lasxintrin.h | 46 +- + gcc/config/loongarch/linux-common.h | 68 - + gcc/config/loongarch/linux.h | 37 +- + gcc/config/loongarch/loongarch-builtins.c | 549 +- + gcc/config/loongarch/loongarch-c.c | 158 +- + gcc/config/loongarch/loongarch-cpu.c | 291 + + .../{loongarch-d.c => loongarch-cpu.h} | 30 +- + gcc/config/loongarch/loongarch-cpus.def | 38 - + gcc/config/loongarch/loongarch-def.c | 232 + + gcc/config/loongarch/loongarch-def.h | 161 + + gcc/config/loongarch/loongarch-driver.c | 206 + + gcc/config/loongarch/loongarch-driver.h | 72 + + gcc/config/loongarch/loongarch-ftypes.def | 173 +- + gcc/config/loongarch/loongarch-modes.def | 6 +- + gcc/config/loongarch/loongarch-opts.c | 725 ++ + gcc/config/loongarch/loongarch-opts.h | 86 +- + gcc/config/loongarch/loongarch-protos.h | 155 +- + gcc/config/loongarch/loongarch-str.h | 68 + + gcc/config/loongarch/loongarch-tables.opt | 34 - + gcc/config/loongarch/loongarch-tune.h | 51 + + gcc/config/loongarch/loongarch.c | 8440 +++++++++-------- + gcc/config/loongarch/loongarch.h | 1523 +-- + gcc/config/loongarch/loongarch.md | 3658 +++---- + gcc/config/loongarch/loongarch.opt | 252 +- + gcc/config/loongarch/lsx.md | 358 +- + gcc/config/loongarch/lsxintrin.h | 46 +- + gcc/config/loongarch/predicates.md | 250 +- + gcc/config/loongarch/rtems.h | 39 - + gcc/config/loongarch/sde.opt | 28 - + gcc/config/loongarch/sync.md | 746 +- + gcc/config/loongarch/t-linux | 65 +- + gcc/config/loongarch/t-loongarch | 59 +- + gcc/config/loongarch/x-native | 3 - + libgcc/config/loongarch/crtfastmath.c | 48 +- + libgcc/config/loongarch/crti.S | 43 - + libgcc/config/loongarch/crtn.S | 39 - + libgcc/config/loongarch/gthr-loongnixsde.h | 237 - + libgcc/config/loongarch/linux-unwind.h | 27 +- + libgcc/config/loongarch/sfp-machine.h | 166 +- + 
libgcc/config/loongarch/t-elf | 3 - + libgcc/config/loongarch/t-loongarch | 2 - + libgcc/config/loongarch/t-sdemtk | 3 - + libgcc/config/loongarch/t-vr | 0 + 59 files changed, 12128 insertions(+), 10527 deletions(-) + delete mode 100644 gcc/config/loongarch/driver-native.c + delete mode 100644 gcc/config/loongarch/frame-header-opt.c + delete mode 100644 gcc/config/loongarch/genopt.sh + create mode 100755 gcc/config/loongarch/genopts/genstr.sh + create mode 100644 gcc/config/loongarch/genopts/loongarch-strings + create mode 100644 gcc/config/loongarch/genopts/loongarch.opt.in + create mode 100644 gcc/config/loongarch/la464.md + delete mode 100644 gcc/config/loongarch/linux-common.h + create mode 100644 gcc/config/loongarch/loongarch-cpu.c + rename gcc/config/loongarch/{loongarch-d.c => loongarch-cpu.h} (59%) + delete mode 100644 gcc/config/loongarch/loongarch-cpus.def + create mode 100644 gcc/config/loongarch/loongarch-def.c + create mode 100644 gcc/config/loongarch/loongarch-def.h + create mode 100644 gcc/config/loongarch/loongarch-driver.c + create mode 100644 gcc/config/loongarch/loongarch-driver.h + create mode 100644 gcc/config/loongarch/loongarch-opts.c + create mode 100644 gcc/config/loongarch/loongarch-str.h + delete mode 100644 gcc/config/loongarch/loongarch-tables.opt + create mode 100644 gcc/config/loongarch/loongarch-tune.h + delete mode 100644 gcc/config/loongarch/rtems.h + delete mode 100644 gcc/config/loongarch/sde.opt + delete mode 100644 gcc/config/loongarch/x-native + delete mode 100644 libgcc/config/loongarch/crti.S + delete mode 100644 libgcc/config/loongarch/crtn.S + delete mode 100644 libgcc/config/loongarch/gthr-loongnixsde.h + delete mode 100644 libgcc/config/loongarch/t-elf + delete mode 100644 libgcc/config/loongarch/t-sdemtk + delete mode 100644 libgcc/config/loongarch/t-vr + +diff --git a/gcc/common/config/loongarch/loongarch-common.c b/gcc/common/config/loongarch/loongarch-common.c +index afbbc3ad0..ccdc8f498 100644 +--- a/gcc/common/config/loongarch/loongarch-common.c ++++ b/gcc/common/config/loongarch/loongarch-common.c +@@ -1,5 +1,5 @@ +-/* Common hooks for LARCH. +- Copyright (C) 1989-2018 Free Software Foundation, Inc. ++/* Common hooks for LoongArch. ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. + + This file is part of GCC. + +@@ -25,44 +25,21 @@ along with GCC; see the file COPYING3. If not see + #include "common/common-target-def.h" + #include "opts.h" + #include "flags.h" ++#include "diagnostic-core.h" + +-#undef TARGET_OPTION_OPTIMIZATION_TABLE ++#undef TARGET_OPTION_OPTIMIZATION_TABLE + #define TARGET_OPTION_OPTIMIZATION_TABLE loongarch_option_optimization_table + + /* Set default optimization options. */ + static const struct default_options loongarch_option_optimization_table[] = + { +- { OPT_LEVELS_ALL, OPT_fasynchronous_unwind_tables, NULL, 1 }, +- { OPT_LEVELS_NONE, 0, NULL, 0 } ++ { OPT_LEVELS_ALL, OPT_fasynchronous_unwind_tables, NULL, 1 }, ++ /* Enable -fsched-pressure by default when optimizing. */ ++ { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 }, ++ { OPT_LEVELS_NONE, 0, NULL, 0 } + }; + +-/* Implement TARGET_HANDLE_OPTION. 
*/ +- +-static bool +-loongarch_handle_option (struct gcc_options *opts, +- struct gcc_options *opts_set ATTRIBUTE_UNUSED, +- const struct cl_decoded_option *decoded, +- location_t loc ATTRIBUTE_UNUSED) +-{ +- size_t code = decoded->opt_index; +- +- switch (code) +- { +- case OPT_mno_flush_func: +- opts->x_loongarch_cache_flush_func = NULL; +- return true; +- +- default: +- return true; +- } +-} +- + #undef TARGET_DEFAULT_TARGET_FLAGS +-#define TARGET_DEFAULT_TARGET_FLAGS \ +- (TARGET_DEFAULT \ +- | TARGET_CPU_DEFAULT \ +- | MASK_CHECK_ZERO_DIV) +-#undef TARGET_HANDLE_OPTION +-#define TARGET_HANDLE_OPTION loongarch_handle_option ++#define TARGET_DEFAULT_TARGET_FLAGS MASK_CHECK_ZERO_DIV + + struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER; +diff --git a/gcc/config.gcc b/gcc/config.gcc +index ba061efa4..cca2e6e43 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -427,10 +427,10 @@ lm32*) + ;; + loongarch*-*-*) + cpu_type=loongarch +- d_target_objs="loongarch-d.o" + extra_headers="lasxintrin.h lsxintrin.h larchintrin.h" +- extra_objs="frame-header-opt.o loongarch-c.o loongarch-builtins.o" +- extra_options="${extra_options} g.opt fused-madd.opt loongarch/loongarch-tables.opt" ++ extra_objs="loongarch-c.o loongarch-builtins.o loongarch-cpu.o loongarch-opts.o loongarch-def.o" ++ extra_gcc_objs="loongarch-driver.o loongarch-cpu.o loongarch-opts.o loongarch-def.o" ++ extra_options="${extra_options} g.opt fused-madd.opt" + ;; + m32r*-*-*) + cpu_type=m32r +@@ -2193,54 +2193,30 @@ mips*-*-linux*) # Linux MIPS, either endian. + fi + ;; + loongarch*-*-linux*) +- case ${with_abi} in +- "") +- echo "not specify ABI, default is lp64 for loongarch64" +- with_abi=lp64 # for default +- ;; +- lpx32) +- ;; +- lp32) +- ;; +- lp64) +- ;; +- *) +- echo "Unknown ABI used in --with-abi=$with_abi" +- exit 1 +- esac +- +- enable_multilib="yes" +- loongarch_multilibs="${with_multilib_list}" +- if test "$loongarch_multilibs" = "default"; then +- loongarch_multilibs="${with_abi}" +- fi +- loongarch_multilibs=`echo $loongarch_multilibs | sed -e 's/,/ /g'` +- for loongarch_multilib in ${loongarch_multilibs}; do +- case ${loongarch_multilib} in +- lp64 | lpx32 | lp32 ) +- TM_MULTILIB_CONFIG="${TM_MULTILIB_CONFIG},${loongarch_multilib}" +- ;; +- *) +- echo "--with-multilib-list=${loongarch_multilib} not supported." +- exit 1 +- esac +- done +- TM_MULTILIB_CONFIG=`echo $TM_MULTILIB_CONFIG | sed 's/^,//'` ++ tm_file="dbxelf.h elfos.h gnu-user.h linux.h linux-android.h glibc-stdint.h ${tm_file}" ++ tm_file="${tm_file} loongarch/gnu-user.h loongarch/linux.h" ++ extra_options="${extra_options} linux-android.opt" ++ tmake_file="${tmake_file} loongarch/t-linux" ++ gnu_ld=yes ++ gas=yes + +- if test `for one_abi in ${loongarch_multilibs}; do if [ x\$one_abi = x$with_abi ]; then echo 1; exit 0; fi; done; echo 0;` = "0"; then +- echo "--with-abi=${with_abi} must be one of --with-multilib-list=${with_multilib_list}" +- exit 1 +- fi ++ # Force .init_array support. The configure script cannot always ++ # automatically detect that GAS supports it, yet we require it. 
++ gcc_cv_initfini_array=yes ++ ;; + +- tm_file="dbxelf.h elfos.h gnu-user.h linux.h linux-android.h glibc-stdint.h ${tm_file} loongarch/gnu-user.h loongarch/linux.h loongarch/linux-common.h" +- extra_options="${extra_options} linux-android.opt" ++loongarch*-*-elf*) ++ tm_file="elfos.h newlib-stdint.h ${tm_file}" ++ tm_file="${tm_file} loongarch/elf.h loongarch/linux.h" + tmake_file="${tmake_file} loongarch/t-linux" + gnu_ld=yes + gas=yes ++ + # Force .init_array support. The configure script cannot always + # automatically detect that GAS supports it, yet we require it. + gcc_cv_initfini_array=yes + ;; ++ + mips*-mti-elf*) + tm_file="elfos.h newlib-stdint.h ${tm_file} mips/elf.h mips/n32-elf.h mips/sde.h mips/mti-elf.h" + tmake_file="mips/t-mti-elf" +@@ -2295,31 +2271,6 @@ mips*-sde-elf*) + ;; + esac + ;; +-loongarch*-sde-elf*) +- tm_file="elfos.h newlib-stdint.h ${tm_file} loongarch/elf.h loongarch/sde.h" +-# tmake_file="loongarch/t-sde" +- extra_options="${extra_options} loongarch/sde.opt" +- case "${with_newlib}" in +- yes) +- # newlib / libgloss. +- ;; +- *) +- # MIPS toolkit libraries. +- tm_file="$tm_file loongarch/sdemtk.h" +- tmake_file="$tmake_file loongarch/t-sdemtk" +- case ${enable_threads} in +- "" | yes | loongarchsde) +- thread_file='loongarchsde' +- ;; +- esac +- ;; +- esac +- case ${target} in +- loongarch*) +- tm_defines="LARCH_ISA_DEFAULT=0 LARCH_ABI_DEFAULT=ABILP64" +- ;; +- esac +- ;; + mipsisa32-*-elf* | mipsisa32el-*-elf* | \ + mipsisa32r2-*-elf* | mipsisa32r2el-*-elf* | \ + mipsisa32r6-*-elf* | mipsisa32r6el-*-elf* | \ +@@ -3259,7 +3210,7 @@ case ${target} in + ;; + *-*-linux* | *-*-gnu*) + case ${target} in +- aarch64*-* | arm*-* | i[34567]86-* | powerpc*-* | s390*-* | sparc*-* | x86_64-*) ++ aarch64*-* | arm*-* | i[34567]86-* | powerpc*-* | s390*-* | sparc*-* | x86_64-* | loongarch*-*) + default_gnu_indirect_function=yes + ;; + esac +@@ -4450,57 +4401,466 @@ case "${target}" in + ;; + + loongarch*-*-*) +- supported_defaults="abi arch float fpu tune" ++ supported_defaults="abi arch tune fpu simd multilib-default" ++ ++ # Local variables ++ unset \ ++ abi_pattern abi_default \ ++ abiext_pattern abiext_default \ ++ arch_pattern arch_default \ ++ fpu_pattern fpu_default \ ++ triplet_os triplet_abi ++ ++ # Infer ABI from the triplet. ++ case ${target} in ++ loongarch64-*-*-*f64) ++ abi_pattern="lp64d" ++ triplet_abi="" ++ ;; ++ loongarch64-*-*-*f32) ++ abi_pattern="lp64f" ++ triplet_abi="f32" ++ ;; ++ loongarch64-*-*-*sf) ++ abi_pattern="lp64s" ++ triplet_abi="sf" ++ ;; ++ loongarch64-*-*) ++ abi_pattern="lp64[dfs]" ++ abi_default="lp64d" ++ triplet_abi="" ++ ;; ++ *) ++ echo "Unsupported target ${target}." 1>&2 ++ exit 1 ++ ;; ++ esac ++ ++ abiext_pattern="*" ++ abiext_default="base" ++ ++ # Get the canonical triplet (multiarch specifier). ++ case ${target} in ++ *-linux-gnu*) triplet_os="linux-gnu";; ++ *-linux-musl*) triplet_os="linux-musl";; ++ *-elf*) triplet_os="elf";; ++ *) ++ echo "Unsupported target ${target}." 1>&2 ++ exit 1 ++ ;; ++ esac ++ ++ la_canonical_triplet="loongarch64-${triplet_os}${triplet_abi}" + ++ ++ # Perform initial sanity checks on --with-* options. + case ${with_arch} in +- loongarch64 | loongarch32) +- # OK +- default_loongarch_arch=$with_arch ++ "" | abi-default | loongarch64 | la[2346]64) ;; # OK, append here. ++ native) ++ if test x${host} != x${target}; then ++ echo "--with-arch=native is illegal for cross-compiler." 
1>&2 ++ exit 1 ++ fi + ;; +- "") +- # fallback +- default_loongarch_arch=loongarch64 ++ *) ++ echo "Unknown arch in --with-arch=$with_arch" 1>&2 ++ exit 1 ++ ;; ++ esac ++ ++ case ${with_abi} in ++ lp64) ++ # Legacy ++ with_abi=lp64d + ;; ++ ++ "" | lp64d | lp64f | lp64s) ;; # OK, append here. + *) +- echo "Unknown arch given in --with-arch=$with_arch, available choices are: loongarch64" 1>&2 ++ echo "Unsupported ABI given in --with-abi=$with_abi" 1>&2 + exit 1 + ;; + esac + ++ case ${with_abiext} in ++ "" | base) ;; # OK, append here. ++ *) ++ echo "Unsupported ABI extention type $with_abiext" 1>&2 ++ exit 1 ++ ;; ++ esac ++ ++ case ${with_fpu} in ++ "" | none | 32 | 64) ;; # OK, append here. ++ 0) ++ # Convert "0" to "none" for upcoming checks. ++ with_fpu="none" ++ ;; ++ *) ++ echo "Unknown fpu type in --with-fpu=$with_fpu" 1>&2 ++ exit 1 ++ ;; ++ esac ++ ++ case ${with_simd} in ++ "" | none) ;; ++ lsx | lasx) # OK, append here. ++ case ${with_fpu} in ++ 64) ;; ++ "") with_fpu=64 ;; ++ *) ++ echo "--with-simd=${with_simd} conflicts with --with-fpu=${with_fpu}" 1>&2 ++ exit 1 ++ ;; ++ esac ++ ;; ++ ++ *) ++ echo "Unknown SIMD extension in --with-simd=$with_simd" 1>&2 ++ exit 1 ++ ;; ++ esac ++ ++ ++ # Set default value for with_abi. + case ${with_abi} in +- lp64 | lp32) +- # OK +- default_loongarch_abi=$with_abi ++ "") ++ if test x${abi_default} != x; then ++ with_abi=${abi_default} ++ else ++ with_abi=${abi_pattern} ++ fi ++ ;; ++ ++ *) ++ if echo "${with_abi}" | grep -E "^${abi_pattern}$" > /dev/null; then ++ : # OK ++ else ++ echo "Incompatible options:" \ ++ "--with-abi=${with_abi} and --target=${target}." 1>&2 ++ exit 1 ++ fi + ;; ++ esac ++ ++ # Set default value for with_abiext (internal) ++ case ${with_abiext} in + "") +- # fallback +- default_loongarch_abi=lp64 ++ if test x${abiext_default} != x; then ++ with_abiext=${abiext_default} ++ else ++ with_abiext=${abiext_pattern} ++ fi ++ ;; ++ ++ *) ++ if echo "${with_abiext}" | grep -E "^${abiext_pattern}$" > /dev/null; then ++ : # OK ++ else ++ echo "The ABI extension type \"${with_abiext}\"" \ ++ "is incompatible with --target=${target}." 1>&2 ++ exit 1 ++ fi ++ ++ ;; ++ esac ++ ++ # Infer ISA-related default options from the ABI: pass 1 ++ case ${with_abi}/${with_abiext} in ++ lp64*/base) ++ # architectures that support lp64* ABI ++ arch_pattern="native|abi-default|loongarch64|la[2346]64" ++ # default architecture for lp64* ABI ++ arch_default="abi-default" + ;; + *) +- echo "Unknown ABI given in --with-abi=$with_abi, available choices are: lp32 lp64" 1>&2 ++ echo "Unsupported ABI type ${with_abi}/${with_abiext}." 1>&2 + exit 1 + ;; + esac + +- case ${with_float} in +- "" | soft | hard) +- # OK ++ # Infer ISA-related default options from the ABI: pass 2 ++ case ${with_abi}/${with_abiext} in ++ lp64d/base) ++ fpu_pattern="64" ++ ;; ++ lp64f/base) ++ fpu_pattern="32|64" ++ fpu_default="32" ++ ;; ++ lp64s/base) ++ fpu_pattern="none|32|64" ++ fpu_default="none" + ;; + *) +- echo "Unknown floating point type used in --with-float=$with_float" 1>&2 ++ echo "Unsupported ABI type ${with_abi}/${with_abiext}." 1>&2 + exit 1 + ;; + esac + ++ ## Set default value for with_arch. ++ case ${with_arch} in ++ "") ++ if test x${arch_default} != x; then ++ with_arch=${arch_default} ++ else ++ with_arch=${arch_pattern} ++ fi ++ ;; ++ ++ *) ++ if echo "${with_arch}" | grep -E "^${arch_pattern}$" > /dev/null; then ++ : # OK ++ else ++ echo "${with_abi}/${with_abiext} ABI cannot be implemented with" \ ++ "--with-arch=${with_arch}." 
1>&2 ++ exit 1 ++ fi ++ ;; ++ esac ++ ++ ## Set default value for with_fpu. + case ${with_fpu} in +- "" | single | double) +- # OK ++ "") ++ if test x${fpu_default} != x; then ++ with_fpu=${fpu_default} ++ else ++ with_fpu=${fpu_pattern} ++ fi + ;; ++ + *) +- echo "Unknown fpu type used in --with-fpu=$with_fpu" 1>&2 +- exit 1 ++ if echo "${with_fpu}" | grep -E "^${fpu_pattern}$" > /dev/null; then ++ : # OK ++ else ++ echo "${with_abi}/${with_abiext} ABI cannot be implemented with" \ ++ "--with-fpu=${with_fpu}." 1>&2 ++ exit 1 ++ fi ++ ;; ++ esac ++ ++ ++ # Check default with_tune configuration using with_arch. ++ case ${with_arch} in ++ loongarch64) ++ tune_pattern="native|abi-default|loongarch64|la[2346]64" ++ ;; ++ *) ++ # By default, $with_tune == $with_arch ++ tune_pattern="*" ++ ;; ++ esac ++ ++ case ${with_tune} in ++ "") ;; # OK ++ *) ++ if echo "${with_tune}" | grep -E "^${tune_pattern}$" > /dev/null; then ++ : # OK ++ else ++ echo "Incompatible options: --with-tune=${with_tune}" \ ++ "and --with-arch=${with_arch}." 1>&2 ++ exit 1 ++ fi + ;; + esac ++ ++ # Handle --with-multilib-default ++ if echo "${with_multilib_default}" \ ++ | grep -E -e '[[:space:]]' -e '//' -e '/$' -e '^/' > /dev/null 2>&1; then ++ echo "Invalid argument to --with-multilib-default." 1>&2 ++ exit 1 ++ fi ++ ++ if test x${with_multilib_default} = x; then ++ # Use -march=abi-default by default when building libraries. ++ with_multilib_default="/march=abi-default" ++ else ++ unset parse_state component ++ parse_state=arch ++ for component in $(echo "${with_multilib_default}" | tr '/' ' '); do ++ case ${parse_state},${component} in ++ arch,|arch,abi-default) ++ # ABI-default: use the ABI's default ARCH configuration for ++ # multilib library builds, unless otherwise specified ++ # in --with-multilib-list. ++ with_multilib_default="/march=abi-default" ++ parse_state=opts ++ ;; ++ arch,fixed) ++ # Fixed: use the default gcc configuration for all multilib ++ # builds by default. ++ with_multilib_default="" ++ parse_state=opts ++ ;; ++ arch,*) ++ with_multilib_default="/march=abi-default" ++ parse_state=opts ++ ;& ++ opts,*) ++ with_multilib_default="${with_multilib_default}/${component}" ++ ;; ++ esac ++ done ++ unset parse_state component ++ fi ++ ++ # Handle --with-multilib-list. ++ if test x"${with_multilib_list}" = x \ ++ || test x"${with_multilib_list}" = xno \ ++ || test x"${with_multilib_list}" = xdefault \ ++ || test x"${enable_multilib}" != xyes; then ++ ++ with_multilib_list="${with_abi}/${with_abiext}" ++ fi ++ ++ # Check if the configured default ABI combination is included in ++ # ${with_multilib_list}. ++ loongarch_multilib_list_sane=no ++ ++ # This one goes to TM_MULTILIB_CONFIG, for use in t-linux. ++ loongarch_multilib_list_make="" ++ ++ # This one goes to tm_defines, for use in loongarch-driver.c. ++ loongarch_multilib_list_c="" ++ ++ # ${with_multilib_list} should not contain whitespaces, ++ # consecutive commas or slashes. ++ if echo "${with_multilib_list}" \ ++ | grep -E -e "[[:space:]]" -e '[,/][,/]' -e '[,/]$' -e '^[,/]' > /dev/null 2>&1; then ++ echo "Invalid argument to --with-multilib-list." 
1>&2 ++ exit 1 ++ fi ++ ++ unset component elem_abi_base elem_abi_ext elem_tmp parse_state all_abis ++ for elem in $(echo "${with_multilib_list}" | tr ',' ' '); do ++ unset elem_abi_base elem_abi_ext ++ parse_state="abi-base" ++ ++ for component in $(echo "${elem}" | tr '/' ' '); do ++ case ${parse_state} in ++ abi-base) ++ # Base ABI type ++ case ${component} in ++ lp64 | lp64d) elem_tmp="ABI_BASE_LP64D,";; ++ lp64f) elem_tmp="ABI_BASE_LP64F,";; ++ lp64s) elem_tmp="ABI_BASE_LP64S,";; ++ *) ++ echo "Unknown base ABI \"${component}\" in --with-multilib-list." 1>&2 ++ exit 1 ++ ;; ++ esac ++ loongarch_multilib_list_c="${loongarch_multilib_list_c}${elem_tmp}" ++ loongarch_multilib_list_make="${loongarch_multilib_list_make}mabi=${component}" ++ elem_abi_base="${component}" ++ ++ parse_state="abi-ext" ++ ;; ++ ++ abi-ext) ++ # ABI extension type ++ case ${component} in ++ base) ++ elem_abi_ext="base" ++ loongarch_multilib_list_c="${loongarch_multilib_list_c}ABI_EXT_BASE," ++ loongarch_multilib_list_make="${loongarch_multilib_list_make}" # Add nothing for now. ++ parse_state="arch" ++ continue; ++ ;; ++ esac ++ ++ # The default ABI extension is "base" if unspecified. ++ elem_abi_ext="base" ++ loongarch_multilib_list_c="${loongarch_multilib_list_c}ABI_EXT_BASE," ++ loongarch_multilib_list_make="${loongarch_multilib_list_make}" # Add nothing for now. ++ parse_state="arch" ++ ;& ++ ++ arch) ++ # -march option ++ case ${component} in ++ abi-default | loongarch64 | la[2346]64) # OK, append here. ++ # Append -march spec for each multilib variant. ++ loongarch_multilib_list_make="${loongarch_multilib_list_make}/march=${component}" ++ ;& ++ ++ default) ++ # "/default" is equivalent to --with-multilib-default=fixed ++ parse_state="opts" ++ continue; ++ ;; ++ esac ++ ++ # If ARCH is unspecified for this multilib variant, use ${with_multllib_default}. ++ loongarch_multilib_list_make="${loongarch_multilib_list_make}${with_multilib_default}" ++ parse_state="opts" ++ ;& ++ ++ opts) ++ # Other compiler options for building libraries. ++ # (no static sanity check performed) ++ case ${component} in ++ *) ++ # Append other components as additional build options ++ # (without the prepending dash). ++ # Their validity should be examined by the compiler. ++ loongarch_multilib_list_make="${loongarch_multilib_list_make}/${component}" ++ ;; ++ esac ++ ;; ++ ++ esac ++ done ++ ++ case ${parse_state} in ++ "abi-ext") ++ elem_abi_ext="base" ++ loongarch_multilib_list_c="${loongarch_multilib_list_c}ABI_EXT_BASE," ++ loongarch_multilib_list_make="${loongarch_multilib_list_make}" # Add nothing for now. ++ ;& ++ "arch") ++ # If ARCH is unspecified for this multilib variant, use ${with_multllib_default}. ++ loongarch_multilib_list_make="${loongarch_multilib_list_make}${with_multilib_default}" ++ ;& ++ "opts") ++ ;; ++ esac ++ ++ # Check for repeated configuration of the same multilib variant. ++ if echo "${elem_abi_base}/${elem_abi_ext}" \ ++ | grep -E "^(${all_abis%|})$" >/dev/null 2>&1; then ++ echo "Repeated multilib config of \"${elem_abi_base}/${elem_abi_ext}\" in --with-multilib-list." ++ exit 1 ++ fi ++ all_abis+="${elem_abi_base}/${elem_abi_ext}|" ++ ++ ++ # Check if the default ABI configuration of the GCC binary ++ # is included in the enabled multilib variants. 
++ if test x${elem_abi_base} = x${with_abi} \ ++ && test x${elem_abi_ext} = x${with_abiext}; then ++ loongarch_multilib_list_sane=yes ++ fi ++ loongarch_multilib_list_make="${loongarch_multilib_list_make}," ++ done ++ unset component elem_abi_base elem_abi_ext elem_tmp parse_state all_abis ++ ++ ++ # Check if the default ABI combination is in the default list. ++ if test x${loongarch_multilib_list_sane} = xno; then ++ if test x${with_abiext} = xbase; then ++ with_abiext="" ++ else ++ with_abiext="/${with_abiext}" ++ fi ++ ++ echo "Default ABI combination (${with_abi}${with_abiext})" \ ++ "not found in --with-multilib-list." 1>&2 ++ exit 1 ++ fi ++ ++ # Remove the excessive appending comma. ++ loongarch_multilib_list_c=${loongarch_multilib_list_c%,} ++ loongarch_multilib_list_make=${loongarch_multilib_list_make%,} + ;; + + nds32*-*-*) +@@ -4935,17 +5295,54 @@ case ${target} in + ;; + + loongarch*-*-*) +- case ${default_loongarch_arch} in +- loongarch64) tm_defines="$tm_defines LARCH_ISA_DEFAULT=0" ;; +- loongarch32) tm_defines="$tm_defines LARCH_ISA_DEFAULT=1" ;; ++ # Export canonical triplet. ++ tm_defines="${tm_defines} LA_MULTIARCH_TRIPLET=${la_canonical_triplet}" ++ ++ # Define macro LA_DISABLE_MULTILIB if --disable-multilib ++ tm_defines="${tm_defines} TM_MULTILIB_LIST=${loongarch_multilib_list_c}" ++ if test x$enable_multilib = xyes; then ++ TM_MULTILIB_CONFIG="${loongarch_multilib_list_make}" ++ else ++ tm_defines="${tm_defines} LA_DISABLE_MULTILIB" ++ fi ++ ++ # Let --with- flags initialize the enum variables from loongarch.opt. ++ # See macro definitions from loongarch-opts.h and loongarch-cpu.h. ++ ++ # Architecture ++ tm_defines="${tm_defines} DEFAULT_CPU_ARCH=CPU_$(tr a-z- A-Z_ <<< ${with_arch})" ++ ++ # Base ABI type ++ tm_defines="${tm_defines} DEFAULT_ABI_BASE=ABI_BASE_$(tr a-z- A-Z_ <<< ${with_abi})" ++ ++ # ABI Extension ++ case ${with_abiext} in ++ base) tm_defines="${tm_defines} DEFAULT_ABI_EXT=ABI_EXT_BASE" ;; + esac +- case ${default_loongarch_abi} in +- lp64) tm_defines="$tm_defines LARCH_ABI_DEFAULT=ABILP64" ;; +- lp32) tm_defines="$tm_defines LARCH_ABI_DEFAULT=ABILP32" ;; ++ ++ # Microarchitecture ++ if test x${with_tune} != x; then ++ tm_defines="${tm_defines} DEFAULT_CPU_TUNE=CPU_$(tr a-z- A-Z_ <<< ${with_tune})" ++ fi ++ ++ # FPU adjustment ++ case ${with_fpu} in ++ none) tm_defines="$tm_defines DEFAULT_ISA_EXT_FPU=ISA_EXT_NONE" ;; ++ 32) tm_defines="$tm_defines DEFAULT_ISA_EXT_FPU=ISA_EXT_FPU32" ;; ++ 64) tm_defines="$tm_defines DEFAULT_ISA_EXT_FPU=ISA_EXT_FPU64" ;; + esac ++ ++ # SIMD extensions ++ case ${with_simd} in ++ none) tm_defines="$tm_defines DEFAULT_ISA_EXT_SIMD=ISA_EXT_NONE" ;; ++ lsx) tm_defines="$tm_defines DEFAULT_ISA_EXT_SIMD=ISA_EXT_SIMD_LSX" ;; ++ lasx) tm_defines="$tm_defines DEFAULT_ISA_EXT_SIMD=ISA_EXT_SIMD_LASX" ;; ++ esac ++ + tmake_file="loongarch/t-loongarch $tmake_file" + ;; + ++ + powerpc*-*-* | rs6000-*-*) + # FIXME: The PowerPC port uses the value set at compile time, + # although it's only cosmetic. 
+diff --git a/gcc/config.host b/gcc/config.host +index d23dae4ac..c65569da2 100644 +--- a/gcc/config.host ++++ b/gcc/config.host +@@ -139,18 +139,6 @@ case ${host} in + host_extra_gcc_objs="driver-native.o" + host_xmake_file="${host_xmake_file} mips/x-native" + ;; +- loongarch*-*-linux*) +- host_extra_gcc_objs="driver-native.o" +- host_xmake_file="${host_xmake_file} loongarch/x-native" +- ;; +- esac +- ;; +- loongarch*-*-linux*) +- case ${target} in +- loongarch*-*-linux*) +- host_extra_gcc_objs="driver-native.o" +- host_xmake_file="${host_xmake_file} loongarch/x-native" +- ;; + esac + ;; + rs6000-*-* \ +diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md +index ae8596107..82c0ccf37 100644 +--- a/gcc/config/loongarch/constraints.md ++++ b/gcc/config/loongarch/constraints.md +@@ -1,5 +1,6 @@ +-;; Constraint definitions for LARCH. +-;; Copyright (C) 2006-2018 Free Software Foundation, Inc. ++;; Constraint definitions for LoongArch. ++;; Copyright (C) 2020-2022 Free Software Foundation, Inc. ++;; Contributed by Loongson Co. Ltd. + ;; + ;; This file is part of GCC. + ;; +@@ -20,160 +21,158 @@ + ;; Register constraints + + ;; "a" A constant call global and noplt address. +-;; "b" ALL_REGS ++;; "b" <-----unused + ;; "c" A constant call local address. +-;; "d" - +-;; "e" JALR_REGS ++;; "d" <-----unused ++;; "e" JIRL_REGS + ;; "f" FP_REGS +-;; "g" * ++;; "g" <-----unused + ;; "h" A constant call plt address. +-;; "i" "Matches a general integer constant." ++;; "i" Matches a general integer constant. (Global non-architectural) + ;; "j" SIBCALL_REGS +-;; "k" * +-;; "l" "A signed 16-bit constant ." +-;; "m" "A memory operand whose address is formed by a base register and offset +-;; that is suitable for use in instructions with the same addressing mode +-;; as @code{st.w} and @code{ld.w}." +-;; "n" "Matches a non-symbolic integer constant." +-;; "o" "Matches an offsettable memory reference." +-;; "p" "Matches a general address." +-;; "q" CSR_REGS +-;; "r" GENERAL_REGS +-;; "s" "Matches a symbolic integer constant." ++;; "k" A memory operand whose address is formed by a base register and ++;; (optionally scaled) index register. ++;; "l" A signed 16-bit constant. ++;; "m" A memory operand whose address is formed by a base register and offset ++;; that is suitable for use in instructions with the same addressing mode ++;; as @code{st.w} and @code{ld.w}. ++;; "n" Matches a non-symbolic integer constant. (Global non-architectural) ++;; "o" Matches an offsettable memory reference. (Global non-architectural) ++;; "p" Matches a general address. (Global non-architectural) ++;; "q" A general-purpose register except for $r0 and $r1 for lcsr. ++;; "r" GENERAL_REGS (Global non-architectural) ++;; "s" Matches a symbolic integer constant. (Global non-architectural) + ;; "t" A constant call weak address +-;; "u" - +-;; "v" - +-;; "w" "Matches any valid memory." +-;; "x" - +-;; "y" GR_REGS +-;; "z" ST_REGS +-;; "A" - +-;; "B" - +-;; "C" - +-;; "D" - +-;; "E" "Matches a floating-point constant." +-;; "F" "Matches a floating-point constant." +-;; "G" "Floating-point zero." +-;; "H" - +-;; "I" "A signed 12-bit constant (for arithmetic instructions)." +-;; "J" "Integer zero." +-;; "K" "An unsigned 12-bit constant (for logic instructions)." +-;; "L" "A signed 32-bit constant in which the lower 12 bits are zero. +-;; "M" "A constant that cannot be loaded using @code{lui}, @code{addiu} or @code{ori}." +-;; "N" "A constant in the range -65535 to -1 (inclusive)." 
+-;; "O" "A signed 15-bit constant." +-;; "P" "A constant in the range 1 to 65535 (inclusive)." +-;; "Q" "A signed 12-bit constant" +-;; "R" "An address that can be used in a non-macro load or store." +-;; "S" "A constant call address." +-;; "T" - +-;; "U" - +-;; "V" "Matches a non-offsettable memory reference." +-;; "W" "A memory address based on a member of @code{BASE_REG_CLASS}. This is +-;; true for all references (although it can sometimes be implicit +-;; if @samp{!TARGET_EXPLICIT_RELOCS})." +-;; "X" "Matches anything." ++;; "u" A signed 52bit constant and low 32-bit is zero (for logic instructions) ++;; "v" A signed 64-bit constant and low 44-bit is zero (for logic instructions) ++;; "w" Matches any valid memory. ++;; "x" <-----unused ++;; "y" <-----unused ++;; "z" FCC_REGS ++;; "A" <-----unused ++;; "B" <-----unused ++;; "C" <-----unused ++;; "D" <-----unused ++;; "E" Matches a floating-point constant. (Global non-architectural) ++;; "F" Matches a floating-point constant. (Global non-architectural) ++;; "G" Floating-point zero. ++;; "H" <-----unused ++;; "I" A signed 12-bit constant (for arithmetic instructions). ++;; "J" Integer zero. ++;; "K" An unsigned 12-bit constant (for logic instructions). ++;; "L" <-----unused ++;; "M" <-----unused ++;; "N" <-----unused ++;; "O" <-----unused ++;; "P" <-----unused ++;; "Q" <-----unused ++;; "R" <-----unused ++;; "S" <-----unused ++;; "T" <-----unused ++;; "U" <-----unused ++;; "V" Matches a non-offsettable memory reference. (Global non-architectural) ++;; "W" <-----unused ++;; "X" Matches anything. (Global non-architectural) + ;; "Y" - +-;; "YG" +-;; "A vector zero." +-;; "YA" +-;; "An unsigned 6-bit constant." +-;; "YB" +-;; "A signed 10-bit constant." +-;; "Yb" + ;; "Yd" +-;; "A constant @code{move_operand} that can be safely loaded into @code{$25} +-;; using @code{la}." +-;; "Yh" +-;; "Yw" ++;; A constant @code{move_operand} that can be safely loaded using ++;; @code{la}. ++;; "YG" ++;; A vector zero. + ;; "Yx" +-;; "YI" +-;; "A replicated vector const in which the replicated value is in the range +-;; [-512,511]." + ;; "YC" +-;; "A replicated vector const in which the replicated value has a single +-;; bit set." ++;; A replicated vector const in which the replicated value has a single ++;; bit set. + ;; "YZ" +-;; "A replicated vector const in which the replicated value has a single +-;; bit clear." ++;; A replicated vector const in which the replicated value has a single ++;; bit clear. + ;; "Z" - + ;; "ZC" +-;; "A memory operand whose address is formed by a base register and offset ++;; A memory operand whose address is formed by a base register and offset + ;; that is suitable for use in instructions with the same addressing mode +-;; as @code{ll.w} and @code{sc.w}." +-;; "ZD" +-;; "An address suitable for a @code{prefetch} instruction, or for any other +-;; instruction with the same addressing mode as @code{prefetch}." +-;; "ZR" +-;; "An address valid for loading/storing register exclusive" ++;; as @code{ll.w} and @code{sc.w}. + ;; "ZB" +-;; "An address that is held in a general-purpose register. +-;; The offset is zero" ++;; An address that is held in a general-purpose register. ++;; The offset is zero. ++;; "<" Matches a pre-dec or post-dec operand. (Global non-architectural) ++;; ">" Matches a pre-inc or post-inc operand. (Global non-architectural) + ++(define_constraint "a" ++ "@internal ++ A constant call global and noplt address." 
++ (match_operand 0 "is_const_call_global_noplt_symbol")) + + (define_constraint "c" + "@internal + A constant call local address." + (match_operand 0 "is_const_call_local_symbol")) + +-(define_constraint "a" +- "@internal +- A constant call global and noplt address." +- (match_operand 0 "is_const_call_global_noplt_symbol")) ++(define_register_constraint "e" "JIRL_REGS" ++ "@internal") ++ ++(define_register_constraint "f" "TARGET_HARD_FLOAT ? FP_REGS : NO_REGS" ++ "A floating-point register (if available).") + + (define_constraint "h" + "@internal + A constant call plt address." + (match_operand 0 "is_const_call_plt_symbol")) + +-(define_constraint "t" +- "@internal +- A constant call weak address." +- (match_operand 0 "is_const_call_weak_symbol")) +- +-(define_register_constraint "e" "JALR_REGS" ++(define_register_constraint "j" "SIBCALL_REGS" + "@internal") + +-(define_register_constraint "q" "CSR_REGS" +- "A general-purpose register except for $r0 and $r1 for csr.") ++(define_memory_constraint "k" ++ "A memory operand whose address is formed by a base register and (optionally scaled) ++ index register." ++ (and (match_code "mem") ++ (match_test "loongarch_base_index_address_p (XEXP (op, 0), mode)"))) + +-(define_register_constraint "f" "TARGET_HARD_FLOAT ? FP_REGS : NO_REGS" +- "A floating-point register (if available).") ++(define_constraint "l" ++"A signed 16-bit constant." ++(and (match_code "const_int") ++ (match_test "IMM16_OPERAND (ival)"))) + +-(define_register_constraint "b" "ALL_REGS" +- "@internal") ++(define_memory_constraint "m" ++ "A memory operand whose address is formed by a base register and offset ++ that is suitable for use in instructions with the same addressing mode ++ as @code{st.w} and @code{ld.w}." ++ (and (match_code "mem") ++ (match_test "loongarch_12bit_offset_address_p (XEXP (op, 0), mode)"))) + +-(define_register_constraint "j" "SIBCALL_REGS" +- "@internal") ++(define_register_constraint "q" "CSR_REGS" ++ "A general-purpose register except for $r0 and $r1 for lcsr.") + +-(define_constraint "l" +- "A signed 16-bit constant ." ++(define_constraint "t" ++ "@internal ++ A constant call weak address." ++ (match_operand 0 "is_const_call_weak_symbol")) ++ ++(define_constraint "u" ++ "A signed 52bit constant and low 32-bit is zero (for logic instructions)." + (and (match_code "const_int") +- (match_test "IMM16_OPERAND (ival)"))) ++ (match_test "LU32I_OPERAND (ival)"))) + +-(define_register_constraint "y" "GR_REGS" +- "Equivalent to @code{r}; retained for backwards compatibility.") ++(define_constraint "v" ++ "A signed 64-bit constant and low 52-bit is zero (for logic instructions)." ++ (and (match_code "const_int") ++ (match_test "LU52I_OPERAND (ival)"))) + +-(define_register_constraint "z" "ST_REGS" ++(define_register_constraint "z" "FCC_REGS" + "A floating-point condition code register.") + +-(define_constraint "kf" +- "@internal" +- (match_operand 0 "force_to_mem_operand")) ++;; Floating-point constraints + +-;; This is a normal rather than a register constraint because we can +-;; never use the stack pointer as a reload register. +-(define_constraint "ks" +- "@internal" +- (and (match_code "reg") +- (match_test "REGNO (op) == STACK_POINTER_REGNUM"))) ++(define_constraint "G" ++ "Floating-point zero." ++ (and (match_code "const_double") ++ (match_test "op == CONST0_RTX (mode)"))) + + ;; Integer constraints + + (define_constraint "I" + "A signed 12-bit constant (for arithmetic instructions)." 
+ (and (match_code "const_int") +- (match_test "SMALL_OPERAND (ival)"))) ++ (match_test "IMM12_OPERAND (ival)"))) + + (define_constraint "J" + "Integer zero." +@@ -183,53 +182,7 @@ + (define_constraint "K" + "An unsigned 12-bit constant (for logic instructions)." + (and (match_code "const_int") +- (match_test "SMALL_OPERAND_UNSIGNED (ival)"))) +- +-(define_constraint "u" +- "An unsigned 12-bit constant (for logic instructions)." +- (and (match_code "const_int") +- (match_test "LU32I_OPERAND (ival)"))) +- +-(define_constraint "v" +- "An unsigned 12-bit constant (for logic instructions)." +- (and (match_code "const_int") +- (match_test "LU52I_OPERAND (ival)"))) +- +-(define_constraint "L" +- "A signed 32-bit constant in which the lower 12 bits are zero. +- Such constants can be loaded using @code{lui}." +- (and (match_code "const_int") +- (match_test "LUI_OPERAND (ival)"))) +- +-(define_constraint "M" +- "A constant that cannot be loaded using @code{lui}, @code{addiu} +- or @code{ori}." +- (and (match_code "const_int") +- (not (match_test "SMALL_OPERAND (ival)")) +- (not (match_test "SMALL_OPERAND_UNSIGNED (ival)")) +- (not (match_test "LUI_OPERAND (ival)")))) +- +-(define_constraint "N" +- "A constant in the range -65535 to -1 (inclusive)." +- (and (match_code "const_int") +- (match_test "ival >= -0xffff && ival < 0"))) +- +-(define_constraint "O" +- "A signed 15-bit constant." +- (and (match_code "const_int") +- (match_test "ival >= -0x4000 && ival < 0x4000"))) +- +-(define_constraint "P" +- "A constant in the range 1 to 65535 (inclusive)." +- (and (match_code "const_int") +- (match_test "ival > 0 && ival < 0x10000"))) +- +-;; Floating-point constraints +- +-(define_constraint "G" +- "Floating-point zero." +- (and (match_code "const_double") +- (match_test "op == CONST0_RTX (mode)"))) ++ (match_test "IMM12_OPERAND_UNSIGNED (ival)"))) + + ;; General constraints + +@@ -237,33 +190,35 @@ + "@internal" + (match_operand 0 "const_arith_operand")) + +-(define_memory_constraint "R" +- "An address that can be used in a non-macro load or store." +- (and (match_code "mem") +- (match_test "loongarch_address_insns (XEXP (op, 0), mode, false) == 1"))) ++(define_constraint "Yd" ++ "@internal ++ A constant @code{move_operand} that can be safely loaded using ++ @code{la}." ++ (and (match_operand 0 "move_operand") ++ (match_test "CONSTANT_P (op)"))) + +-(define_memory_constraint "m" ++(define_constraint "Yx" ++ "@internal" ++ (match_operand 0 "low_bitmask_operand")) ++ ++(define_memory_constraint "ZC" + "A memory operand whose address is formed by a base register and offset + that is suitable for use in instructions with the same addressing mode +- as @code{st.w} and @code{ld.w}." ++ as @code{ll.w} and @code{sc.w}." + (and (match_code "mem") +- (match_test "loongarch_12bit_offset_address_p (XEXP (op, 0), mode)"))) ++ (match_test "loongarch_14bit_shifted_offset_address_p (XEXP (op, 0), mode)"))) + +-(define_constraint "S" ++(define_memory_constraint "ZB" + "@internal +- A constant call address." +- (and (match_operand 0 "call_insn_operand") +- (match_test "CONSTANT_P (op)"))) ++ An address that is held in a general-purpose register. ++ The offset is zero" ++ (and (match_code "mem") ++ (match_test "REG_P (XEXP (op, 0))"))) + +-(define_memory_constraint "W" +- "@internal +- A memory address based on a member of @code{BASE_REG_CLASS}. This is +- true for allreferences (although it can sometimes be implicit +- if @samp{!TARGET_EXPLICIT_RELOCS})." 
++(define_memory_constraint "R" ++ "An address that can be used in a non-macro load or store." + (and (match_code "mem") +- (match_operand 0 "memory_operand") +- (and (not (match_operand 0 "stack_operand")) +- (not (match_test "CONSTANT_P (XEXP (op, 0))"))))) ++ (match_test "loongarch_address_insns (XEXP (op, 0), mode, false) == 1"))) + + (define_constraint "YG" + "@internal +@@ -271,41 +226,6 @@ + (and (match_code "const_vector") + (match_test "op == CONST0_RTX (mode)"))) + +-(define_constraint "YA" +- "@internal +- An unsigned 6-bit constant." +- (and (match_code "const_int") +- (match_test "UIMM6_OPERAND (ival)"))) +- +-(define_constraint "YB" +- "@internal +- A signed 10-bit constant." +- (and (match_code "const_int") +- (match_test "IMM10_OPERAND (ival)"))) +- +-(define_constraint "Yb" +- "@internal" +- (match_operand 0 "qi_mask_operand")) +- +-(define_constraint "Yd" +- "@internal +- A constant @code{move_operand} that can be safely loaded into @code{$25} +- using @code{la}." +- (and (match_operand 0 "move_operand") +- (match_test "CONSTANT_P (op)"))) +- +-(define_constraint "Yh" +- "@internal" +- (match_operand 0 "hi_mask_operand")) +- +-(define_constraint "Yw" +- "@internal" +- (match_operand 0 "si_mask_operand")) +- +-(define_constraint "Yx" +- "@internal" +- (match_operand 0 "low_bitmask_operand")) +- + (define_constraint "YI" + "@internal + A replicated vector const in which the replicated value is in the range +@@ -360,30 +280,3 @@ + A replicated vector const with replicated byte values as well as elements" + (and (match_code "const_vector") + (match_test "loongarch_const_vector_same_bytes_p (op, mode)"))) +- +-(define_memory_constraint "ZC" +- "A memory operand whose address is formed by a base register and offset +- that is suitable for use in instructions with the same addressing mode +- as @code{ll.w} and @code{sc.w}." +- (and (match_code "mem") +- (match_test "loongarch_14bit_shifted_offset_address_p (XEXP (op, 0), mode)"))) +- +-;;(define_address_constraint "ZD" +-;; "An address suitable for a @code{prefetch} instruction, or for any other +-;; instruction with the same addressing mode as @code{prefetch}." +-;; (if_then_else (match_test "ISA_HAS_9BIT_DISPLACEMENT") +-;; (match_test "loongarch_9bit_offset_address_p (op, mode)") +-;; (match_test "loongarch_address_insns (op, mode, false)"))) +- +-(define_memory_constraint "ZR" +- "@internal +- An address valid for loading/storing register exclusive" +- (match_operand 0 "mem_noofs_operand")) +- +-(define_memory_constraint "ZB" +- "@internal +- An address that is held in a general-purpose register. +- The offset is zero" +- (and (match_code "mem") +- (match_test "GET_CODE(XEXP(op,0)) == REG"))) +- +diff --git a/gcc/config/loongarch/driver-native.c b/gcc/config/loongarch/driver-native.c +deleted file mode 100644 +index 5484ee502..000000000 +--- a/gcc/config/loongarch/driver-native.c ++++ /dev/null +@@ -1,82 +0,0 @@ +-/* Subroutines for the gcc driver. +- Copyright (C) 2008-2018 Free Software Foundation, Inc. +- +-This file is part of GCC. +- +-GCC is free software; you can redistribute it and/or modify +-it under the terms of the GNU General Public License as published by +-the Free Software Foundation; either version 3, or (at your option) +-any later version. +- +-GCC is distributed in the hope that it will be useful, +-but WITHOUT ANY WARRANTY; without even the implied warranty of +-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-GNU General Public License for more details. 
+- +-You should have received a copy of the GNU General Public License +-along with GCC; see the file COPYING3. If not see +-. */ +- +-#define IN_TARGET_CODE 1 +- +-#include "config.h" +-#include "system.h" +-#include "coretypes.h" +-#include "tm.h" +- +- +-/* This function must set to noinline. Otherwise the arg can not be passed. */ +-int loongson_cpucfg (int arg) +-{ +- int ret; +- __asm__ __volatile__ ("cpucfg %0,%1\n\t" /* cpucfg $2,$4. */ +- :"=r"(ret) +- :"r"(arg) +- :); +- return ret; +-} +- +-/* This will be called by the spec parser in gcc.c when it sees +- a %:local_cpu_detect(args) construct. Currently it will be called +- with either "arch" or "tune" as argument depending on if -march=native +- or -mtune=native is to be substituted. +- +- It returns a string containing new command line parameters to be +- put at the place of the above two options, depending on what CPU +- this is executed. E.g. "-march=loongson2f" on a Loongson 2F for +- -march=native. If the routine can't detect a known processor, +- the -march or -mtune option is discarded. +- +- ARGC and ARGV are set depending on the actual arguments given +- in the spec. */ +-const char * +-host_detect_local_cpu (int argc, const char **argv) +-{ +- const char *cpu = NULL; +- bool arch; +- int cpucfg_arg; +- int cpucfg_ret; +- +- if (argc < 1) +- return NULL; +- +- arch = strcmp (argv[0], "arch") == 0; +- if (!arch && strcmp (argv[0], "tune")) +- return NULL; +- +- cpucfg_arg = 0; +- cpucfg_ret = loongson_cpucfg (cpucfg_arg); +- if (((cpucfg_ret >> 16) & 0xff) == 0x14) +- { +- if (((cpucfg_ret >> 8) & 0xff) == 0xc0) +- cpu = "la464"; +- else +- cpu = NULL; +- } +- +- +- if (cpu == NULL) +- return NULL; +- +- return concat ("-m", argv[0], "=", cpu, NULL); +-} +diff --git a/gcc/config/loongarch/elf.h b/gcc/config/loongarch/elf.h +index b7f938e31..edb0e77d2 100644 +--- a/gcc/config/loongarch/elf.h ++++ b/gcc/config/loongarch/elf.h +@@ -1,5 +1,6 @@ +-/* Target macros for loongarch*-elf targets. +- Copyright (C) 1994-2018 Free Software Foundation, Inc. ++/* Definitions for LoongArch systems using GNU (glibc-based) userspace, ++ or other userspace with libc derived from glibc. ++ Copyright (C) 1998-2018 Free Software Foundation, Inc. + + This file is part of GCC. + +@@ -17,34 +18,37 @@ You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +-/* LARCH assemblers don't have the usual .set foo,bar construct; +- .set is used for assembler options instead. */ +-#undef SET_ASM_OP +-#define ASM_OUTPUT_DEF(FILE, LABEL1, LABEL2) \ +- do \ +- { \ +- fputc ('\t', FILE); \ +- assemble_name (FILE, LABEL1); \ +- fputs (" = ", FILE); \ +- assemble_name (FILE, LABEL2); \ +- fputc ('\n', FILE); \ +- } \ +- while (0) +- +-#undef ASM_DECLARE_OBJECT_NAME +-#define ASM_DECLARE_OBJECT_NAME loongarch_declare_object_name +- +-#undef ASM_FINISH_DECLARE_OBJECT +-#define ASM_FINISH_DECLARE_OBJECT loongarch_finish_declare_object +- +-/* Leave the linker script to choose the appropriate libraries. */ ++/* Define the size of the wide character type. */ ++#undef WCHAR_TYPE ++#define WCHAR_TYPE "int" ++ ++#undef WCHAR_TYPE_SIZE ++#define WCHAR_TYPE_SIZE 32 ++ ++ ++/* GNU-specific SPEC definitions. */ ++#define GNU_USER_LINK_EMULATION "elf" ABI_GRLEN_SPEC "loongarch" ++ ++#undef GNU_USER_TARGET_LINK_SPEC ++#define GNU_USER_TARGET_LINK_SPEC \ ++ "%{shared} -m " GNU_USER_LINK_EMULATION ++ ++ ++/* Link against Newlib libraries, because the ELF backend assumes Newlib. 
++ Handle the circular dependence between libc and libgloss. */ + #undef LIB_SPEC +-#define LIB_SPEC "" ++#define LIB_SPEC "--start-group -lc %{!specs=nosys.specs:-lgloss} --end-group" ++ ++#undef LINK_SPEC ++#define LINK_SPEC GNU_USER_TARGET_LINK_SPEC + + #undef STARTFILE_SPEC +-#define STARTFILE_SPEC "crti%O%s crtbegin%O%s" ++#define STARTFILE_SPEC "crt0%O%s crtbegin%O%s" + + #undef ENDFILE_SPEC +-#define ENDFILE_SPEC "crtend%O%s crtn%O%s" ++#define ENDFILE_SPEC "crtend%O%s" + + #define NO_IMPLICIT_EXTERN_C 1 ++#undef SUBTARGET_CC1_SPEC ++#define SUBTARGET_CC1_SPEC "%{profile:-p}" ++ +diff --git a/gcc/config/loongarch/frame-header-opt.c b/gcc/config/loongarch/frame-header-opt.c +deleted file mode 100644 +index 86e5d423d..000000000 +--- a/gcc/config/loongarch/frame-header-opt.c ++++ /dev/null +@@ -1,292 +0,0 @@ +-/* Analyze functions to determine if callers need to allocate a frame header +- on the stack. The frame header is used by callees to save their arguments. +- This optimization is specific to TARGET_OLDABI targets. For TARGET_NEWABI +- targets, if a frame header is required, it is allocated by the callee. +- +- +- Copyright (C) 2015-2018 Free Software Foundation, Inc. +- +-This file is part of GCC. +- +-GCC is free software; you can redistribute it and/or modify it +-under the terms of the GNU General Public License as published by the +-Free Software Foundation; either version 3, or (at your option) any +-later version. +- +-GCC is distributed in the hope that it will be useful, but WITHOUT +-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +-for more details. +- +-You should have received a copy of the GNU General Public License +-along with GCC; see the file COPYING3. If not see +-. */ +- +- +-#define IN_TARGET_CODE 1 +- +-#include "config.h" +-#include "system.h" +-#include "context.h" +-#include "coretypes.h" +-#include "tree.h" +-#include "tree-core.h" +-#include "tree-pass.h" +-#include "target.h" +-#include "target-globals.h" +-#include "profile-count.h" +-#include "cfg.h" +-#include "cgraph.h" +-#include "function.h" +-#include "basic-block.h" +-#include "gimple.h" +-#include "gimple-iterator.h" +-#include "gimple-walk.h" +- +-static unsigned int frame_header_opt (void); +- +-namespace { +- +-const pass_data pass_data_ipa_frame_header_opt = +-{ +- IPA_PASS, /* type */ +- "frame-header-opt", /* name */ +- OPTGROUP_NONE, /* optinfo_flags */ +- TV_CGRAPHOPT, /* tv_id */ +- 0, /* properties_required */ +- 0, /* properties_provided */ +- 0, /* properties_destroyed */ +- 0, /* todo_flags_start */ +- 0, /* todo_flags_finish */ +-}; +- +-class pass_ipa_frame_header_opt : public ipa_opt_pass_d +-{ +-public: +- pass_ipa_frame_header_opt (gcc::context *ctxt) +- : ipa_opt_pass_d (pass_data_ipa_frame_header_opt, ctxt, +- NULL, /* generate_summary */ +- NULL, /* write_summary */ +- NULL, /* read_summary */ +- NULL, /* write_optimization_summary */ +- NULL, /* read_optimization_summary */ +- NULL, /* stmt_fixup */ +- 0, /* function_transform_todo_flags_start */ +- NULL, /* function_transform */ +- NULL) /* variable_transform */ +- {} +- +- /* opt_pass methods: */ +- virtual bool gate (function *) +- { +- /* This optimization has no affect if TARGET_NEWABI. If optimize +- is not at least 1 then the data needed for the optimization is +- not available and nothing will be done anyway. 
*/ +- return TARGET_OLDABI && flag_frame_header_optimization && optimize > 0; +- } +- +- virtual unsigned int execute (function *) { return frame_header_opt (); } +- +-}; // class pass_ipa_frame_header_opt +- +-} // anon namespace +- +-static ipa_opt_pass_d * +-make_pass_ipa_frame_header_opt (gcc::context *ctxt) +-{ +- return new pass_ipa_frame_header_opt (ctxt); +-} +- +-void +-loongarch_register_frame_header_opt (void) +-{ +- opt_pass *p = make_pass_ipa_frame_header_opt (g); +- struct register_pass_info f = { p, "comdats", 1, PASS_POS_INSERT_AFTER }; +- register_pass (&f); +-} +- +- +-/* Return true if it is certain that this is a leaf function. False if it is +- not a leaf function or if it is impossible to tell. */ +- +-static bool +-is_leaf_function (function *fn) +-{ +- basic_block bb; +- gimple_stmt_iterator gsi; +- +- /* If we do not have a cfg for this function be conservative and assume +- it is not a leaf function. */ +- if (fn->cfg == NULL) +- return false; +- +- FOR_EACH_BB_FN (bb, fn) +- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) +- if (is_gimple_call (gsi_stmt (gsi))) +- return false; +- return true; +-} +- +-/* Return true if this function has inline assembly code or if we cannot +- be certain that it does not. False if we know that there is no inline +- assembly. */ +- +-static bool +-has_inlined_assembly (function *fn) +-{ +- basic_block bb; +- gimple_stmt_iterator gsi; +- +- /* If we do not have a cfg for this function be conservative and assume +- it is may have inline assembly. */ +- if (fn->cfg == NULL) +- return true; +- +- FOR_EACH_BB_FN (bb, fn) +- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) +- if (gimple_code (gsi_stmt (gsi)) == GIMPLE_ASM) +- return true; +- +- return false; +-} +- +-/* Return true if this function will use the stack space allocated by its +- caller or if we cannot determine for certain that it does not. */ +- +-static bool +-needs_frame_header_p (function *fn) +-{ +- tree t; +- +- if (fn->decl == NULL) +- return true; +- +- if (fn->stdarg) +- return true; +- +- for (t = DECL_ARGUMENTS (fn->decl); t; t = TREE_CHAIN (t)) +- { +- if (!use_register_for_decl (t)) +- return true; +- +- /* Some 64-bit types may get copied to general registers using the frame +- header, see loongarch_output_64bit_xfer. Checking for SImode only may be +- overly restrictive but it is guaranteed to be safe. */ +- if (DECL_MODE (t) != SImode) +- return true; +- } +- +- return false; +-} +- +-/* Return true if the argument stack space allocated by function FN is used. +- Return false if the space is needed or if the need for the space cannot +- be determined. 
*/ +- +-static bool +-callees_functions_use_frame_header (function *fn) +-{ +- basic_block bb; +- gimple_stmt_iterator gsi; +- gimple *stmt; +- tree called_fn_tree; +- function *called_fn; +- +- if (fn->cfg == NULL) +- return true; +- +- FOR_EACH_BB_FN (bb, fn) +- { +- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) +- { +- stmt = gsi_stmt (gsi); +- if (is_gimple_call (stmt)) +- { +- called_fn_tree = gimple_call_fndecl (stmt); +- if (called_fn_tree != NULL) +- { +- called_fn = DECL_STRUCT_FUNCTION (called_fn_tree); +- if (called_fn == NULL +- || DECL_WEAK (called_fn_tree) +- || has_inlined_assembly (called_fn) +- || !is_leaf_function (called_fn) +- || !called_fn->machine->does_not_use_frame_header) +- return true; +- } +- else +- return true; +- } +- } +- } +- return false; +-} +- +-/* Set the callers_may_not_allocate_frame flag for any function which +- function FN calls because FN may not allocate a frame header. */ +- +-static void +-set_callers_may_not_allocate_frame (function *fn) +-{ +- basic_block bb; +- gimple_stmt_iterator gsi; +- gimple *stmt; +- tree called_fn_tree; +- function *called_fn; +- +- if (fn->cfg == NULL) +- return; +- +- FOR_EACH_BB_FN (bb, fn) +- { +- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) +- { +- stmt = gsi_stmt (gsi); +- if (is_gimple_call (stmt)) +- { +- called_fn_tree = gimple_call_fndecl (stmt); +- if (called_fn_tree != NULL) +- { +- called_fn = DECL_STRUCT_FUNCTION (called_fn_tree); +- if (called_fn != NULL) +- called_fn->machine->callers_may_not_allocate_frame = true; +- } +- } +- } +- } +- return; +-} +- +-/* Scan each function to determine those that need its frame headers. Perform +- a second scan to determine if the allocation can be skipped because none of +- their callees require the frame header. */ +- +-static unsigned int +-frame_header_opt () +-{ +- struct cgraph_node *node; +- function *fn; +- +- FOR_EACH_DEFINED_FUNCTION (node) +- { +- fn = node->get_fun (); +- if (fn != NULL) +- fn->machine->does_not_use_frame_header = !needs_frame_header_p (fn); +- } +- +- FOR_EACH_DEFINED_FUNCTION (node) +- { +- fn = node->get_fun (); +- if (fn != NULL) +- fn->machine->optimize_call_stack +- = !callees_functions_use_frame_header (fn) && !is_leaf_function (fn); +- } +- +- FOR_EACH_DEFINED_FUNCTION (node) +- { +- fn = node->get_fun (); +- if (fn != NULL && fn->machine->optimize_call_stack) +- set_callers_may_not_allocate_frame (fn); +- } +- +- return 0; +-} +diff --git a/gcc/config/loongarch/generic.md b/gcc/config/loongarch/generic.md +index 321b8e561..0f6eb3f42 100644 +--- a/gcc/config/loongarch/generic.md ++++ b/gcc/config/loongarch/generic.md +@@ -1,6 +1,8 @@ +-;; Generic DFA-based pipeline description for LARCH targets +-;; Copyright (C) 2004-2018 Free Software Foundation, Inc. +-;; ++;; Generic DFA-based pipeline description for LoongArch targets ++;; Copyright (C) 2020-2022 Free Software Foundation, Inc. ++;; Contributed by Loongson Co. Ltd. ++;; Based on MIPS target for GNU compiler. ++ + ;; This file is part of GCC. + + ;; GCC is free software; you can redistribute it and/or modify it +@@ -17,9 +19,16 @@ + ;; along with GCC; see the file COPYING3. If not see + ;; . + ++(define_automaton "alu,imuldiv") ++ ++(define_cpu_unit "alu" "alu") ++(define_cpu_unit "imuldiv" "imuldiv") + +-;; This file is derived from the old define_function_unit description. +-;; Each reservation can be overridden on a processor-by-processor basis. ++;; Ghost instructions produce no real code. 
++;; They exist purely to express an effect on dataflow. ++(define_insn_reservation "ghost" 0 ++ (eq_attr "type" "ghost") ++ "nothing") + + (define_insn_reservation "generic_alu" 1 + (eq_attr "type" "unknown,prefetch,prefetchx,condmove,const,arith, +@@ -43,7 +52,7 @@ + "alu") + + (define_insn_reservation "generic_imul" 17 +- (eq_attr "type" "imul,imul3") ++ (eq_attr "type" "imul") + "imuldiv*17") + + (define_insn_reservation "generic_fcvt" 1 +diff --git a/gcc/config/loongarch/genopt.sh b/gcc/config/loongarch/genopt.sh +deleted file mode 100644 +index 272aac51d..000000000 +--- a/gcc/config/loongarch/genopt.sh ++++ /dev/null +@@ -1,110 +0,0 @@ +-#!/bin/sh +-# Generate loongarch-tables.opt from the list of CPUs in loongarch-cpus.def. +-# Copyright (C) 2011-2018 Free Software Foundation, Inc. +-# +-# This file is part of GCC. +-# +-# GCC is free software; you can redistribute it and/or modify +-# it under the terms of the GNU General Public License as published by +-# the Free Software Foundation; either version 3, or (at your option) +-# any later version. +-# +-# GCC is distributed in the hope that it will be useful, +-# but WITHOUT ANY WARRANTY; without even the implied warranty of +-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-# GNU General Public License for more details. +-# +-# You should have received a copy of the GNU General Public License +-# along with GCC; see the file COPYING3. If not see +-# . +- +-cat <. +- +-Enum +-Name(loongarch_arch_opt_value) Type(int) +-Known LARCH CPUs (for use with the -march= and -mtune= options): +- +-EnumValue +-Enum(loongarch_arch_opt_value) String(native) Value(LARCH_ARCH_OPTION_NATIVE) DriverOnly +- +-EOF +- +-awk -F'[(, ]+' ' +-BEGIN { +- value = 0 +-} +- +-# Write an entry for a single string accepted as a -march= argument. +- +-function write_one_arch_value(name, value, flags) +-{ +- print "EnumValue" +- print "Enum(loongarch_arch_opt_value) String(" name ") Value(" value ")" flags +- print "" +-} +- +-# The logic for matching CPU name variants should be the same as in GAS. +- +-# Write an entry for a single string accepted as a -march= argument, +-# plus any variant with a final "000" replaced by "k". +- +-function write_arch_value_maybe_k(name, value, flags) +-{ +- write_one_arch_value(name, value, flags) +- if (name ~ "000$") { +- sub("000$", "k", name) +- write_one_arch_value(name, value, "") +- } +-} +- +-# Write all the entries for a -march= argument. In addition to +-# replacement of a final "000" with "k", an argument starting with +-# "vr", "rm" or "r" followed by a number, or just a plain number, +-# matches a plain number or "r" followed by a plain number. 
+- +-function write_all_arch_values(name, value) +-{ +- write_arch_value_maybe_k(name, value, " Canonical") +- cname = name +- if (cname ~ "^vr") { +- sub("^vr", "", cname) +- } else if (cname ~ "^rm") { +- sub("^rm", "", cname) +- } else if (cname ~ "^r") { +- sub("^r", "", cname) +- } +- if (cname ~ "^[0-9]") { +- if (cname != name) +- write_arch_value_maybe_k(cname, value, "") +- rname = "r" cname +- if (rname != name) +- write_arch_value_maybe_k(rname, value, "") +- } +-} +- +-/^LARCH_CPU/ { +- name = $2 +- gsub("\"", "", name) +- write_all_arch_values(name, value) +- value++ +-}' $1/loongarch-cpus.def +diff --git a/gcc/config/loongarch/genopts/genstr.sh b/gcc/config/loongarch/genopts/genstr.sh +new file mode 100755 +index 000000000..e895f7ec8 +--- /dev/null ++++ b/gcc/config/loongarch/genopts/genstr.sh +@@ -0,0 +1,104 @@ ++#!/bin/sh ++# A simple script that generates loongarch-str.h and loongarch.opt ++# from genopt/loongarch-optstr. ++# ++# Copyright (C) 2020-2022 Free Software Foundation, Inc. ++# ++# This file is part of GCC. ++# ++# GCC is free software; you can redistribute it and/or modify it under ++# the terms of the GNU General Public License as published by the Free ++# Software Foundation; either version 3, or (at your option) any later ++# version. ++# ++# GCC is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++# License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# . ++ ++cd "$(dirname "$0")" ++ ++# Generate a header containing definitions from the string table. ++gen_defines() { ++ cat <. */ ++ ++#ifndef LOONGARCH_STR_H ++#define LOONGARCH_STR_H ++EOF ++ ++ sed -e '/^$/n' -e 's@#.*$@@' -e '/^$/d' \ ++ -e 's@^\([^ \t]\+\)[ \t]*\([^ \t]*\)@#define \1 "\2"@' \ ++ loongarch-strings ++ ++ echo ++ echo "#endif /* LOONGARCH_STR_H */" ++} ++ ++ ++# Substitute all "@@@@" to "" in loongarch.opt.in ++# according to the key-value pairs defined in loongarch-strings. ++ ++gen_options() { ++ ++ sed -e '/^$/n' -e 's@#.*$@@' -e '/^$/d' \ ++ -e 's@^\([^ \t]\+\)[ \t]*\([^ \t]*\)@\1="\2"@' \ ++ loongarch-strings | { \ ++ ++ # read the definitions ++ while read -r line; do ++ eval "$line" ++ done ++ ++ # print a header ++ cat << EOF ++; Generated by "genstr" from the template "loongarch.opt.in" ++; and definitions from "loongarch-strings". ++; ++; Please do not edit this file directly. ++; It will be automatically updated during a gcc build ++; if you change "loongarch.opt.in" or "loongarch-strings". ++; ++EOF ++ ++ # make the substitutions ++ sed -e 's@"@\\"@g' -e 's/@@\([^@]\+\)@@/${\1}/g' loongarch.opt.in | \ ++ while read -r line; do ++ eval "echo \"$line\"" ++ done ++ } ++} ++ ++main() { ++ case "$1" in ++ header) gen_defines;; ++ opt) gen_options;; ++ *) echo "Unknown Command: \"$1\". Available: header, opt"; exit 1;; ++ esac ++} ++ ++main "$@" +diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings +new file mode 100644 +index 000000000..d79e2e791 +--- /dev/null ++++ b/gcc/config/loongarch/genopts/loongarch-strings +@@ -0,0 +1,68 @@ ++# Defines the key strings for LoongArch compiler options. ++# ++# Copyright (C) 2020-2022 Free Software Foundation, Inc. ++# ++# This file is part of GCC. 
++# ++# GCC is free software; you can redistribute it and/or modify it under ++# the terms of the GNU General Public License as published by the Free ++# Software Foundation; either version 3, or (at your option) any later ++# version. ++# ++# GCC is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++# License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# . ++ ++# -march= / -mtune= ++OPTSTR_ARCH arch ++OPTSTR_TUNE tune ++ ++STR_CPU_NATIVE native ++STR_CPU_ABI_DEFAULT abi-default ++STR_CPU_LOONGARCH64 loongarch64 ++STR_CPU_LA464 la464 ++STR_CPU_LA364 la364 ++STR_CPU_LA264 la264 ++STR_CPU_LA664 la664 ++ ++# Base architecture ++STR_ISA_BASE_LA64V100 la64 ++ ++# -mfpu ++OPTSTR_ISA_EXT_FPU fpu ++STR_NONE none ++STR_ISA_EXT_FPU0 0 ++STR_ISA_EXT_FPU32 32 ++STR_ISA_EXT_FPU64 64 ++ ++OPTSTR_SOFT_FLOAT soft-float ++OPTSTR_SINGLE_FLOAT single-float ++OPTSTR_DOUBLE_FLOAT double-float ++ ++# SIMD extensions ++OPTSTR_ISA_EXT_SIMD simd ++STR_ISA_EXT_LSX lsx ++STR_ISA_EXT_LASX lasx ++ ++# -mabi= ++OPTSTR_ABI_BASE abi ++STR_ABI_BASE_LP64D lp64d ++STR_ABI_BASE_LP64F lp64f ++STR_ABI_BASE_LP64S lp64s ++STR_ABI_BASE_LP64 lp64 ++ ++# ABI extension types ++STR_ABI_EXT_BASE base ++ ++# -mcmodel= ++OPTSTR_CMODEL cmodel ++STR_CMODEL_NORMAL normal ++STR_CMODEL_TINY tiny ++STR_CMODEL_TS tiny-static ++STR_CMODEL_LARGE large ++STR_CMODEL_EXTREME extreme +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +new file mode 100644 +index 000000000..463dfec77 +--- /dev/null ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -0,0 +1,242 @@ ++; Generated by "genstr" from the template "loongarch.opt.in" ++; and definitions from "loongarch-strings". ++; ++; Copyright (C) 2020-2022 Free Software Foundation, Inc. ++; ++; This file is part of GCC. ++; ++; GCC is free software; you can redistribute it and/or modify it under ++; the terms of the GNU General Public License as published by the Free ++; Software Foundation; either version 3, or (at your option) any later ++; version. ++; ++; GCC is distributed in the hope that it will be useful, but WITHOUT ++; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++; License for more details. ++; ++; You should have received a copy of the GNU General Public License ++; along with GCC; see the file COPYING3. If not see ++; . ++; ++ ++HeaderInclude ++config/loongarch/loongarch-opts.h ++ ++HeaderInclude ++config/loongarch/loongarch-str.h ++ ++TargetVariable ++unsigned int recip_mask = 0 ++ ++; ISA related options ++;; Base ISA ++Enum ++Name(isa_base) Type(int) ++Basic ISAs of LoongArch: ++ ++EnumValue ++Enum(isa_base) String(@@STR_ISA_BASE_LA64V100@@) Value(ISA_BASE_LA64V100) ++ ++;; ISA extensions / adjustments ++Enum ++Name(isa_ext_fpu) Type(int) ++FPU types of LoongArch: ++ ++EnumValue ++Enum(isa_ext_fpu) String(@@STR_NONE@@) Value(ISA_EXT_NONE) ++ ++EnumValue ++Enum(isa_ext_fpu) String(@@STR_ISA_EXT_FPU32@@) Value(ISA_EXT_FPU32) ++ ++EnumValue ++Enum(isa_ext_fpu) String(@@STR_ISA_EXT_FPU64@@) Value(ISA_EXT_FPU64) ++ ++m@@OPTSTR_ISA_EXT_FPU@@= ++Target RejectNegative Joined ToLower Enum(isa_ext_fpu) Var(la_opt_fpu) Init(M_OPT_UNSET) ++-m@@OPTSTR_ISA_EXT_FPU@@=FPU Generate code for the given FPU. 
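For reference, the "header" mode of genstr.sh above turns each key/value pair in loongarch-strings into a C macro through the sed rule shown there; a minimal sketch of the loongarch-str.h lines it would generate for a few of the entries listed above (illustrative only, not part of the patch):

/* Sketch of gen_defines () output for some loongarch-strings entries above.  */
#define OPTSTR_ARCH "arch"
#define OPTSTR_TUNE "tune"
#define STR_CPU_NATIVE "native"
#define STR_CPU_LA464 "la464"
#define STR_ABI_BASE_LP64D "lp64d"

The "opt" mode performs the analogous @@KEY@@ substitution on loongarch.opt.in to produce the final loongarch.opt.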
++ ++m@@OPTSTR_ISA_EXT_FPU@@=@@STR_ISA_EXT_FPU0@@ ++Target RejectNegative Alias(m@@OPTSTR_ISA_EXT_FPU@@=,@@STR_NONE@@) ++ ++m@@OPTSTR_SOFT_FLOAT@@ ++Target Driver Defer Var(la_deferred_options) RejectNegative Negative(m@@OPTSTR_SINGLE_FLOAT@@) ++Prevent the use of all hardware floating-point instructions. ++ ++m@@OPTSTR_SINGLE_FLOAT@@ ++Target Driver Defer Var(la_deferred_options) RejectNegative Negative(m@@OPTSTR_DOUBLE_FLOAT@@) ++Restrict the use of hardware floating-point instructions to 32-bit operations. ++ ++m@@OPTSTR_DOUBLE_FLOAT@@ ++Target Driver Defer Var(la_deferred_options) RejectNegative Negative(m@@OPTSTR_SOFT_FLOAT@@) ++Allow hardware floating-point instructions to cover both 32-bit and 64-bit operations. ++ ++Enum ++Name(isa_ext_simd) Type(int) ++SIMD extension levels of LoongArch: ++ ++EnumValue ++Enum(isa_ext_simd) String(@@STR_NONE@@) Value(ISA_EXT_NONE) ++ ++EnumValue ++Enum(isa_ext_simd) String(@@STR_ISA_EXT_LSX@@) Value(ISA_EXT_SIMD_LSX) ++ ++EnumValue ++Enum(isa_ext_simd) String(@@STR_ISA_EXT_LASX@@) Value(ISA_EXT_SIMD_LASX) ++ ++m@@OPTSTR_ISA_EXT_SIMD@@= ++Target RejectNegative Joined ToLower Enum(isa_ext_simd) Var(la_opt_simd) Init(M_OPT_UNSET) ++-m@@OPTSTR_ISA_EXT_SIMD@@=SIMD Generate code for the given SIMD extension. ++ ++m@@STR_ISA_EXT_LSX@@ ++Target Driver Defer Var(la_deferred_options) ++Enable LoongArch SIMD Extension (LSX, 128-bit). ++ ++m@@STR_ISA_EXT_LASX@@ ++Target Driver Defer Var(la_deferred_options) ++Enable LoongArch Advanced SIMD Extension (LASX, 256-bit). ++ ++;; Base target models (implies ISA & tune parameters) ++Enum ++Name(cpu_type) Type(int) ++LoongArch CPU types: ++ ++EnumValue ++Enum(cpu_type) String(@@STR_CPU_NATIVE@@) Value(CPU_NATIVE) ++ ++EnumValue ++Enum(cpu_type) String(@@STR_CPU_ABI_DEFAULT@@) Value(CPU_ABI_DEFAULT) ++ ++EnumValue ++Enum(cpu_type) String(@@STR_CPU_LOONGARCH64@@) Value(CPU_LOONGARCH64) ++ ++EnumValue ++Enum(cpu_type) String(@@STR_CPU_LA664@@) Value(CPU_LA664) ++ ++EnumValue ++Enum(cpu_type) String(@@STR_CPU_LA464@@) Value(CPU_LA464) ++ ++EnumValue ++Enum(cpu_type) String(@@STR_CPU_LA264@@) Value(CPU_LA264) ++ ++EnumValue ++Enum(cpu_type) String(@@STR_CPU_LA364@@) Value(CPU_LA364) ++ ++m@@OPTSTR_ARCH@@= ++Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET) ++-m@@OPTSTR_ARCH@@=PROCESSOR Generate code for the given PROCESSOR ISA. ++ ++m@@OPTSTR_TUNE@@= ++Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_tune) Init(M_OPT_UNSET) ++-m@@OPTSTR_TUNE@@=PROCESSOR Generate optimized code for PROCESSOR. ++ ++ ++; ABI related options ++; (ISA constraints on ABI are handled dynamically) ++ ++;; Base ABI ++Enum ++Name(abi_base) Type(int) ++Base ABI types for LoongArch: ++ ++EnumValue ++Enum(abi_base) String(@@STR_ABI_BASE_LP64D@@) Value(ABI_BASE_LP64D) ++ ++EnumValue ++Enum(abi_base) String(@@STR_ABI_BASE_LP64F@@) Value(ABI_BASE_LP64F) ++ ++EnumValue ++Enum(abi_base) String(@@STR_ABI_BASE_LP64S@@) Value(ABI_BASE_LP64S) ++ ++m@@OPTSTR_ABI_BASE@@= ++Target RejectNegative Joined ToLower Enum(abi_base) Var(la_opt_abi_base) Init(M_OPT_UNSET) ++-m@@OPTSTR_ABI_BASE@@=BASEABI Generate code that conforms to the given BASEABI. ++ ++;; Legacy option: -mabi=lp64 ++m@@OPTSTR_ABI_BASE@@=@@STR_ABI_BASE_LP64@@ ++Target RejectNegative Mask(LP64) ++-m@@OPTSTR_ABI_BASE@@=@@STR_ABI_BASE_LP64@@ Legacy option that enables the lp64 integer ABI. 
++ ++;; ABI Extension ++Variable ++int la_opt_abi_ext = M_OPT_UNSET ++ ++mbranch-cost= ++Target RejectNegative Joined UInteger Var(loongarch_branch_cost) ++-mbranch-cost=COST Set the cost of branches to roughly COST instructions. ++ ++mvecarg ++Target Report Var(TARGET_VECARG) Init(1) ++Target pass vect arg uses vector register. ++ ++mmemvec-cost= ++Target RejectNegative Joined UInteger Var(loongarch_vector_access_cost) IntegerRange(1, 5) ++mmemvec-cost=COST Set the cost of vector memory access instructions. ++ ++mveclibabi= ++Target RejectNegative Joined Var(loongarch_veclibabi_name) ++Vector library ABI to use. ++ ++mstackrealign ++Target Var(loongarch_stack_realign) Init(1) ++Realign stack in prologue. ++ ++mforce-drap ++Target Var(loongarch_force_drap) Init(0) ++Always use Dynamic Realigned Argument Pointer (DRAP) to realign stack. ++ ++mcheck-zero-division ++Target Mask(CHECK_ZERO_DIV) ++Trap on integer divide by zero. ++ ++mcond-move-int ++Target Var(TARGET_COND_MOVE_INT) Init(1) ++Conditional moves for integral are enabled. ++ ++mcond-move-float ++Target Var(TARGET_COND_MOVE_FLOAT) Init(1) ++Conditional moves for float are enabled. ++ ++mmemcpy ++Target Mask(MEMCPY) ++Prevent optimizing block moves, which is also the default behavior of -Os. ++ ++mstrict-align ++Target Var(TARGET_STRICT_ALIGN) Init(0) ++Do not generate unaligned memory accesses. ++ ++mmax-inline-memcpy-size= ++Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init(1024) ++-mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024. ++ ++mrecip ++Target Report RejectNegative Var(loongarch_recip) ++Generate reciprocals instead of divss and sqrtss. ++ ++mrecip= ++Target Report RejectNegative Joined Var(loongarch_recip_name) ++Control generation of reciprocal estimates. ++ ++; The code model option names for -mcmodel. ++Enum ++Name(cmodel) Type(int) ++The code model option names for -mcmodel: ++ ++EnumValue ++Enum(cmodel) String(@@STR_CMODEL_NORMAL@@) Value(CMODEL_NORMAL) ++ ++EnumValue ++Enum(cmodel) String(@@STR_CMODEL_TINY@@) Value(CMODEL_TINY) ++ ++EnumValue ++Enum(cmodel) String(@@STR_CMODEL_TS@@) Value(CMODEL_TINY_STATIC) ++ ++EnumValue ++Enum(cmodel) String(@@STR_CMODEL_LARGE@@) Value(CMODEL_LARGE) ++ ++EnumValue ++Enum(cmodel) String(@@STR_CMODEL_EXTREME@@) Value(CMODEL_EXTREME) ++ ++mcmodel= ++Target RejectNegative Joined Enum(cmodel) Var(la_opt_cmodel) Init(M_OPT_UNSET) ++Specify the code model. +diff --git a/gcc/config/loongarch/gnu-user.h b/gcc/config/loongarch/gnu-user.h +index 1304e2e97..603aed5a2 100644 +--- a/gcc/config/loongarch/gnu-user.h ++++ b/gcc/config/loongarch/gnu-user.h +@@ -1,4 +1,5 @@ +-/* Definitions for LARCH systems using GNU userspace. ++/* Definitions for LoongArch systems using GNU (glibc-based) userspace, ++ or other userspace with libc derived from glibc. + Copyright (C) 1998-2018 Free Software Foundation, Inc. + + This file is part of GCC. +@@ -17,116 +18,66 @@ You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + ++/* Define the size of the wide character type. */ + #undef WCHAR_TYPE + #define WCHAR_TYPE "int" + + #undef WCHAR_TYPE_SIZE + #define WCHAR_TYPE_SIZE 32 + +-#undef ASM_DECLARE_OBJECT_NAME +-#define ASM_DECLARE_OBJECT_NAME loongarch_declare_object_name + +-/* If we don't set MASK_ABICALLS, we can't default to PIC. */ +-/* #undef TARGET_DEFAULT */ +-/* #define TARGET_DEFAULT MASK_ABICALLS */ ++/* GNU-specific SPEC definitions. 
*/ ++#define GNU_USER_LINK_EMULATION "elf" ABI_GRLEN_SPEC "loongarch" + +-#define TARGET_OS_CPP_BUILTINS() \ +- do { \ +- GNU_USER_TARGET_OS_CPP_BUILTINS(); \ +- /* The GNU C++ standard library requires this. */ \ +- if (c_dialect_cxx ()) \ +- builtin_define ("_GNU_SOURCE"); \ +- } while (0) ++#undef GLIBC_DYNAMIC_LINKER ++#define GLIBC_DYNAMIC_LINKER \ ++ "/lib" ABI_GRLEN_SPEC "/" \ ++ "%{mabi=lp64d:ld.so.1;" \ ++ "mabi=lp64s:ld-linux-loongarch-lp64s.so.1;" \ ++ "mabi=lp64f:ld-linux-loongarch-lp64f.so.1}" + +-#undef SUBTARGET_CPP_SPEC +-#define SUBTARGET_CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}" +- +-/* A standard GNU/Linux mapping. On most targets, it is included in +- CC1_SPEC itself by config/linux.h, but loongarch.h overrides CC1_SPEC +- and provides this hook instead. */ +-#undef SUBTARGET_CC1_SPEC +-#define SUBTARGET_CC1_SPEC GNU_USER_TARGET_CC1_SPEC +- +-/* -G is incompatible with -KPIC which is the default, so only allow objects +- in the small data section if the user explicitly asks for it. */ +-#undef LARCH_DEFAULT_GVALUE +-#define LARCH_DEFAULT_GVALUE 0 ++#undef MUSL_DYNAMIC_LINKER ++#define MUSL_DYNAMIC_LINKER \ ++ "/lib" ABI_GRLEN_SPEC "/ld-musl-loongarch-" ABI_SPEC ".so.1" + + #undef GNU_USER_TARGET_LINK_SPEC +-#define GNU_USER_TARGET_LINK_SPEC "\ +- %{G*} %{EB} %{EL} %{shared} \ +- %{!shared: \ +- %{!static: \ +- %{rdynamic:-export-dynamic} \ +- %{mabi=lp32: -dynamic-linker " GNU_USER_DYNAMIC_LINKERLP32 "} \ +- %{mabi=lp64: -dynamic-linker " GNU_USER_DYNAMIC_LINKERLP64 "}} \ +- %{static}} \ +- %{mabi=lp32:-m" GNU_USER_LINK_EMULATION32 "} \ +- %{mabi=lp64:-m" GNU_USER_LINK_EMULATION64 "}" ++#define GNU_USER_TARGET_LINK_SPEC \ ++ "%{G*} %{shared} -m " GNU_USER_LINK_EMULATION \ ++ "%{!shared: %{static} %{!static: %{rdynamic:-export-dynamic} " \ ++ "-dynamic-linker " GNU_USER_DYNAMIC_LINKER "}}" + +-#undef LINK_SPEC +-#define LINK_SPEC GNU_USER_TARGET_LINK_SPEC + +-/* The LARCH assembler has different syntax for .set. We set it to +- .dummy to trap any errors. */ +-#undef SET_ASM_OP +-#define SET_ASM_OP "\t.dummy\t" +- +-#undef ASM_OUTPUT_DEF +-#define ASM_OUTPUT_DEF(FILE,LABEL1,LABEL2) \ +- do { \ +- fputc ( '\t', FILE); \ +- assemble_name (FILE, LABEL1); \ +- fputs ( " = ", FILE); \ +- assemble_name (FILE, LABEL2); \ +- fputc ( '\n', FILE); \ +- } while (0) +- +-/* The glibc _mcount stub will save $v0 for us. Don't mess with saving +- it, since ASM_OUTPUT_REG_PUSH/ASM_OUTPUT_REG_POP do not work in the +- presence of $gp-relative calls. */ +-#undef ASM_OUTPUT_REG_PUSH +-#undef ASM_OUTPUT_REG_POP ++/* Similar to standard Linux, but adding -ffast-math support. */ ++#undef GNU_USER_TARGET_MATHFILE_SPEC ++#define GNU_USER_TARGET_MATHFILE_SPEC \ ++ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}" + + #undef LIB_SPEC + #define LIB_SPEC GNU_USER_TARGET_LIB_SPEC + +-#define NO_SHARED_SPECS "" +- +-/* -march=native handling only makes sense with compiler running on +- a LARCH chip. */ +-#if defined(__loongarch__) +-extern const char *host_detect_local_cpu (int argc, const char **argv); +-# define EXTRA_SPEC_FUNCTIONS \ +- { "local_cpu_detect", host_detect_local_cpu }, +- +-# define MARCH_MTUNE_NATIVE_SPECS \ +- " %{march=native:%. ++ ++;; Uncomment the following line to output automata for debugging. ++;; (automata_option "v") ++ ++;; Automaton for integer instructions. ++(define_automaton "la464_a_alu") ++ ++;; Automaton for floating-point instructions. ++(define_automaton "la464_a_falu") ++ ++;; Automaton for memory operations. 
++(define_automaton "la464_a_mem") ++ ++;; Describe the resources. ++ ++(define_cpu_unit "la464_alu1" "la464_a_alu") ++(define_cpu_unit "la464_alu2" "la464_a_alu") ++(define_cpu_unit "la464_mem1" "la464_a_mem") ++(define_cpu_unit "la464_mem2" "la464_a_mem") ++(define_cpu_unit "la464_falu1" "la464_a_falu") ++(define_cpu_unit "la464_falu2" "la464_a_falu") ++ ++;; Describe instruction reservations. ++ ++(define_insn_reservation "la464_arith" 1 ++ (and (match_test "TARGET_uARCH_LA464 || TARGET_uARCH_LA664") ++ (eq_attr "type" "arith,clz,const,logical, ++ move,nop,shift,signext,slt")) ++ "la464_alu1 | la464_alu2") ++ ++(define_insn_reservation "la464_branch" 1 ++ (and (match_test "TARGET_uARCH_LA464 || TARGET_uARCH_LA664") ++ (eq_attr "type" "branch,jump,call,condmove,trap")) ++ "la464_alu1 | la464_alu2") ++ ++(define_insn_reservation "la464_imul" 7 ++ (and (match_test "TARGET_uARCH_LA464 || TARGET_uARCH_LA664") ++ (eq_attr "type" "imul")) ++ "la464_alu1 | la464_alu2") ++ ++(define_insn_reservation "la464_idiv_si" 12 ++ (and (match_test "TARGET_uARCH_LA464 || TARGET_uARCH_LA664") ++ (and (eq_attr "type" "idiv") ++ (eq_attr "mode" "SI"))) ++ "la464_alu1 | la464_alu2") ++ ++(define_insn_reservation "la464_idiv_di" 25 ++ (and (match_test "TARGET_uARCH_LA464 || TARGET_uARCH_LA664") ++ (and (eq_attr "type" "idiv") ++ (eq_attr "mode" "DI"))) ++ "la464_alu1 | la464_alu2") ++ ++(define_insn_reservation "la464_load" 4 ++ (and (match_test "TARGET_uARCH_LA464 || TARGET_uARCH_LA664") ++ (eq_attr "type" "load")) ++ "la464_mem1 | la464_mem2") ++ ++(define_insn_reservation "la464_gpr_fp" 16 ++ (and (match_test "TARGET_uARCH_LA464 || TARGET_uARCH_LA664") ++ (eq_attr "type" "mftg,mgtf")) ++ "la464_mem1") ++ ++(define_insn_reservation "la464_fpload" 4 ++ (and (match_test "TARGET_uARCH_LA464 || TARGET_uARCH_LA664") ++ (eq_attr "type" "fpload")) ++ "la464_mem1 | la464_mem2") ++ ++(define_insn_reservation "la464_prefetch" 0 ++ (and (match_test "TARGET_uARCH_LA464 || TARGET_uARCH_LA664") ++ (eq_attr "type" "prefetch,prefetchx")) ++ "la464_mem1 | la464_mem2") ++ ++(define_insn_reservation "la464_store" 0 ++ (and (match_test "TARGET_uARCH_LA464 || TARGET_uARCH_LA664") ++ (eq_attr "type" "store,fpstore,fpidxstore")) ++ "la464_mem1 | la464_mem2") ++ ++(define_insn_reservation "la464_fadd" 4 ++ (and (match_test "TARGET_uARCH_LA464 || TARGET_uARCH_LA664") ++ (eq_attr "type" "fadd,fmul,fmadd")) ++ "la464_falu1 | la464_falu2") ++ ++(define_insn_reservation "la464_fcmp" 2 ++ (and (match_test "TARGET_uARCH_LA464 || TARGET_uARCH_LA664") ++ (eq_attr "type" "fabs,fcmp,fmove,fneg")) ++ "la464_falu1 | la464_falu2") ++ ++(define_insn_reservation "la464_fcvt" 4 ++ (and (match_test "TARGET_uARCH_LA464 || TARGET_uARCH_LA664") ++ (eq_attr "type" "fcvt")) ++ "la464_falu1 | la464_falu2") ++ ++(define_insn_reservation "la464_fdiv_sf" 12 ++ (and (match_test "TARGET_uARCH_LA464 || TARGET_uARCH_LA664") ++ (and (eq_attr "type" "fdiv,frdiv,fsqrt,frsqrt") ++ (eq_attr "mode" "SF"))) ++ "la464_falu1 | la464_falu2") ++ ++(define_insn_reservation "la464_fdiv_df" 19 ++ (and (match_test "TARGET_uARCH_LA464 || TARGET_uARCH_LA664") ++ (and (eq_attr "type" "fdiv,frdiv,fsqrt,frsqrt") ++ (eq_attr "mode" "DF"))) ++ "la464_falu1 | la464_falu2") ++ ++;; Force single-dispatch for unknown or multi. 
++(define_insn_reservation "la464_unknown" 1 ++ (and (match_test "TARGET_uARCH_LA464 || TARGET_uARCH_LA664") ++ (eq_attr "type" "unknown,multi,atomic,syncloop")) ++ "la464_alu1 + la464_alu2 + la464_falu1 ++ + la464_falu2 + la464_mem1 + la464_mem2") ++ ++;; End of DFA-based pipeline description for la464 +diff --git a/gcc/config/loongarch/larchintrin.h b/gcc/config/loongarch/larchintrin.h +index c649bf3f4..8e26ed6f0 100644 +--- a/gcc/config/loongarch/larchintrin.h ++++ b/gcc/config/loongarch/larchintrin.h +@@ -1,384 +1,353 @@ + /* Intrinsics for LoongArch BASE operations. ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. ++ Contributed by Loongson Ltd. + +- Copyright (C) 2019 Free Software Foundation, Inc. +- Contributed by xuchenghua@loongson.cn. ++This file is part of GCC. + +- This file is part of GCC. ++GCC is free software; you can redistribute it and/or modify it ++under the terms of the GNU General Public License as published ++by the Free Software Foundation; either version 3, or (at your ++option) any later version. + +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. ++GCC is distributed in the hope that it will be useful, but WITHOUT ++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++License for more details. + +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. ++Under Section 7 of GPL version 3, you are granted additional ++permissions described in the GCC Runtime Library Exception, version ++3.1, as published by the Free Software Foundation. + +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- . */ ++You should have received a copy of the GNU General Public License and ++a copy of the GCC Runtime Library Exception along with this program; ++see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++. 
*/ + + #ifndef _GCC_LOONGARCH_BASE_INTRIN_H + #define _GCC_LOONGARCH_BASE_INTRIN_H + + #ifdef __cplusplus +-extern "C"{ ++extern "C" { + #endif + +-typedef struct drdtime{ +- unsigned long dvalue; +- unsigned long dtimeid; ++typedef struct drdtime ++{ ++ unsigned long dvalue; ++ unsigned long dtimeid; + } __drdtime_t; + +-typedef struct rdtime{ +- unsigned int value; +- unsigned int timeid; ++typedef struct rdtime ++{ ++ unsigned int value; ++ unsigned int timeid; + } __rdtime_t; + + #ifdef __loongarch64 +-extern __inline __drdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__builtin_loongarch_rdtime_d (void) ++extern __inline __drdtime_t ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__rdtime_d (void) + { +- __drdtime_t drdtime; ++ __drdtime_t __drdtime; + __asm__ volatile ( + "rdtime.d\t%[val],%[tid]\n\t" +- : [val]"=&r"(drdtime.dvalue),[tid]"=&r"(drdtime.dtimeid) +- : +- ); +- return drdtime; ++ : [val]"=&r"(__drdtime.dvalue),[tid]"=&r"(__drdtime.dtimeid) ++ :); ++ return __drdtime; + } +-#define __rdtime_d __builtin_loongarch_rdtime_d + #endif + +-extern __inline __rdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__builtin_loongarch_rdtimeh_w (void) ++extern __inline __rdtime_t ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__rdtimeh_w (void) + { +- __rdtime_t rdtime; ++ __rdtime_t __rdtime; + __asm__ volatile ( + "rdtimeh.w\t%[val],%[tid]\n\t" +- : [val]"=&r"(rdtime.value),[tid]"=&r"(rdtime.timeid) +- : +- ); +- return rdtime; ++ : [val]"=&r"(__rdtime.value),[tid]"=&r"(__rdtime.timeid) ++ :); ++ return __rdtime; + } +-#define __rdtimel_w __builtin_loongarch_rdtimel_w + +-extern __inline __rdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__builtin_loongarch_rdtimel_w (void) ++extern __inline __rdtime_t ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__rdtimel_w (void) + { +- __rdtime_t rdtime; ++ __rdtime_t __rdtime; + __asm__ volatile ( + "rdtimel.w\t%[val],%[tid]\n\t" +- : [val]"=&r"(rdtime.value),[tid]"=&r"(rdtime.timeid) +- : +- ); +- return rdtime; ++ : [val]"=&r"(__rdtime.value),[tid]"=&r"(__rdtime.timeid) ++ :); ++ return __rdtime; + } +-#define __rdtimeh_w __builtin_loongarch_rdtimeh_w +- +-/* Assembly instruction format: rj, fcsr */ +-/* Data types in instruction templates: USI, UQI */ +-#define __movfcsr2gr(/*ui5*/_1) __builtin_loongarch_movfcsr2gr((_1)); +- +-/* Assembly instruction format: 0, fcsr, rj */ +-/* Data types in instruction templates: VOID, UQI, USI */ +-#define __movgr2fcsr(/*ui5*/ _1, _2) __builtin_loongarch_movgr2fcsr((unsigned short)_1, (unsigned int)_2); +- +-#ifdef __loongarch32 +-/* Assembly instruction format: ui5, rj, si12 */ +-/* Data types in instruction templates: VOID, USI, USI, SI */ +-#define __cacop(/*ui5*/ _1, /*unsigned int*/ _2, /*si12*/ _3) ((void)__builtin_loongarch_cacop((_1), (unsigned int)(_2), (_3))) +-#elif defined __loongarch64 +-/* Assembly instruction format: ui5, rj, si12 */ +-/* Data types in instruction templates: VOID, USI, UDI, SI */ +-#define __dcacop(/*ui5*/ _1, /*unsigned long int*/ _2, /*si12*/ _3) ((void)__builtin_loongarch_dcacop((_1), (unsigned long int)(_2), (_3))) ++ ++/* Assembly instruction format: rj, fcsr. */ ++/* Data types in instruction templates: USI, UQI. */ ++#define __movfcsr2gr(/*ui5*/ _1) __builtin_loongarch_movfcsr2gr ((_1)); ++ ++/* Assembly instruction format: fcsr, rj. */ ++/* Data types in instruction templates: VOID, UQI, USI. 
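The timer access routines above are now plain inline functions named __rdtime_d, __rdtimel_w and __rdtimeh_w instead of __builtin_loongarch_* definitions wrapped by macros. A minimal usage sketch, assuming larchintrin.h is included on a __loongarch64 target (read_stable_counter is a hypothetical helper, not part of the patch):

#include <larchintrin.h>

/* Read the 64-bit stable counter via rdtime.d; dvalue holds the counter
   value and dtimeid the counter ID, per the __drdtime_t struct above.  */
unsigned long
read_stable_counter (void)
{
  __drdtime_t t = __rdtime_d ();
  return t.dvalue;
}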
*/ ++#define __movgr2fcsr(/*ui5*/ _1, _2) \ ++ __builtin_loongarch_movgr2fcsr ((_1), (unsigned int) _2); ++ ++#if defined __loongarch64 ++/* Assembly instruction format: ui5, rj, si12. */ ++/* Data types in instruction templates: VOID, USI, UDI, SI. */ ++#define __cacop_d(/*ui5*/ _1, /*unsigned long int*/ _2, /*si12*/ _3) \ ++ ((void) __builtin_loongarch_cacop_d ((_1), (unsigned long int) (_2), (_3))) + #else +-# error "Don't support this ABI." ++#error "Unsupported ABI." + #endif + +-/* Assembly instruction format: rd, rj */ +-/* Data types in instruction templates: USI, USI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-unsigned int __cpucfg(unsigned int _1) ++/* Assembly instruction format: rd, rj. */ ++/* Data types in instruction templates: USI, USI. */ ++extern __inline unsigned int ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__cpucfg (unsigned int _1) + { +- return (unsigned int)__builtin_loongarch_cpucfg((unsigned int)_1); ++ return (unsigned int) __builtin_loongarch_cpucfg ((unsigned int) _1); + } + + #ifdef __loongarch64 +-/* Assembly instruction format: rd, rj */ +-/* Data types in instruction templates: DI, DI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-void __asrtle_d(long int _1, long int _2) ++/* Assembly instruction format: rj, rk. */ ++/* Data types in instruction templates: DI, DI. */ ++extern __inline void ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__asrtle_d (long int _1, long int _2) + { +- __builtin_loongarch_asrtle_d((long int)_1, (long int)_2); ++ __builtin_loongarch_asrtle_d ((long int) _1, (long int) _2); + } + +-/* Assembly instruction format: rd, rj */ +-/* Data types in instruction templates: DI, DI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-void __asrtgt_d(long int _1, long int _2) ++/* Assembly instruction format: rj, rk. */ ++/* Data types in instruction templates: DI, DI. */ ++extern __inline void ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__asrtgt_d (long int _1, long int _2) + { +- __builtin_loongarch_asrtgt_d((long int)_1, (long int)_2); ++ __builtin_loongarch_asrtgt_d ((long int) _1, (long int) _2); + } + #endif + +-#ifdef __loongarch32 +-/* Assembly instruction format: rd, rj, ui5 */ +-/* Data types in instruction templates: SI, SI, UQI */ +-#define __lddir(/*int*/ _1, /*ui5*/ _2) ((int)__builtin_loongarch_lddir((int)(_1), (_2))) +-#elif defined __loongarch64 +-/* Assembly instruction format: rd, rj, ui5 */ +-/* Data types in instruction templates: DI, DI, UQI */ +-#define __dlddir(/*long int*/ _1, /*ui5*/ _2) ((long int)__builtin_loongarch_dlddir((long int)(_1), (_2))) ++#if defined __loongarch64 ++/* Assembly instruction format: rd, rj, ui5. */ ++/* Data types in instruction templates: DI, DI, UQI. */ ++#define __lddir_d(/*long int*/ _1, /*ui5*/ _2) \ ++ ((long int) __builtin_loongarch_lddir_d ((long int) (_1), (_2))) + #else +-# error "Don't support this ABI." ++#error "Unsupported ABI." 
+ #endif + +-#ifdef __loongarch32 +-/* Assembly instruction format: rj, ui5 */ +-/* Data types in instruction templates: VOID, SI, UQI */ +-#define __ldpte(/*int*/ _1, /*ui5*/ _2) ((void)__builtin_loongarch_ldpte((int)(_1), (_2))) +-#elif defined __loongarch64 +-/* Assembly instruction format: rj, ui5 */ +-/* Data types in instruction templates: VOID, DI, UQI */ +-#define __dldpte(/*long int*/ _1, /*ui5*/ _2) ((void)__builtin_loongarch_dldpte((long int)(_1), (_2))) ++#if defined __loongarch64 ++/* Assembly instruction format: rj, ui5. */ ++/* Data types in instruction templates: VOID, DI, UQI. */ ++#define __ldpte_d(/*long int*/ _1, /*ui5*/ _2) \ ++ ((void) __builtin_loongarch_ldpte_d ((long int) (_1), (_2))) + #else +-# error "Don't support this ABI." ++#error "Unsupported ABI." + #endif + +-/* Assembly instruction format: rd, rj, rk */ +-/* Data types in instruction templates: SI, QI, SI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-int __crc_w_b_w(char _1, int _2) ++/* Assembly instruction format: rd, rj, rk. */ ++/* Data types in instruction templates: SI, QI, SI. */ ++extern __inline int ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__crc_w_b_w (char _1, int _2) + { +- return (int)__builtin_loongarch_crc_w_b_w((char)_1, (int)_2); ++ return (int) __builtin_loongarch_crc_w_b_w ((char) _1, (int) _2); + } + +-/* Assembly instruction format: rd, rj, rk */ +-/* Data types in instruction templates: SI, HI, SI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-int __crc_w_h_w(short _1, int _2) ++/* Assembly instruction format: rd, rj, rk. */ ++/* Data types in instruction templates: SI, HI, SI. */ ++extern __inline int ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__crc_w_h_w (short _1, int _2) + { +- return (int)__builtin_loongarch_crc_w_h_w((short)_1, (int)_2); ++ return (int) __builtin_loongarch_crc_w_h_w ((short) _1, (int) _2); + } + +-/* Assembly instruction format: rd, rj, rk */ +-/* Data types in instruction templates: SI, SI, SI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-int __crc_w_w_w(int _1, int _2) ++/* Assembly instruction format: rd, rj, rk. */ ++/* Data types in instruction templates: SI, SI, SI. */ ++extern __inline int ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__crc_w_w_w (int _1, int _2) + { +- return (int)__builtin_loongarch_crc_w_w_w((int)_1, (int)_2); ++ return (int) __builtin_loongarch_crc_w_w_w ((int) _1, (int) _2); + } + + #ifdef __loongarch64 +-/* Assembly instruction format: rd, rj, rk */ +-/* Data types in instruction templates: SI, DI, SI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-int __crc_w_d_w(long int _1, int _2) ++/* Assembly instruction format: rd, rj, rk. */ ++/* Data types in instruction templates: SI, DI, SI. */ ++extern __inline int ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__crc_w_d_w (long int _1, int _2) + { +- return (int)__builtin_loongarch_crc_w_d_w((long int)_1, (int)_2); ++ return (int) __builtin_loongarch_crc_w_d_w ((long int) _1, (int) _2); + } + #endif + +-/* Assembly instruction format: rd, rj, rk */ +-/* Data types in instruction templates: SI, QI, SI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-int __crcc_w_b_w(char _1, int _2) ++/* Assembly instruction format: rd, rj, rk. 
*/ ++/* Data types in instruction templates: SI, QI, SI. */ ++extern __inline int ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__crcc_w_b_w (char _1, int _2) + { +- return (int)__builtin_loongarch_crcc_w_b_w((char)_1, (int)_2); ++ return (int) __builtin_loongarch_crcc_w_b_w ((char) _1, (int) _2); + } + +-/* Assembly instruction format: rd, rj, rk */ +-/* Data types in instruction templates: SI, HI, SI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-int __crcc_w_h_w(short _1, int _2) ++/* Assembly instruction format: rd, rj, rk. */ ++/* Data types in instruction templates: SI, HI, SI. */ ++extern __inline int ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__crcc_w_h_w (short _1, int _2) + { +- return (int)__builtin_loongarch_crcc_w_h_w((short)_1, (int)_2); ++ return (int) __builtin_loongarch_crcc_w_h_w ((short) _1, (int) _2); + } + +-/* Assembly instruction format: rd, rj, rk */ +-/* Data types in instruction templates: SI, SI, SI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-int __crcc_w_w_w(int _1, int _2) ++/* Assembly instruction format: rd, rj, rk. */ ++/* Data types in instruction templates: SI, SI, SI. */ ++extern __inline int ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__crcc_w_w_w (int _1, int _2) + { +- return (int)__builtin_loongarch_crcc_w_w_w((int)_1, (int)_2); ++ return (int) __builtin_loongarch_crcc_w_w_w ((int) _1, (int) _2); + } + + #ifdef __loongarch64 +-/* Assembly instruction format: rd, rj, rk */ +-/* Data types in instruction templates: SI, DI, SI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-int __crcc_w_d_w(long int _1, int _2) ++/* Assembly instruction format: rd, rj, rk. */ ++/* Data types in instruction templates: SI, DI, SI. */ ++extern __inline int ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__crcc_w_d_w (long int _1, int _2) + { +- return (int)__builtin_loongarch_crcc_w_d_w((long int)_1, (int)_2); ++ return (int) __builtin_loongarch_crcc_w_d_w ((long int) _1, (int) _2); + } + #endif + +-/* Assembly instruction format: rd, ui14 */ +-/* Data types in instruction templates: USI, USI */ +-#define __csrrd(/*ui14*/ _1) ((unsigned int)__builtin_loongarch_csrrd((_1))) ++/* Assembly instruction format: rd, ui14. */ ++/* Data types in instruction templates: USI, USI. */ ++#define __csrrd_w(/*ui14*/ _1) \ ++ ((unsigned int) __builtin_loongarch_csrrd_w ((_1))) + +-/* Assembly instruction format: rd, ui14 */ +-/* Data types in instruction templates: USI, USI, USI */ +-#define __csrwr(/*unsigned int*/ _1, /*ui14*/ _2) ((unsigned int)__builtin_loongarch_csrwr((unsigned int)(_1), (_2))) ++/* Assembly instruction format: rd, ui14. */ ++/* Data types in instruction templates: USI, USI, USI. */ ++#define __csrwr_w(/*unsigned int*/ _1, /*ui14*/ _2) \ ++ ((unsigned int) __builtin_loongarch_csrwr_w ((unsigned int) (_1), (_2))) + +-/* Assembly instruction format: rd, rj, ui14 */ +-/* Data types in instruction templates: USI, USI, USI, USI */ +-#define __csrxchg(/*unsigned int*/ _1, /*unsigned int*/ _2, /*ui14*/ _3) ((unsigned int)__builtin_loongarch_csrxchg((unsigned int)(_1), (unsigned int)(_2), (_3))) ++/* Assembly instruction format: rd, rj, ui14. */ ++/* Data types in instruction templates: USI, USI, USI, USI. 
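The CRC builtins keep their __crc_w_*_w and __crcc_w_*_w spellings but are reformatted as GNU-style inline functions. A usage sketch that folds a stream of 32-bit words through __crc_w_w_w (crc_words is a hypothetical helper; assumes larchintrin.h is included):

/* Accumulate a CRC over N words with the inline wrapper declared above.
   The starting CRC value is supplied by the caller.  */
static int
crc_words (const int *buf, int n, int crc)
{
  for (int i = 0; i < n; i++)
    crc = __crc_w_w_w (buf[i], crc);
  return crc;
}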
*/ ++#define __csrxchg_w(/*unsigned int*/ _1, /*unsigned int*/ _2, /*ui14*/ _3) \ ++ ((unsigned int) __builtin_loongarch_csrxchg_w ((unsigned int) (_1), \ ++ (unsigned int) (_2), (_3))) + + #ifdef __loongarch64 +-/* Assembly instruction format: rd, ui14 */ +-/* Data types in instruction templates: UDI, USI */ +-#define __dcsrrd(/*ui14*/ _1) ((unsigned long int)__builtin_loongarch_dcsrrd((_1))) +- +-/* Assembly instruction format: rd, ui14 */ +-/* Data types in instruction templates: UDI, UDI, USI */ +-#define __dcsrwr(/*unsigned long int*/ _1, /*ui14*/ _2) ((unsigned long int)__builtin_loongarch_dcsrwr((unsigned long int)(_1), (_2))) +- +-/* Assembly instruction format: rd, rj, ui14 */ +-/* Data types in instruction templates: UDI, UDI, UDI, USI */ +-#define __dcsrxchg(/*unsigned long int*/ _1, /*unsigned long int*/ _2, /*ui14*/ _3) ((unsigned long int)__builtin_loongarch_dcsrxchg((unsigned long int)(_1), (unsigned long int)(_2), (_3))) ++/* Assembly instruction format: rd, ui14. */ ++/* Data types in instruction templates: UDI, USI. */ ++#define __csrrd_d(/*ui14*/ _1) \ ++ ((unsigned long int) __builtin_loongarch_csrrd_d ((_1))) ++ ++/* Assembly instruction format: rd, ui14. */ ++/* Data types in instruction templates: UDI, UDI, USI. */ ++#define __csrwr_d(/*unsigned long int*/ _1, /*ui14*/ _2) \ ++ ((unsigned long int) __builtin_loongarch_csrwr_d ((unsigned long int) (_1), \ ++ (_2))) ++ ++/* Assembly instruction format: rd, rj, ui14. */ ++/* Data types in instruction templates: UDI, UDI, UDI, USI. */ ++#define __csrxchg_d(/*unsigned long int*/ _1, /*unsigned long int*/ _2, \ ++ /*ui14*/ _3) \ ++ ((unsigned long int) __builtin_loongarch_csrxchg_d ( \ ++ (unsigned long int) (_1), (unsigned long int) (_2), (_3))) + #endif + +-/* Assembly instruction format: rd, rj */ +-/* Data types in instruction templates: UQI, USI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-unsigned char __iocsrrd_b(unsigned int _1) ++/* Assembly instruction format: rd, rj. */ ++/* Data types in instruction templates: UQI, USI. */ ++extern __inline unsigned char ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__iocsrrd_b (unsigned int _1) + { +- return (unsigned char)__builtin_loongarch_iocsrrd_b((unsigned int)_1); ++ return (unsigned char) __builtin_loongarch_iocsrrd_b ((unsigned int) _1); + } + +-/* Assembly instruction format: rd, rj */ +-/* Data types in instruction templates: UHI, USI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-unsigned short __iocsrrd_h(unsigned int _1) ++/* Assembly instruction format: rd, rj. */ ++/* Data types in instruction templates: UHI, USI. */ ++extern __inline unsigned char ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__iocsrrd_h (unsigned int _1) + { +- return (unsigned short)__builtin_loongarch_iocsrrd_h((unsigned int)_1); ++ return (unsigned short) __builtin_loongarch_iocsrrd_h ((unsigned int) _1); + } + +-/* Assembly instruction format: rd, rj */ +-/* Data types in instruction templates: USI, USI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-unsigned int __iocsrrd_w(unsigned int _1) ++/* Assembly instruction format: rd, rj. */ ++/* Data types in instruction templates: USI, USI. 
*/ ++extern __inline unsigned int ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__iocsrrd_w (unsigned int _1) + { +- return (unsigned int)__builtin_loongarch_iocsrrd_w((unsigned int)_1); ++ return (unsigned int) __builtin_loongarch_iocsrrd_w ((unsigned int) _1); + } + + #ifdef __loongarch64 +-/* Assembly instruction format: rd, rj */ +-/* Data types in instruction templates: UDI, USI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-unsigned long int __iocsrrd_d(unsigned int _1) ++/* Assembly instruction format: rd, rj. */ ++/* Data types in instruction templates: UDI, USI. */ ++extern __inline unsigned long int ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__iocsrrd_d (unsigned int _1) + { +- return (unsigned long int)__builtin_loongarch_iocsrrd_d((unsigned int)_1); ++ return (unsigned long int) __builtin_loongarch_iocsrrd_d ((unsigned int) _1); + } + #endif + +-/* Assembly instruction format: rd, rj */ +-/* Data types in instruction templates: VOID, UQI, USI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-void __iocsrwr_b(unsigned char _1, unsigned int _2) ++/* Assembly instruction format: rd, rj. */ ++/* Data types in instruction templates: VOID, UQI, USI. */ ++extern __inline void ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__iocsrwr_b (unsigned char _1, unsigned int _2) + { +- return (void)__builtin_loongarch_iocsrwr_b((unsigned char)_1, (unsigned int)_2); ++ __builtin_loongarch_iocsrwr_b ((unsigned char) _1, (unsigned int) _2); + } + +-/* Assembly instruction format: rd, rj */ +-/* Data types in instruction templates: VOID, UHI, USI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-void __iocsrwr_h(unsigned short _1, unsigned int _2) ++/* Assembly instruction format: rd, rj. */ ++/* Data types in instruction templates: VOID, UHI, USI. */ ++extern __inline void ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__iocsrwr_h (unsigned short _1, unsigned int _2) + { +- return (void)__builtin_loongarch_iocsrwr_h((unsigned short)_1, (unsigned int)_2); ++ __builtin_loongarch_iocsrwr_h ((unsigned short) _1, (unsigned int) _2); + } + +-/* Assembly instruction format: rd, rj */ +-/* Data types in instruction templates: VOID, USI, USI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-void __iocsrwr_w(unsigned int _1, unsigned int _2) ++/* Assembly instruction format: rd, rj. */ ++/* Data types in instruction templates: VOID, USI, USI. */ ++extern __inline void ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__iocsrwr_w (unsigned int _1, unsigned int _2) + { +- return (void)__builtin_loongarch_iocsrwr_w((unsigned int)_1, (unsigned int)_2); ++ __builtin_loongarch_iocsrwr_w ((unsigned int) _1, (unsigned int) _2); + } + + #ifdef __loongarch64 +-/* Assembly instruction format: rd, rj */ +-/* Data types in instruction templates: VOID, UDI, USI */ +-extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-void __iocsrwr_d(unsigned long int _1, unsigned int _2) ++/* Assembly instruction format: rd, rj. */ ++/* Data types in instruction templates: VOID, UDI, USI. 
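The IOCSR access routines above follow the same pattern: __iocsrrd_b/h/w/d and __iocsrwr_b/h/w/d become inline functions rather than builtin wrappers. A sketch of a read-modify-write on an IOCSR register (IOCSR_REG is a hypothetical offset used purely for illustration; these are privileged operations):

#define IOCSR_REG 0x420u	/* hypothetical IOCSR register offset */

/* Set one bit in a 32-bit IOCSR register using the intrinsics above.  */
static void
iocsr_set_bit (unsigned int bit)
{
  unsigned int v = __iocsrrd_w (IOCSR_REG);
  __iocsrwr_w (v | (1u << bit), IOCSR_REG);
}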
*/ ++extern __inline void ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__iocsrwr_d (unsigned long int _1, unsigned int _2) + { +- return (void)__builtin_loongarch_iocsrwr_d((unsigned long int)_1, (unsigned int)_2); ++ __builtin_loongarch_iocsrwr_d ((unsigned long int) _1, (unsigned int) _2); + } + #endif + +-/* Assembly instruction format: ui15 */ +-/* Data types in instruction templates: UQI */ +-#define __dbar(/*ui15*/ _1) __builtin_loongarch_dbar((_1)) +- +-/* Assembly instruction format: ui15 */ +-/* Data types in instruction templates: UQI */ +-#define __ibar(/*ui15*/ _1) __builtin_loongarch_ibar((_1)) +- +-#define __builtin_loongarch_syscall(a) \ +-{ \ +- __asm__ volatile ("syscall %0\n\t" \ +- ::"I"(a)); \ +-} +-#define __syscall __builtin_loongarch_syscall +- +-#define __builtin_loongarch_break(a) \ +-{ \ +- __asm__ volatile ("break %0\n\t" \ +- ::"I"(a)); \ +-} +-#define __break __builtin_loongarch_break +- +- +-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__builtin_loongarch_tlbsrch (void) +-{ +- __asm__ volatile ("tlbsrch\n\t"); +-} +-#define __tlbsrch __builtin_loongarch_tlbsrch +- +-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__builtin_loongarch_tlbrd (void) +-{ +- __asm__ volatile ("tlbrd\n\t"); +-} +-#define __tlbrd __builtin_loongarch_tlbrd ++/* Assembly instruction format: ui15. */ ++/* Data types in instruction templates: USI. */ ++#define __dbar(/*ui15*/ _1) __builtin_loongarch_dbar ((_1)) + +-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__builtin_loongarch_tlbwr (void) +-{ +- __asm__ volatile ("tlbwr\n\t"); +-} +-#define __tlbwr __builtin_loongarch_tlbwr ++/* Assembly instruction format: ui15. */ ++/* Data types in instruction templates: USI. */ ++#define __ibar(/*ui15*/ _1) __builtin_loongarch_ibar ((_1)) + +-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__builtin_loongarch_tlbfill (void) +-{ +- __asm__ volatile ("tlbfill\n\t"); +-} +-#define __tlbfill __builtin_loongarch_tlbfill +- +-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__builtin_loongarch_tlbclr (void) +-{ +- __asm__ volatile ("tlbclr\n\t"); +-} +-#define __tlbclr __builtin_loongarch_tlbclr +- +-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__builtin_loongarch_tlbflush (void) +-{ +- __asm__ volatile ("tlbflush\n\t"); +-} +-#define __tlbflush __builtin_loongarch_tlbflush ++/* Assembly instruction format: ui15. */ ++/* Data types in instruction templates: USI. */ ++#define __syscall(/*ui15*/ _1) __builtin_loongarch_syscall ((_1)) + ++/* Assembly instruction format: ui15. */ ++/* Data types in instruction templates: USI. */ ++#define __break(/*ui15*/ _1) __builtin_loongarch_break ((_1)) + + #ifdef __cplusplus + } +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 24757aaa1..515336e05 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -212,6 +212,9 @@ + ;; As ILASX but excludes V32QI. + (define_mode_iterator ILASX_DWH [V4DI V8SI V16HI]) + ++;; As LASX but excludes V32QI. ++(define_mode_iterator LASX_DWH [V4DF V8SF V4DI V8SI V16HI]) ++ + ;; As ILASX but excludes V4DI. + (define_mode_iterator ILASX_WHB [V8SI V16HI V32QI]) + +@@ -227,7 +230,7 @@ + ;; Only used for immediate set shuffle elements instruction. 
+ (define_mode_iterator LASX_WHB_W [V8SI V16HI V32QI V8SF]) + +-;; The atribute gives the integer vector mode with same size in Loongson ASX. ++;; The attribute gives the integer vector mode with same size in Loongson ASX. + (define_mode_attr VIMODE256 + [(V4DF "V4DI") + (V8SF "V8SI") +@@ -476,6 +479,37 @@ + (V16HI "w") + (V32QI "w")]) + ++(define_int_iterator FRINT256_S [UNSPEC_LASX_XVFRINTRP_S ++ UNSPEC_LASX_XVFRINTRZ_S ++ UNSPEC_LASX_XVFRINT ++ UNSPEC_LASX_XVFRINTRM_S]) ++ ++(define_int_iterator FRINT256_D [UNSPEC_LASX_XVFRINTRP_D ++ UNSPEC_LASX_XVFRINTRZ_D ++ UNSPEC_LASX_XVFRINT ++ UNSPEC_LASX_XVFRINTRM_D]) ++ ++(define_int_attr frint256_pattern_s ++ [(UNSPEC_LASX_XVFRINTRP_S "ceil") ++ (UNSPEC_LASX_XVFRINTRZ_S "btrunc") ++ (UNSPEC_LASX_XVFRINT "rint") ++ (UNSPEC_LASX_XVFRINTRM_S "floor")]) ++ ++(define_int_attr frint256_pattern_d ++ [(UNSPEC_LASX_XVFRINTRP_D "ceil") ++ (UNSPEC_LASX_XVFRINTRZ_D "btrunc") ++ (UNSPEC_LASX_XVFRINT "rint") ++ (UNSPEC_LASX_XVFRINTRM_D "floor")]) ++ ++(define_int_attr frint256_suffix ++ [(UNSPEC_LASX_XVFRINTRP_S "rp") ++ (UNSPEC_LASX_XVFRINTRP_D "rp") ++ (UNSPEC_LASX_XVFRINTRZ_S "rz") ++ (UNSPEC_LASX_XVFRINTRZ_D "rz") ++ (UNSPEC_LASX_XVFRINT "") ++ (UNSPEC_LASX_XVFRINTRM_S "rm") ++ (UNSPEC_LASX_XVFRINTRM_D "rm")]) ++ + (define_expand "vec_init" + [(match_operand:LASX 0 "register_operand") + (match_operand:LASX 1 "")] +@@ -497,7 +531,6 @@ + "xvpickev.\t%u0,%u2,%u1\n\txvpermi.d\t%u0,%u0,0xd8" + [(set_attr "type" "simd_permute") + (set_attr "mode" "") +- (set_attr "can_delay" "no") + (set_attr "length" "8")]) + + (define_expand "vec_unpacks_hi_v8sf" +@@ -522,7 +555,6 @@ + operands[2] = loongarch_lsx_vec_parallel_const_half (V8SFmode, false/*high_p*/); + }) + +- + (define_expand "vec_unpacks_hi_" + [(match_operand: 0 "register_operand") + (match_operand:ILASX_WHB 1 "register_operand")] +@@ -560,11 +592,11 @@ + }) + + (define_insn "lasx_xvinsgr2vr_" +- [(set (match_operand:LASX_WD 0 "register_operand" "=f") +- (vec_merge:LASX_WD +- (vec_duplicate:LASX_WD ++ [(set (match_operand:ILASX_DW 0 "register_operand" "=f") ++ (vec_merge:ILASX_DW ++ (vec_duplicate:ILASX_DW + (match_operand: 1 "reg_or_0_operand" "rJ")) +- (match_operand:LASX_WD 2 "register_operand" "0") ++ (match_operand:ILASX_DW 2 "register_operand" "0") + (match_operand 3 "const__operand" "")))] + "ISA_HAS_LASX" + { +@@ -651,28 +683,49 @@ + (set_attr "mode" "V4DI")]) + + ;; xshuf.w +-(define_insn "lasx_xvperm_w" +- [(set (match_operand:V8SI 0 "register_operand" "=f") +- (unspec:V8SI +- [(match_operand:V8SI 1 "register_operand" "f") +- (match_operand:V8SI 2 "register_operand" "f")] +- UNSPEC_LASX_XVPERM_W))] ++(define_insn "lasx_xvperm_" ++ [(set (match_operand:LASX_W 0 "register_operand" "=f") ++ (unspec:LASX_W ++ [(match_operand:LASX_W 1 "nonimmediate_operand" "f") ++ (match_operand:V8SI 2 "register_operand" "f")] ++ UNSPEC_LASX_XVPERM_W))] + "ISA_HAS_LASX" + "xvperm.w\t%u0,%u1,%u2" + [(set_attr "type" "simd_splat") +- (set_attr "mode" "V8SI")]) ++ (set_attr "mode" "")]) + + ;; xvpermi.d +-(define_insn "lasx_xvpermi_d" +- [(set (match_operand:V4DI 0 "register_operand" "=f") +- (unspec:V4DI +- [(match_operand:V4DI 1 "register_operand" "f") +- (match_operand 2 "const_uimm8_operand")] +- UNSPEC_LASX_XVPERMI_D))] ++(define_insn "lasx_xvpermi_d_" ++ [(set (match_operand:LASX 0 "register_operand" "=f") ++ (unspec:LASX ++ [(match_operand:LASX 1 "register_operand" "f") ++ (match_operand:SI 2 "const_uimm8_operand")] ++ UNSPEC_LASX_XVPERMI_D))] + "ISA_HAS_LASX" + "xvpermi.d\t%u0,%u1,%2" + [(set_attr "type" 
"simd_splat") +- (set_attr "mode" "V4DI")]) ++ (set_attr "mode" "")]) ++ ++(define_insn "lasx_xvpermi_d__1" ++ [(set (match_operand:LASX_D 0 "register_operand" "=f") ++ (vec_select:LASX_D ++ (match_operand:LASX_D 1 "register_operand" "f") ++ (parallel [(match_operand 2 "const_0_to_3_operand") ++ (match_operand 3 "const_0_to_3_operand") ++ (match_operand 4 "const_0_to_3_operand") ++ (match_operand 5 "const_0_to_3_operand")])))] ++ "ISA_HAS_LASX" ++{ ++ int mask = 0; ++ mask |= INTVAL (operands[2]) << 0; ++ mask |= INTVAL (operands[3]) << 2; ++ mask |= INTVAL (operands[4]) << 4; ++ mask |= INTVAL (operands[5]) << 6; ++ operands[2] = GEN_INT (mask); ++ return "xvpermi.d\t%u0,%u1,%2"; ++} ++ [(set_attr "type" "simd_splat") ++ (set_attr "mode" "")]) + + ;; xvpermi.q + (define_insn "lasx_xvpermi_q_" +@@ -698,82 +751,51 @@ + [(set_attr "type" "simd_copy") + (set_attr "mode" "V4DI")]) + +-(define_expand "vec_extract" +- [(match_operand: 0 "register_operand") +- (match_operand:ILASX 1 "register_operand") ++(define_expand "vec_set" ++ [(match_operand:ILASX_DW 0 "register_operand") ++ (match_operand: 1 "reg_or_0_operand") + (match_operand 2 "const__operand")] + "ISA_HAS_LASX" + { +- if (mode == SImode || mode == DImode) +- { +- emit_insn(gen_lasx_xvpickve2gr_ (operands[0], operands[1], operands[2])); +- } +- else +- { +- HOST_WIDE_INT size_0 = GET_MODE_SIZE (GET_MODE (operands[0])); +- HOST_WIDE_INT size_1 = GET_MODE_SIZE (GET_MODE (operands[1])); +- HOST_WIDE_INT val = INTVAL (operands[2]); ++ rtx index = GEN_INT (1 << INTVAL (operands[2])); ++ emit_insn (gen_lasx_xvinsgr2vr_ (operands[0], operands[1], ++ operands[0], index)); ++ DONE; ++}) + +- /* High part */ +- if (val >= size_1/size_0/2 ) +- { +- rtx dest1 = gen_reg_rtx (GET_MODE (operands[1])); +- rtx pos = GEN_INT( val - size_1/size_0/2); +- emit_insn (gen_lasx_xvpermi_q_ (dest1, dest1, operands[1], GEN_INT(1))); +- rtx dest2 = gen_reg_rtx (SImode); +- emit_insn (gen_lsx_vpickve2gr_ (dest2, +- gen_lowpart(mode, dest1), +- pos)); +- emit_move_insn (operands[0], +- gen_lowpart (mode, dest2)); +- } +- else +- { +- rtx dest1 = gen_reg_rtx (SImode); +- emit_insn (gen_lsx_vpickve2gr_ (dest1, +- gen_lowpart(mode, operands[1]), +- operands[2])); +- emit_move_insn (operands[0], +- gen_lowpart (mode, dest1)); +- } +- } ++(define_expand "vec_set" ++ [(match_operand:FLASX 0 "register_operand") ++ (match_operand: 1 "reg_or_0_operand") ++ (match_operand 2 "const__operand")] ++ "ISA_HAS_LASX" ++{ ++ rtx index = GEN_INT (1 << INTVAL (operands[2])); ++ emit_insn (gen_lasx_xvinsve0__scalar (operands[0], operands[1], ++ operands[0], index)); + DONE; + }) + + (define_expand "vec_extract" + [(match_operand: 0 "register_operand") +- (match_operand:FLASX 1 "register_operand") ++ (match_operand:LASX 1 "register_operand") + (match_operand 2 "const__operand")] + "ISA_HAS_LASX" + { +- rtx temp; +- HOST_WIDE_INT val = INTVAL (operands[2]); +- +- if (val == 0) +- temp = operands[1]; +- else +- { +- temp = gen_reg_rtx (mode); +- emit_insn (gen_lasx_xvpickve_ (temp, operands[1], operands[2])); +- } +- emit_insn (gen_lasx_vec_extract_ (operands[0], temp)); ++ loongarch_expand_vector_extract (operands[0], operands[1], ++ INTVAL (operands[2])); + DONE; + }) + +-(define_insn_and_split "lasx_vec_extract_" +- [(set (match_operand: 0 "register_operand" "=f") +- (vec_select: +- (match_operand:FLASX 1 "register_operand" "f") +- (parallel [(const_int 0)])))] ++(define_expand "vec_perm" ++ [(match_operand:LASX 0 "register_operand") ++ (match_operand:LASX 1 "register_operand") ++ 
(match_operand:LASX 2 "register_operand") ++ (match_operand: 3 "register_operand")] + "ISA_HAS_LASX" +- "#" +- "&& reload_completed" +- [(set (match_dup 0) (match_dup 1))] + { +- operands[1] = gen_rtx_REG (mode, REGNO (operands[1])); +-} +- [(set_attr "move_type" "fmove") +- (set_attr "mode" "")]) ++ loongarch_expand_vec_perm_1(operands); ++ DONE; ++}) + + ;; FIXME: 256?? + (define_expand "vcondu" +@@ -860,7 +882,6 @@ + { return loongarch_output_move (operands[0], operands[1]); } + [(set_attr "type" "simd_move,simd_load,simd_store,simd_copy,simd_insert") + (set_attr "mode" "") +- (set_attr "can_delay" "no,yes,yes,yes,yes") + (set_attr "length" "8,4,4,4,4")]) + + +@@ -868,7 +889,7 @@ + [(set (match_operand:LASX 0 "nonimmediate_operand") + (match_operand:LASX 1 "move_operand"))] + "reload_completed && ISA_HAS_LASX +- && loongarch_split_move_insn_p (operands[0], operands[1], insn)" ++ && loongarch_split_move_insn_p (operands[0], operands[1])" + [(const_int 0)] + { + loongarch_split_move_insn (operands[0], operands[1], curr_insn); +@@ -1143,7 +1164,25 @@ + [(set_attr "type" "simd_fmul") + (set_attr "mode" "")]) + +-(define_insn "div3" ++(define_expand "div3" ++ [(set (match_operand:FLASX 0 "register_operand") ++ (div:FLASX (match_operand:FLASX 1 "register_operand") ++ (match_operand:FLASX 2 "register_operand")))] ++ "ISA_HAS_LASX" ++{ ++ if (mode == V8SFmode ++ && TARGET_RECIP_VEC_DIV ++ && optimize_insn_for_speed_p () ++ && flag_finite_math_only && !flag_trapping_math ++ && flag_unsafe_math_optimizations) ++ { ++ loongarch_emit_swdivsf (operands[0], operands[1], ++ operands[2], V8SFmode); ++ DONE; ++ } ++}) ++ ++(define_insn "*div3" + [(set (match_operand:FLASX 0 "register_operand" "=f") + (div:FLASX (match_operand:FLASX 1 "register_operand" "f") + (match_operand:FLASX 2 "register_operand" "f")))] +@@ -1172,7 +1211,23 @@ + [(set_attr "type" "simd_fmadd") + (set_attr "mode" "")]) + +-(define_insn "sqrt2" ++(define_expand "sqrt2" ++ [(set (match_operand:FLASX 0 "register_operand") ++ (sqrt:FLASX (match_operand:FLASX 1 "register_operand")))] ++ "ISA_HAS_LASX" ++{ ++ if (mode == V8SFmode ++ && TARGET_RECIP_VEC_SQRT ++ && flag_unsafe_math_optimizations ++ && optimize_insn_for_speed_p () ++ && flag_finite_math_only && !flag_trapping_math) ++ { ++ loongarch_emit_swrsqrtsf (operands[0], operands[1], V8SFmode, 0); ++ DONE; ++ } ++}) ++ ++(define_insn "*sqrt2" + [(set (match_operand:FLASX 0 "register_operand" "=f") + (sqrt:FLASX (match_operand:FLASX 1 "register_operand" "f")))] + "ISA_HAS_LASX" +@@ -1307,13 +1362,13 @@ + [(set_attr "type" "simd_bit") + (set_attr "mode" "")]) + +-(define_insn "lasx_xvbitsel_" +- [(set (match_operand:ILASX 0 "register_operand" "=f") +- (ior:ILASX (and:ILASX (not:ILASX +- (match_operand:ILASX 3 "register_operand" "f")) +- (match_operand:ILASX 1 "register_operand" "f")) +- (and:ILASX (match_dup 3) +- (match_operand:ILASX 2 "register_operand" "f"))))] ++(define_insn "lasx_xvbitsel_" ++ [(set (match_operand:LASX 0 "register_operand" "=f") ++ (ior:LASX (and:LASX (not:LASX ++ (match_operand:LASX 3 "register_operand" "0")) ++ (match_operand:LASX 1 "register_operand" "f")) ++ (and:LASX (match_dup 3) ++ (match_operand:LASX 2 "register_operand" "f"))))] + "ISA_HAS_LASX" + "xvbitsel.v\t%u0,%u1,%u2,%u3" + [(set_attr "type" "simd_bitmov") +@@ -1363,11 +1418,11 @@ + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +-(define_expand "vec_cmp" +- [(set (match_operand:ILASX 0 "register_operand") +- (match_operator:ILASX 1 "" +- [(match_operand:ILASX 2 "register_operand") +- 
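At the source level, the vec_set and vec_extract expanders added above (together with the new vec_perm expander) are what GCC's generic vector extension maps to for 256-bit modes; a small sketch in plain C (no LASX intrinsics required), with the exact instruction choice left to the backend:

/* Eight-int 256-bit vector via the generic vector_size attribute.  With
   LASX enabled, element insert/extract would typically go through the new
   vec_set/vec_extract expanders (e.g. xvinsgr2vr.w for the insert).  */
typedef int v8si __attribute__ ((vector_size (32)));

int
set_and_pick (v8si *p, int x)
{
  (*p)[3] = x;		/* element insert  */
  return (*p)[5];	/* element extract */
}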
(match_operand:ILASX 3 "register_operand")]))] ++(define_expand "vec_cmp" ++ [(set (match_operand: 0 "register_operand") ++ (match_operator 1 "" ++ [(match_operand:LASX 2 "register_operand") ++ (match_operand:LASX 3 "register_operand")]))] + "ISA_HAS_LASX" + { + bool ok = loongarch_expand_int_vec_cmp (operands); +@@ -1375,11 +1430,11 @@ + DONE; + }) + +-(define_expand "vec_cmp" +- [(set (match_operand:FLASX 0 "register_operand") +- (match_operator:FLASX 1 "" +- [(match_operand:FLASX 2 "register_operand") +- (match_operand:FLASX 3 "register_operand")]))] ++(define_expand "vec_cmpu" ++ [(set (match_operand: 0 "register_operand") ++ (match_operator 1 "" ++ [(match_operand:ILASX 2 "register_operand") ++ (match_operand:ILASX 3 "register_operand")]))] + "ISA_HAS_LASX" + { + bool ok = loongarch_expand_fp_vec_cmp (operands); +@@ -1493,8 +1548,8 @@ + (V2DF "V8SI")]) + + (define_insn "lasx_xvreplgr2vr_" +- [(set (match_operand:LASX 0 "register_operand" "=f,f") +- (vec_duplicate:LASX ++ [(set (match_operand:ILASX 0 "register_operand" "=f,f") ++ (vec_duplicate:ILASX + (match_operand: 1 "reg_or_0_operand" "r,J")))] + "ISA_HAS_LASX" + { +@@ -1508,10 +1563,9 @@ + } + [(set_attr "type" "simd_fill") + (set_attr "mode" "") +- (set_attr "can_delay" "no") + (set_attr "length" "8")]) + +-(define_insn "lasx_xvflogb_" ++(define_insn "logb2" + [(set (match_operand:FLASX 0 "register_operand" "=f") + (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] + UNSPEC_LASX_XVFLOGB))] +@@ -1572,6 +1626,15 @@ + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) + ++(define_insn "lasx_xvfrecipe_" ++ [(set (match_operand:FLASX 0 "register_operand" "=f") ++ (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] ++ UNSPEC_RECIPE))] ++ "ISA_HAS_LASX && flag_unsafe_math_optimizations && TARGET_RECIP_VEC_DIV" ++ "xvfrecipe.\t%u0,%u1" ++ [(set_attr "type" "simd_fdiv") ++ (set_attr "mode" "")]) ++ + (define_insn "lasx_xvfrint_" + [(set (match_operand:FLASX 0 "register_operand" "=f") + (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] +@@ -1590,6 +1653,42 @@ + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) + ++ ++(define_insn "lasx_xvfrsqrte_" ++ [(set (match_operand:FLASX 0 "register_operand" "=f") ++ (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] ++ UNSPEC_RSQRTE))] ++ "ISA_HAS_LASX && flag_unsafe_math_optimizations && TARGET_RECIP_VEC_RSQRT" ++ "xvfrsqrte.\t%u0,%u1" ++ [(set_attr "type" "simd_fdiv") ++ (set_attr "mode" "")]) ++ ++(define_expand "rsqrt2" ++ [(set (match_operand:FLASX 0 "register_operand" "=f") ++ (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] ++ UNSPEC_LASX_XVFRSQRT))] ++ "ISA_HAS_LASX" ++{ ++ if (mode == V8SFmode ++ && TARGET_RECIP_VEC_RSQRT ++ && flag_unsafe_math_optimizations ++ && optimize_insn_for_speed_p () ++ && flag_finite_math_only && !flag_trapping_math) ++ { ++ loongarch_emit_swrsqrtsf (operands[0], operands[1], V8SFmode, 1); ++ DONE; ++ } ++}) ++ ++(define_insn "*rsqrt2" ++ [(set (match_operand:FLASX 0 "register_operand" "=f") ++ (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] ++ UNSPEC_LASX_XVFRSQRT))] ++ "ISA_HAS_LASX" ++ "xvfrsqrt.\t%u0,%u1" ++ [(set_attr "type" "simd_fdiv") ++ (set_attr "mode" "")]) ++ + (define_insn "lasx_xvftint_s__" + [(set (match_operand: 0 "register_operand" "=f") + (unspec: [(match_operand:FLASX 1 "register_operand" "f")] +@@ -2325,6 +2424,35 @@ + [(set_attr "type" "simd_shf") + (set_attr "mode" "")]) + ++(define_insn "lasx_xvshuf4i__1" ++ [(set (match_operand:LASX_W 0 
"register_operand" "=f") ++ (vec_select:LASX_W ++ (match_operand:LASX_W 1 "nonimmediate_operand" "f") ++ (parallel [(match_operand 2 "const_0_to_3_operand") ++ (match_operand 3 "const_0_to_3_operand") ++ (match_operand 4 "const_0_to_3_operand") ++ (match_operand 5 "const_0_to_3_operand") ++ (match_operand 6 "const_4_to_7_operand") ++ (match_operand 7 "const_4_to_7_operand") ++ (match_operand 8 "const_4_to_7_operand") ++ (match_operand 9 "const_4_to_7_operand")])))] ++ "ISA_HAS_LASX ++ && INTVAL (operands[2]) + 4 == INTVAL (operands[6]) ++ && INTVAL (operands[3]) + 4 == INTVAL (operands[7]) ++ && INTVAL (operands[4]) + 4 == INTVAL (operands[8]) ++ && INTVAL (operands[5]) + 4 == INTVAL (operands[9])" ++{ ++ int mask = 0; ++ mask |= INTVAL (operands[2]) << 0; ++ mask |= INTVAL (operands[3]) << 2; ++ mask |= INTVAL (operands[4]) << 4; ++ mask |= INTVAL (operands[5]) << 6; ++ operands[2] = GEN_INT (mask); ++ ++ return "xvshuf4i.w\t%u0,%u1,%2"; ++} ++ [(set_attr "type" "simd_shf") ++ (set_attr "mode" "")]) + + (define_insn "lasx_xvsrar_" + [(set (match_operand:ILASX 0 "register_operand" "=f") +@@ -2386,11 +2514,11 @@ + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +-(define_insn "lasx_xvshuf_" +- [(set (match_operand:ILASX_DWH 0 "register_operand" "=f") +- (unspec:ILASX_DWH [(match_operand: 1 "register_operand" "0") +- (match_operand:ILASX_DWH 2 "register_operand" "f") +- (match_operand:ILASX_DWH 3 "register_operand" "f")] ++(define_insn "lasx_xvshuf_" ++ [(set (match_operand:LASX_DWH 0 "register_operand" "=f") ++ (unspec:LASX_DWH [(match_operand:LASX_DWH 1 "register_operand" "0") ++ (match_operand:LASX_DWH 2 "register_operand" "f") ++ (match_operand:LASX_DWH 3 "register_operand" "f")] + UNSPEC_LASX_XVSHUF))] + "ISA_HAS_LASX" + "xvshuf.\t%u0,%u2,%u3" +@@ -2497,14 +2625,14 @@ + [(set_attr "type" "simd_splat") + (set_attr "mode" "")]) + +- (define_insn "lasx_xvreplve0__scalar" +- [(set (match_operand:FLASX 0 "register_operand" "=f") +- (unspec:FLASX [(match_operand: 1 "register_operand" "f")] +- UNSPEC_LASX_XVREPLVE0))] +- "ISA_HAS_LASX" +- "xvreplve0.\t%u0,%u1" +- [(set_attr "type" "simd_splat") +- (set_attr "mode" "")]) ++(define_insn "lasx_xvreplve0__scalar" ++[(set (match_operand:FLASX 0 "register_operand" "=f") ++ (vec_duplicate:FLASX ++ (match_operand: 1 "register_operand" "f")))] ++ "ISA_HAS_LASX" ++ "xvreplve0.\t%u0,%u1" ++ [(set_attr "type" "simd_splat") ++ (set_attr "mode" "")]) + + (define_insn "lasx_xvreplve0_q" + [(set (match_operand:V32QI 0 "register_operand" "=f") +@@ -2544,7 +2672,6 @@ + "xvfcvt.s.d\t%u0,%u2,%u1\n\txvpermi.d\t%u0,%u0,0xd8" + [(set_attr "type" "simd_fcvt") + (set_attr "mode" "V8SF") +- (set_attr "can_delay" "no") + (set_attr "length" "8")]) + + ;; Define for builtin function. +@@ -2579,7 +2706,6 @@ + "xvpermi.d\t%u0,%u1,0xfa\n\txvfcvtl.d.s\t%u0,%u0" + [(set_attr "type" "simd_fcvt") + (set_attr "mode" "V4DF") +- (set_attr "can_delay" "no") + (set_attr "length" "12")]) + + ;; Define for builtin function. 
+@@ -2614,7 +2740,6 @@ + "xvpermi.d\t%u0,%u1,0x50\n\txvfcvtl.d.s\t%u0,%u0" + [(set_attr "type" "simd_fcvt") + (set_attr "mode" "V4DF") +- (set_attr "can_delay" "no") + (set_attr "length" "8")]) + + (define_code_attr lasxbr +@@ -2653,8 +2778,7 @@ + "xvset.\t%z3%u1\n\tbcnez\t%Z3%0"); + } + [(set_attr "type" "simd_branch") +- (set_attr "mode" "") +- (set_attr "compact_form" "never")]) ++ (set_attr "mode" "")]) + + (define_insn "lasx__v_" + [(set (pc) (if_then_else +@@ -2672,12 +2796,8 @@ + "xvset.v\t%Z3%u1\n\tbcnez\t%Z3%0"); + } + [(set_attr "type" "simd_branch") +- (set_attr "mode" "") +- (set_attr "compact_form" "never")]) +- +- ++ (set_attr "mode" "")]) + +- + ;; loongson-asx. + (define_insn "lasx_vext2xv_h_b" + [(set (match_operand:V16HI 0 "register_operand" "=f") +@@ -3339,8 +3459,8 @@ + (set_attr "mode" "V8SF")]) + + (define_insn "lasx_xvfrintrne_s" +- [(set (match_operand:V8SI 0 "register_operand" "=f") +- (unspec:V8SI [(match_operand:V8SF 1 "register_operand" "f")] ++ [(set (match_operand:V8SF 0 "register_operand" "=f") ++ (unspec:V8SF [(match_operand:V8SF 1 "register_operand" "f")] + UNSPEC_LASX_XVFRINTRNE_S))] + "ISA_HAS_LASX" + "xvfrintrne.s\t%u0,%u1" +@@ -3348,8 +3468,8 @@ + (set_attr "mode" "V8SF")]) + + (define_insn "lasx_xvfrintrne_d" +- [(set (match_operand:V4DI 0 "register_operand" "=f") +- (unspec:V4DI [(match_operand:V4DF 1 "register_operand" "f")] ++ [(set (match_operand:V4DF 0 "register_operand" "=f") ++ (unspec:V4DF [(match_operand:V4DF 1 "register_operand" "f")] + UNSPEC_LASX_XVFRINTRNE_D))] + "ISA_HAS_LASX" + "xvfrintrne.d\t%u0,%u1" +@@ -3357,8 +3477,8 @@ + (set_attr "mode" "V4DF")]) + + (define_insn "lasx_xvfrintrz_s" +- [(set (match_operand:V8SI 0 "register_operand" "=f") +- (unspec:V8SI [(match_operand:V8SF 1 "register_operand" "f")] ++ [(set (match_operand:V8SF 0 "register_operand" "=f") ++ (unspec:V8SF [(match_operand:V8SF 1 "register_operand" "f")] + UNSPEC_LASX_XVFRINTRZ_S))] + "ISA_HAS_LASX" + "xvfrintrz.s\t%u0,%u1" +@@ -3366,8 +3486,8 @@ + (set_attr "mode" "V8SF")]) + + (define_insn "lasx_xvfrintrz_d" +- [(set (match_operand:V4DI 0 "register_operand" "=f") +- (unspec:V4DI [(match_operand:V4DF 1 "register_operand" "f")] ++ [(set (match_operand:V4DF 0 "register_operand" "=f") ++ (unspec:V4DF [(match_operand:V4DF 1 "register_operand" "f")] + UNSPEC_LASX_XVFRINTRZ_D))] + "ISA_HAS_LASX" + "xvfrintrz.d\t%u0,%u1" +@@ -3375,8 +3495,8 @@ + (set_attr "mode" "V4DF")]) + + (define_insn "lasx_xvfrintrp_s" +- [(set (match_operand:V8SI 0 "register_operand" "=f") +- (unspec:V8SI [(match_operand:V8SF 1 "register_operand" "f")] ++ [(set (match_operand:V8SF 0 "register_operand" "=f") ++ (unspec:V8SF [(match_operand:V8SF 1 "register_operand" "f")] + UNSPEC_LASX_XVFRINTRP_S))] + "ISA_HAS_LASX" + "xvfrintrp.s\t%u0,%u1" +@@ -3384,8 +3504,8 @@ + (set_attr "mode" "V8SF")]) + + (define_insn "lasx_xvfrintrp_d" +- [(set (match_operand:V4DI 0 "register_operand" "=f") +- (unspec:V4DI [(match_operand:V4DF 1 "register_operand" "f")] ++ [(set (match_operand:V4DF 0 "register_operand" "=f") ++ (unspec:V4DF [(match_operand:V4DF 1 "register_operand" "f")] + UNSPEC_LASX_XVFRINTRP_D))] + "ISA_HAS_LASX" + "xvfrintrp.d\t%u0,%u1" +@@ -3393,8 +3513,8 @@ + (set_attr "mode" "V4DF")]) + + (define_insn "lasx_xvfrintrm_s" +- [(set (match_operand:V8SI 0 "register_operand" "=f") +- (unspec:V8SI [(match_operand:V8SF 1 "register_operand" "f")] ++ [(set (match_operand:V8SF 0 "register_operand" "=f") ++ (unspec:V8SF [(match_operand:V8SF 1 "register_operand" "f")] + UNSPEC_LASX_XVFRINTRM_S))] + "ISA_HAS_LASX" + 
"xvfrintrm.s\t%u0,%u1" +@@ -3402,14 +3522,44 @@ + (set_attr "mode" "V8SF")]) + + (define_insn "lasx_xvfrintrm_d" +- [(set (match_operand:V4DI 0 "register_operand" "=f") +- (unspec:V4DI [(match_operand:V4DF 1 "register_operand" "f")] ++ [(set (match_operand:V4DF 0 "register_operand" "=f") ++ (unspec:V4DF [(match_operand:V4DF 1 "register_operand" "f")] + UNSPEC_LASX_XVFRINTRM_D))] + "ISA_HAS_LASX" + "xvfrintrm.d\t%u0,%u1" + [(set_attr "type" "simd_shift") + (set_attr "mode" "V4DF")]) + ++;; Vector versions of the floating-point frint patterns. ++;; Expands to btrunc, ceil, floor, rint. ++(define_insn "v8sf2" ++ [(set (match_operand:V8SF 0 "register_operand" "=f") ++ (unspec:V8SF [(match_operand:V8SF 1 "register_operand" "f")] ++ FRINT256_S))] ++ "ISA_HAS_LASX" ++ "xvfrint.s\t%u0,%u1" ++ [(set_attr "type" "simd_shift") ++ (set_attr "mode" "V8SF")]) ++ ++(define_insn "v4df2" ++ [(set (match_operand:V4DF 0 "register_operand" "=f") ++ (unspec:V4DF [(match_operand:V4DF 1 "register_operand" "f")] ++ FRINT256_D))] ++ "ISA_HAS_LASX" ++ "xvfrint.d\t%u0,%u1" ++ [(set_attr "type" "simd_shift") ++ (set_attr "mode" "V4DF")]) ++ ++;; Expands to round. ++(define_insn "round2" ++ [(set (match_operand:FLASX 0 "register_operand" "=f") ++ (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] ++ UNSPEC_LASX_XVFRINT))] ++ "ISA_HAS_LASX" ++ "xvfrint.\t%u0,%u1" ++ [(set_attr "type" "simd_shift") ++ (set_attr "mode" "")]) ++ + ;; Offset load and broadcast + (define_expand "lasx_xvldrepl_" + [(match_operand:LASX 0 "register_operand") +@@ -3435,6 +3585,19 @@ + (set_attr "mode" "") + (set_attr "length" "4")]) + ++;; Offset is "0" ++(define_insn "lasx_xvldrepl__insn_0" ++ [(set (match_operand:LASX 0 "register_operand" "=f") ++ (vec_duplicate:LASX ++ (mem: (match_operand:DI 1 "register_operand" "r"))))] ++ "ISA_HAS_LASX" ++{ ++ return "xvldrepl.\t%u0,%1,0"; ++} ++ [(set_attr "type" "simd_load") ++ (set_attr "mode" "") ++ (set_attr "length" "4")]) ++ + ;;XVADDWEV.H.B XVSUBWEV.H.B XVMULWEV.H.B + ;;XVADDWEV.H.BU XVSUBWEV.H.BU XVMULWEV.H.BU + (define_insn "lasx_xvwev_h_b" +@@ -4666,16 +4829,52 @@ + [(set_attr "type" "simd_shift") + (set_attr "mode" "")]) + +-(define_insn "lasx_xvpermi_w" +- [(set (match_operand:V8SI 0 "register_operand" "=f") +- (unspec:V8SI [(match_operand:V8SI 1 "register_operand" "0") +- (match_operand:V8SI 2 "register_operand" "f") +- (match_operand 3 "const_uimm8_operand" "")] +- UNSPEC_LASX_XVPERMI))] ++(define_mode_attr VDOUBLEMODEW256 ++ [(V8SI "V16SI") ++ (V8SF "V16SF")]) ++ ++(define_insn "lasx_xvpermi_" ++ [(set (match_operand:LASX_W 0 "register_operand" "=f") ++ (unspec:LASX_W [(match_operand:LASX_W 1 "register_operand" "0") ++ (match_operand:LASX_W 2 "register_operand" "f") ++ (match_operand 3 "const_uimm8_operand" "")] ++ UNSPEC_LASX_XVPERMI))] + "ISA_HAS_LASX" + "xvpermi.w\t%u0,%u2,%3" + [(set_attr "type" "simd_bit") +- (set_attr "mode" "V8SI")]) ++ (set_attr "mode" "")]) ++ ++(define_insn "lasx_xvpermi__1" ++ [(set (match_operand:LASX_W 0 "register_operand" "=f") ++ (vec_select:LASX_W ++ (vec_concat: ++ (match_operand:LASX_W 1 "register_operand" "f") ++ (match_operand:LASX_W 2 "register_operand" "0")) ++ (parallel [(match_operand 3 "const_0_to_3_operand") ++ (match_operand 4 "const_0_to_3_operand" ) ++ (match_operand 5 "const_8_to_11_operand" ) ++ (match_operand 6 "const_8_to_11_operand" ) ++ (match_operand 7 "const_4_to_7_operand" ) ++ (match_operand 8 "const_4_to_7_operand" ) ++ (match_operand 9 "const_12_to_15_operand") ++ (match_operand 10 "const_12_to_15_operand")])))] ++ 
"ISA_HAS_LASX ++ && INTVAL (operands[3]) + 4 == INTVAL (operands[7]) ++ && INTVAL (operands[4]) + 4 == INTVAL (operands[8]) ++ && INTVAL (operands[5]) + 4 == INTVAL (operands[9]) ++ && INTVAL (operands[6]) + 4 == INTVAL (operands[10])" ++{ ++ int mask = 0; ++ mask |= INTVAL (operands[3]) << 0; ++ mask |= INTVAL (operands[4]) << 2; ++ mask |= (INTVAL (operands[5]) - 8) << 4; ++ mask |= (INTVAL (operands[6]) - 8) << 6; ++ operands[3] = GEN_INT (mask); ++ ++ return "xvpermi.w\t%u0,%u1,%3"; ++} ++ [(set_attr "type" "simd_bit") ++ (set_attr "mode" "")]) + + (define_expand "lasx_xvld" + [(match_operand:V32QI 0 "register_operand") +@@ -4728,10 +4927,24 @@ + (set_attr "mode" "") + (set_attr "length" "4")]) + +-(define_insn "lasx_xvinsve0_" +- [(set (match_operand:ILASX_DW 0 "register_operand" "=f") +- (unspec:ILASX_DW [(match_operand:ILASX_DW 1 "register_operand" "0") +- (match_operand:ILASX_DW 2 "register_operand" "f") ++;; Offset is "0" ++(define_insn "lasx_xvstelm__insn_0" ++ [(set (mem: (match_operand:DI 0 "register_operand" "r")) ++ (vec_select: ++ (match_operand:LASX_WD 1 "register_operand" "f") ++ (parallel [(match_operand:SI 2 "const__operand")])))] ++ "ISA_HAS_LASX" ++{ ++ return "xvstelm.\t%u1,%0,0,%2"; ++} ++ [(set_attr "type" "simd_store") ++ (set_attr "mode" "") ++ (set_attr "length" "4")]) ++ ++(define_insn "lasx_xvinsve0_" ++ [(set (match_operand:LASX_WD 0 "register_operand" "=f") ++ (unspec:LASX_WD [(match_operand:LASX_WD 1 "register_operand" "0") ++ (match_operand:LASX_WD 2 "register_operand" "f") + (match_operand 3 "const__operand" "")] + UNSPEC_LASX_XVINSVE0))] + "ISA_HAS_LASX" +@@ -4739,6 +4952,18 @@ + [(set_attr "type" "simd_shf") + (set_attr "mode" "")]) + ++(define_insn "lasx_xvinsve0__scalar" ++ [(set (match_operand:FLASX 0 "register_operand" "=f") ++ (vec_merge:FLASX ++ (vec_duplicate:FLASX ++ (match_operand: 1 "register_operand" "f")) ++ (match_operand:FLASX 2 "register_operand" "0") ++ (match_operand 3 "const__operand" "")))] ++ "ISA_HAS_LASX" ++ "xvinsve0.\t%u0,%u1,%y3" ++ [(set_attr "type" "simd_insert") ++ (set_attr "mode" "")]) ++ + (define_insn "lasx_xvpickve_" + [(set (match_operand:LASX_WD 0 "register_operand" "=f") + (unspec:LASX_WD [(match_operand:LASX_WD 1 "register_operand" "f") +@@ -4749,6 +4974,16 @@ + [(set_attr "type" "simd_shf") + (set_attr "mode" "")]) + ++(define_insn "lasx_xvpickve__scalar" ++ [(set (match_operand: 0 "register_operand" "=f") ++ (vec_select: ++ (match_operand:FLASX 1 "register_operand" "f") ++ (parallel [(match_operand 2 "const__operand" "")])))] ++ "ISA_HAS_LASX" ++ "xvpickve.\t%u0,%u1,%2" ++ [(set_attr "type" "simd_shf") ++ (set_attr "mode" "")]) ++ + (define_insn "lasx_xvssrlrn__" + [(set (match_operand: 0 "register_operand" "=f") + (unspec: [(match_operand:ILASX_DWH 1 "register_operand" "f") +@@ -4823,3 +5058,142 @@ + [(set_attr "type" "simd_store") + (set_attr "mode" "DI")]) + ++(define_insn "vec_widen_mult_even_v8si" ++ [(set (match_operand:V4DI 0 "register_operand" "=f") ++ (mult:V4DI ++ (any_extend:V4DI ++ (vec_select:V4SI ++ (match_operand:V8SI 1 "register_operand" "%f") ++ (parallel [(const_int 0) (const_int 2) ++ (const_int 4) (const_int 6)]))) ++ (any_extend:V4DI ++ (vec_select:V4SI ++ (match_operand:V8SI 2 "register_operand" "f") ++ (parallel [(const_int 0) (const_int 2) ++ (const_int 4) (const_int 6)])))))] ++ "ISA_HAS_LASX" ++ "xvmulwev.d.w\t%u0,%u1,%u2" ++ [(set_attr "type" "simd_int_arith") ++ (set_attr "mode" "V4DI")]) ++ ++;; Vector reduction operation ++(define_expand "reduc_plus_scal_v4di" ++ [(match_operand:DI 0 
"register_operand") ++ (match_operand:V4DI 1 "register_operand")] ++ "ISA_HAS_LASX" ++{ ++ rtx tmp = gen_reg_rtx (V4DImode); ++ rtx tmp1 = gen_reg_rtx (V4DImode); ++ rtx vec_res = gen_reg_rtx (V4DImode); ++ emit_insn (gen_lasx_xvhaddw_q_d (tmp, operands[1], operands[1])); ++ emit_insn (gen_lasx_xvpermi_d_v4di (tmp1, tmp, GEN_INT (2))); ++ emit_insn (gen_addv4di3 (vec_res, tmp, tmp1)); ++ emit_insn (gen_vec_extractv4didi (operands[0], vec_res, const0_rtx)); ++ DONE; ++}) ++ ++(define_expand "reduc_plus_scal_v8si" ++ [(match_operand:SI 0 "register_operand") ++ (match_operand:V8SI 1 "register_operand")] ++ "ISA_HAS_LASX" ++{ ++ rtx tmp = gen_reg_rtx (V4DImode); ++ rtx tmp1 = gen_reg_rtx (V4DImode); ++ rtx vec_res = gen_reg_rtx (V4DImode); ++ emit_insn (gen_lasx_xvhaddw_d_w (tmp, operands[1], operands[1])); ++ emit_insn (gen_lasx_xvhaddw_q_d (tmp1, tmp, tmp)); ++ emit_insn (gen_lasx_xvpermi_d_v4di (tmp, tmp1, GEN_INT (2))); ++ emit_insn (gen_addv4di3 (vec_res, tmp, tmp1)); ++ emit_insn (gen_vec_extractv8sisi (operands[0], gen_lowpart(V8SImode,vec_res), const0_rtx)); ++ DONE; ++}) ++ ++(define_expand "reduc_plus_scal_" ++ [(match_operand: 0 "register_operand") ++ (match_operand:FLASX 1 "register_operand")] ++ "ISA_HAS_LASX" ++{ ++ rtx tmp = gen_reg_rtx (mode); ++ loongarch_expand_vector_reduc (gen_add3, tmp, operands[1]); ++ emit_insn (gen_vec_extract (operands[0], tmp, ++ const0_rtx)); ++ DONE; ++}) ++ ++(define_expand "reduc__scal_" ++ [(any_bitwise: ++ (match_operand: 0 "register_operand") ++ (match_operand:ILASX 1 "register_operand"))] ++ "ISA_HAS_LASX" ++{ ++ rtx tmp = gen_reg_rtx (mode); ++ loongarch_expand_vector_reduc (gen_3, tmp, operands[1]); ++ emit_insn (gen_vec_extract (operands[0], tmp, ++ const0_rtx)); ++ DONE; ++}) ++ ++(define_expand "reduc_smax_scal_" ++ [(match_operand: 0 "register_operand") ++ (match_operand:LASX 1 "register_operand")] ++ "ISA_HAS_LASX" ++{ ++ rtx tmp = gen_reg_rtx (mode); ++ loongarch_expand_vector_reduc (gen_smax3, tmp, operands[1]); ++ emit_insn (gen_vec_extract (operands[0], tmp, ++ const0_rtx)); ++ DONE; ++}) ++ ++(define_expand "reduc_smin_scal_" ++ [(match_operand: 0 "register_operand") ++ (match_operand:LASX 1 "register_operand")] ++ "ISA_HAS_LASX" ++{ ++ rtx tmp = gen_reg_rtx (mode); ++ loongarch_expand_vector_reduc (gen_smin3, tmp, operands[1]); ++ emit_insn (gen_vec_extract (operands[0], tmp, ++ const0_rtx)); ++ DONE; ++}) ++ ++(define_expand "reduc_umax_scal_" ++ [(match_operand: 0 "register_operand") ++ (match_operand:ILASX 1 "register_operand")] ++ "ISA_HAS_LASX" ++{ ++ rtx tmp = gen_reg_rtx (mode); ++ loongarch_expand_vector_reduc (gen_umax3, tmp, operands[1]); ++ emit_insn (gen_vec_extract (operands[0], tmp, ++ const0_rtx)); ++ DONE; ++}) ++ ++(define_expand "reduc_umin_scal_" ++ [(match_operand: 0 "register_operand") ++ (match_operand:ILASX 1 "register_operand")] ++ "ISA_HAS_LASX" ++{ ++ rtx tmp = gen_reg_rtx (mode); ++ loongarch_expand_vector_reduc (gen_umin3, tmp, operands[1]); ++ emit_insn (gen_vec_extract (operands[0], tmp, ++ const0_rtx)); ++ DONE; ++}) ++ ++;; merge vec_unpacks_hi_v8sf/vec_unpacks_lo_v8sf ++(define_peephole ++ [(set (match_operand:V4DF 0 "register_operand") ++ (float_extend:V4DF (vec_select:V4SF ++ (match_operand:V8SF 1 "register_operand") ++ (parallel [(const_int 0) (const_int 1) ++ (const_int 2) (const_int 3)])))) ++ (set (match_operand:V4DF 2 "register_operand") ++ (float_extend:V4DF (vec_select:V4SF ++ (match_operand:V8SF 3 "register_operand") ++ (parallel [(const_int 4) (const_int 5) ++ (const_int 6) (const_int 
7)]))))] ++ "ISA_HAS_LASX && rtx_equal_p (operands[1], operands[3])" ++{ ++ return "xvpermi.d\t%u2,%u1,0xd8\n\txvfcvtl.d.s\t%u0,%u2\n\txvfcvth.d.s\t%u2,%u2"; ++}) +diff --git a/gcc/config/loongarch/lasxintrin.h b/gcc/config/loongarch/lasxintrin.h +index 185eee869..58f3047ac 100644 +--- a/gcc/config/loongarch/lasxintrin.h ++++ b/gcc/config/loongarch/lasxintrin.h +@@ -3262,70 +3262,70 @@ __m256i __lasx_xvftintrnel_l_s(__m256 _1) + /* Assembly instruction format: xd, xj. */ + /* Data types in instruction templates: V8SI, V8SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m256i __lasx_xvfrintrne_s(__m256 _1) ++__m256 __lasx_xvfrintrne_s(__m256 _1) + { +- return (__m256i)__builtin_lasx_xvfrintrne_s((v8f32)_1); ++ return (__m256)__builtin_lasx_xvfrintrne_s((v8f32)_1); + } + + /* Assembly instruction format: xd, xj. */ + /* Data types in instruction templates: V4DI, V4DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m256i __lasx_xvfrintrne_d(__m256d _1) ++__m256d __lasx_xvfrintrne_d(__m256d _1) + { +- return (__m256i)__builtin_lasx_xvfrintrne_d((v4f64)_1); ++ return (__m256d)__builtin_lasx_xvfrintrne_d((v4f64)_1); + } + + /* Assembly instruction format: xd, xj. */ + /* Data types in instruction templates: V8SI, V8SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m256i __lasx_xvfrintrz_s(__m256 _1) ++__m256 __lasx_xvfrintrz_s(__m256 _1) + { +- return (__m256i)__builtin_lasx_xvfrintrz_s((v8f32)_1); ++ return (__m256)__builtin_lasx_xvfrintrz_s((v8f32)_1); + } + + /* Assembly instruction format: xd, xj. */ + /* Data types in instruction templates: V4DI, V4DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m256i __lasx_xvfrintrz_d(__m256d _1) ++__m256d __lasx_xvfrintrz_d(__m256d _1) + { +- return (__m256i)__builtin_lasx_xvfrintrz_d((v4f64)_1); ++ return (__m256d)__builtin_lasx_xvfrintrz_d((v4f64)_1); + } + + /* Assembly instruction format: xd, xj. */ + /* Data types in instruction templates: V8SI, V8SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m256i __lasx_xvfrintrp_s(__m256 _1) ++__m256 __lasx_xvfrintrp_s(__m256 _1) + { +- return (__m256i)__builtin_lasx_xvfrintrp_s((v8f32)_1); ++ return (__m256)__builtin_lasx_xvfrintrp_s((v8f32)_1); + } + + /* Assembly instruction format: xd, xj. */ + /* Data types in instruction templates: V4DI, V4DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m256i __lasx_xvfrintrp_d(__m256d _1) ++__m256d __lasx_xvfrintrp_d(__m256d _1) + { +- return (__m256i)__builtin_lasx_xvfrintrp_d((v4f64)_1); ++ return (__m256d)__builtin_lasx_xvfrintrp_d((v4f64)_1); + } + + /* Assembly instruction format: xd, xj. */ + /* Data types in instruction templates: V8SI, V8SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m256i __lasx_xvfrintrm_s(__m256 _1) ++__m256 __lasx_xvfrintrm_s(__m256 _1) + { +- return (__m256i)__builtin_lasx_xvfrintrm_s((v8f32)_1); ++ return (__m256)__builtin_lasx_xvfrintrm_s((v8f32)_1); + } + + /* Assembly instruction format: xd, xj. */ + /* Data types in instruction templates: V4DI, V4DF. 
*/ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m256i __lasx_xvfrintrm_d(__m256d _1) ++__m256d __lasx_xvfrintrm_d(__m256d _1) + { +- return (__m256i)__builtin_lasx_xvfrintrm_d((v4f64)_1); ++ return (__m256d)__builtin_lasx_xvfrintrm_d((v4f64)_1); + } + + /* Assembly instruction format: xd, rj, si12. */ + /* Data types in instruction templates: V32QI, CVPOINTER, SI. */ +-#define __lasx_xvld(/*void **/ _1, /*si12*/ _2) ((__m256i)__builtin_lasx_xvld((void *)(_1), (_2))) ++#define __lasx_xvld(/*void **/ _1, /*si12*/ _2) ((__m256i)__builtin_lasx_xvld((void const *)(_1), (_2))) + + /* Assembly instruction format: xd, rj, si12. */ + /* Data types in instruction templates: VOID, V32QI, CVPOINTER, SI. */ +@@ -3426,9 +3426,9 @@ __m256i __lasx_xvorn_v(__m256i _1, __m256i _2) + /* Assembly instruction format: xd, rj, rk. */ + /* Data types in instruction templates: V32QI, CVPOINTER, DI. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m256i __lasx_xvldx(void * _1, long int _2) ++__m256i __lasx_xvldx(void const * _1, long int _2) + { +- return (__m256i)__builtin_lasx_xvldx((void *)_1, (long int)_2); ++ return (__m256i)__builtin_lasx_xvldx((void const *)_1, (long int)_2); + } + + /* Assembly instruction format: xd, rj, rk. */ +@@ -3609,19 +3609,19 @@ __m256i __lasx_xvperm_w(__m256i _1, __m256i _2) + + /* Assembly instruction format: xd, rj, si12. */ + /* Data types in instruction templates: V32QI, CVPOINTER, SI. */ +-#define __lasx_xvldrepl_b(/*void **/ _1, /*si12*/ _2) ((__m256i)__builtin_lasx_xvldrepl_b((void *)(_1), (_2))) ++#define __lasx_xvldrepl_b(/*void **/ _1, /*si12*/ _2) ((__m256i)__builtin_lasx_xvldrepl_b((void const *)(_1), (_2))) + + /* Assembly instruction format: xd, rj, si11. */ + /* Data types in instruction templates: V16HI, CVPOINTER, SI. */ +-#define __lasx_xvldrepl_h(/*void **/ _1, /*si11*/ _2) ((__m256i)__builtin_lasx_xvldrepl_h((void *)(_1), (_2))) ++#define __lasx_xvldrepl_h(/*void **/ _1, /*si11*/ _2) ((__m256i)__builtin_lasx_xvldrepl_h((void const *)(_1), (_2))) + + /* Assembly instruction format: xd, rj, si10. */ + /* Data types in instruction templates: V8SI, CVPOINTER, SI. */ +-#define __lasx_xvldrepl_w(/*void **/ _1, /*si10*/ _2) ((__m256i)__builtin_lasx_xvldrepl_w((void *)(_1), (_2))) ++#define __lasx_xvldrepl_w(/*void **/ _1, /*si10*/ _2) ((__m256i)__builtin_lasx_xvldrepl_w((void const *)(_1), (_2))) + + /* Assembly instruction format: xd, rj, si9. */ + /* Data types in instruction templates: V4DI, CVPOINTER, SI. */ +-#define __lasx_xvldrepl_d(/*void **/ _1, /*si9*/ _2) ((__m256i)__builtin_lasx_xvldrepl_d((void *)(_1), (_2))) ++#define __lasx_xvldrepl_d(/*void **/ _1, /*si9*/ _2) ((__m256i)__builtin_lasx_xvldrepl_d((void const *)(_1), (_2))) + + /* Assembly instruction format: rd, xj, ui3. */ + /* Data types in instruction templates: SI, V8SI, UQI. */ +diff --git a/gcc/config/loongarch/linux-common.h b/gcc/config/loongarch/linux-common.h +deleted file mode 100644 +index 9e1a1b50f..000000000 +--- a/gcc/config/loongarch/linux-common.h ++++ /dev/null +@@ -1,68 +0,0 @@ +-/* Definitions for LARCH running Linux-based GNU systems with ELF format. +- Copyright (C) 2012-2018 Free Software Foundation, Inc. +- +-This file is part of GCC. +- +-GCC is free software; you can redistribute it and/or modify +-it under the terms of the GNU General Public License as published by +-the Free Software Foundation; either version 3, or (at your option) +-any later version. 
+- +-GCC is distributed in the hope that it will be useful, +-but WITHOUT ANY WARRANTY; without even the implied warranty of +-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-GNU General Public License for more details. +- +-You should have received a copy of the GNU General Public License +-along with GCC; see the file COPYING3. If not see +-. */ +- +-#undef TARGET_OS_CPP_BUILTINS +-#define TARGET_OS_CPP_BUILTINS() \ +- do { \ +- GNU_USER_TARGET_OS_CPP_BUILTINS(); \ +- /* The GNU C++ standard library requires this. */ \ +- if (c_dialect_cxx ()) \ +- builtin_define ("_GNU_SOURCE"); \ +- ANDROID_TARGET_OS_CPP_BUILTINS(); \ +- } while (0) +- +-#define EXTRA_TARGET_D_OS_VERSIONS() \ +- ANDROID_TARGET_D_OS_VERSIONS(); +- +-#undef LINK_SPEC +-#define LINK_SPEC \ +- LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LINK_SPEC, \ +- GNU_USER_TARGET_LINK_SPEC " " ANDROID_LINK_SPEC) +- +-#undef SUBTARGET_CC1_SPEC +-#define SUBTARGET_CC1_SPEC \ +- LINUX_OR_ANDROID_CC (GNU_USER_TARGET_CC1_SPEC, \ +- GNU_USER_TARGET_CC1_SPEC " " ANDROID_CC1_SPEC) +- +-#undef CC1PLUS_SPEC +-#define CC1PLUS_SPEC \ +- LINUX_OR_ANDROID_CC ("", ANDROID_CC1PLUS_SPEC) +- +-#undef LIB_SPEC +-#define LIB_SPEC \ +- LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LIB_SPEC, \ +- GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC " " ANDROID_LIB_SPEC) +- +-#undef STARTFILE_SPEC +-#define STARTFILE_SPEC \ +- LINUX_OR_ANDROID_LD (GNU_USER_TARGET_STARTFILE_SPEC, ANDROID_STARTFILE_SPEC) +- +-#undef ENDFILE_SPEC +-#define ENDFILE_SPEC \ +- LINUX_OR_ANDROID_LD (GNU_USER_TARGET_MATHFILE_SPEC " " \ +- GNU_USER_TARGET_ENDFILE_SPEC, \ +- GNU_USER_TARGET_MATHFILE_SPEC " " \ +- ANDROID_ENDFILE_SPEC) +- +-/* Define this to be nonzero if static stack checking is supported. */ +-#define STACK_CHECK_STATIC_BUILTIN 1 +- +-/* FIXME*/ +-/* The default value isn't sufficient in 64-bit mode. */ +-#define STACK_CHECK_PROTECT (TARGET_64BIT ? 16 * 1024 : 12 * 1024) +diff --git a/gcc/config/loongarch/linux.h b/gcc/config/loongarch/linux.h +index 520a8ef32..59854251f 100644 +--- a/gcc/config/loongarch/linux.h ++++ b/gcc/config/loongarch/linux.h +@@ -1,4 +1,4 @@ +-/* Definitions for LARCH running Linux-based GNU systems with ELF format. ++/* Definitions for Linux-based systems with libraries in ELF format. + Copyright (C) 1998-2018 Free Software Foundation, Inc. + + This file is part of GCC. +@@ -17,17 +17,34 @@ You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +-#define GNU_USER_LINK_EMULATION32 "elf32loongarch" +-#define GNU_USER_LINK_EMULATION64 "elf64loongarch" ++/* Default system library search paths. ++ * This ensures that a compiler configured with --disable-multilib ++ * can work in a multilib environment. 
*/ + +-#define GLIBC_DYNAMIC_LINKERLP32 \ +- "/lib32/ld.so.1" +-#define GLIBC_DYNAMIC_LINKERLP64 \ +- "/lib64/ld.so.1" ++#if defined(LA_DISABLE_MULTILIB) && defined(LA_DISABLE_MULTIARCH) + +-#define GNU_USER_DYNAMIC_LINKERLP32 GLIBC_DYNAMIC_LINKERLP32 +-#define GNU_USER_DYNAMIC_LINKERLP64 GLIBC_DYNAMIC_LINKERLP64 ++ #if DEFAULT_ABI_BASE == ABI_BASE_LP64D ++ #define ABI_LIBDIR "lib64" ++ #elif DEFAULT_ABI_BASE == ABI_BASE_LP64F ++ #define ABI_LIBDIR "lib64/f32" ++ #elif DEFAULT_ABI_BASE == ABI_BASE_LP64S ++ #define ABI_LIBDIR "lib64/sf" ++ #endif + ++#endif ++ ++#ifndef ABI_LIBDIR ++#define ABI_LIBDIR "lib" ++#endif ++ ++#define STANDARD_STARTFILE_PREFIX_1 "/" ABI_LIBDIR "/" ++#define STANDARD_STARTFILE_PREFIX_2 "/usr/" ABI_LIBDIR "/" ++ ++ ++/* Define this to be nonzero if static stack checking is supported. */ ++#define STACK_CHECK_STATIC_BUILTIN 1 ++ ++/* The default value isn't sufficient in 64-bit mode. */ ++#define STACK_CHECK_PROTECT (TARGET_64BIT ? 16 * 1024 : 12 * 1024) + +-#undef TARGET_ASM_FILE_END + #define TARGET_ASM_FILE_END file_end_indicate_exec_stack +diff --git a/gcc/config/loongarch/loongarch-builtins.c b/gcc/config/loongarch/loongarch-builtins.c +index 9fa68b11f..b326ec46c 100644 +--- a/gcc/config/loongarch/loongarch-builtins.c ++++ b/gcc/config/loongarch/loongarch-builtins.c +@@ -1,7 +1,6 @@ +- +-/* Subroutines used for expanding LOONGARCH builtins. +- Copyright (C) 2011-2018 Free Software Foundation, Inc. +- Contributed by Andrew Waterman (andrew@sifive.com). ++/* Subroutines used for expanding LoongArch builtins. ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. ++ Contributed by Loongson Co. Ltd. + + This file is part of GCC. + +@@ -30,50 +29,29 @@ along with GCC; see the file COPYING3. If not see + #include "tree.h" + #include "memmodel.h" + #include "gimple.h" +-#include "cfghooks.h" +-#include "df.h" + #include "tm_p.h" +-#include "stringpool.h" +-#include "attribs.h" + #include "optabs.h" +-#include "regs.h" +-#include "emit-rtl.h" + #include "recog.h" +-#include "cgraph.h" + #include "diagnostic.h" +-#include "insn-attr.h" +-#include "output.h" +-#include "alias.h" + #include "fold-const.h" +-#include "varasm.h" +-#include "stor-layout.h" +-#include "calls.h" +-#include "explow.h" + #include "expr.h" +-#include "libfuncs.h" +-#include "reload.h" +-#include "common/common-target.h" + #include "langhooks.h" +-#include "cfgrtl.h" +-#include "cfganal.h" +-#include "sched-int.h" +-#include "gimplify.h" +-#include "target-globals.h" +-#include "tree-pass.h" +-#include "context.h" ++#include "emit-rtl.h" ++#include "explow.h" + #include "builtins.h" +-#include "rtl-iter.h" ++#include "stringpool.h" ++#include "case-cfn-macros.h" + +-/* This file should be included last. */ +-#include "target-def.h" + /* Macros to create an enumeration identifier for a function prototype. */ + #define LARCH_FTYPE_NAME1(A, B) LARCH_##A##_FTYPE_##B + #define LARCH_FTYPE_NAME2(A, B, C) LARCH_##A##_FTYPE_##B##_##C + #define LARCH_FTYPE_NAME3(A, B, C, D) LARCH_##A##_FTYPE_##B##_##C##_##D +-#define LARCH_FTYPE_NAME4(A, B, C, D, E) LARCH_##A##_FTYPE_##B##_##C##_##D##_##E ++#define LARCH_FTYPE_NAME4(A, B, C, D, E) \ ++ LARCH_##A##_FTYPE_##B##_##C##_##D##_##E + + /* Classifies the prototype of a built-in function. 
*/ +-enum loongarch_function_type { ++enum loongarch_function_type ++{ + #define DEF_LARCH_FTYPE(NARGS, LIST) LARCH_FTYPE_NAME##NARGS LIST, + #include "config/loongarch/loongarch-ftypes.def" + #undef DEF_LARCH_FTYPE +@@ -81,7 +59,8 @@ enum loongarch_function_type { + }; + + /* Specifies how a built-in function should be converted into rtl. */ +-enum loongarch_builtin_type { ++enum loongarch_builtin_type ++{ + /* The function corresponds directly to an .md pattern. The return + value is mapped to operand 0 and the arguments are mapped to + operands 1 and above. */ +@@ -91,23 +70,23 @@ enum loongarch_builtin_type { + value and the arguments are mapped to operands 0 and above. */ + LARCH_BUILTIN_DIRECT_NO_TARGET, + ++ /* For generating LoongArch LSX. */ ++ LARCH_BUILTIN_LSX, ++ + /* The function corresponds to an LSX conditional branch instruction + combined with a compare instruction. */ + LARCH_BUILTIN_LSX_TEST_BRANCH, + +- /* For generating LoongArch LSX. */ +- LARCH_BUILTIN_LSX, +- + /* For generating LoongArch LASX. */ + LARCH_BUILTIN_LASX, + + /* The function corresponds to an LASX conditional branch instruction + combined with a compare instruction. */ +- LARCH_BUILTIN_LASX_TEST_BRANCH, ++ LARCH_BUILTIN_LASX_TEST_BRANCH + + }; + +-/* Invoke MACRO (COND) for each C.cond.fmt condition. */ ++/* Invoke MACRO (COND) for each fcmp.cond.{s/d} condition. */ + #define LARCH_FP_CONDITIONS(MACRO) \ + MACRO (f), \ + MACRO (un), \ +@@ -127,26 +106,27 @@ enum loongarch_builtin_type { + MACRO (ngt) + + /* Enumerates the codes above as LARCH_FP_COND_. */ +-#define DECLARE_LARCH_COND(X) LARCH_FP_COND_ ## X +-enum loongarch_fp_condition { ++#define DECLARE_LARCH_COND(X) LARCH_FP_COND_##X ++enum loongarch_fp_condition ++{ + LARCH_FP_CONDITIONS (DECLARE_LARCH_COND) + }; + #undef DECLARE_LARCH_COND + + /* Index X provides the string representation of LARCH_FP_COND_. */ + #define STRINGIFY(X) #X +-const char *const loongarch_fp_conditions[16] = { +- LARCH_FP_CONDITIONS (STRINGIFY) +-}; ++const char *const ++loongarch_fp_conditions[16] = {LARCH_FP_CONDITIONS (STRINGIFY)}; + #undef STRINGIFY +-/* Declare an availability predicate for built-in functions that require ++ ++/* Declare an availability predicate for built-in functions that require + * COND to be true. NAME is the main part of the predicate's name. */ +-#define AVAIL_ALL(NAME, COND) \ +- static unsigned int \ +- loongarch_builtin_avail_##NAME (void) \ +- { \ +- return (COND) ? 1 : 0; \ +- } ++#define AVAIL_ALL(NAME, COND) \ ++ static unsigned int \ ++ loongarch_builtin_avail_##NAME (void) \ ++ { \ ++ return (COND) ? 1 : 0; \ ++ } + + static unsigned int + loongarch_builtin_avail_default (void) +@@ -154,14 +134,12 @@ loongarch_builtin_avail_default (void) + return 1; + } + /* This structure describes a single built-in function. */ +-struct loongarch_builtin_description { ++struct loongarch_builtin_description ++{ + /* The code of the main .md file instruction. See loongarch_builtin_type + for more information. */ + enum insn_code icode; + +- /* The floating-point comparison code to use with ICODE, if any. */ +- enum loongarch_fp_condition cond; +- + /* The name of the built-in function. */ + const char *name; + +@@ -176,8 +154,8 @@ struct loongarch_builtin_description { + }; + + AVAIL_ALL (hard_float, TARGET_HARD_FLOAT_ABI) +-AVAIL_ALL (lsx, TARGET_LSX) +-AVAIL_ALL (lasx, TARGET_LASX) ++AVAIL_ALL (lsx, ISA_HAS_LSX) ++AVAIL_ALL (lasx, ISA_HAS_LASX) + + /* Construct a loongarch_builtin_description from the given arguments. 
+ +@@ -194,31 +172,32 @@ AVAIL_ALL (lasx, TARGET_LASX) + + AVAIL is the name of the availability predicate, without the leading + loongarch_builtin_avail_. */ +-#define LARCH_BUILTIN(INSN, COND, NAME, BUILTIN_TYPE, \ +- FUNCTION_TYPE, AVAIL) \ +- { CODE_FOR_loongarch_ ## INSN, LARCH_FP_COND_ ## COND, \ +- "__builtin_loongarch_" NAME, BUILTIN_TYPE, FUNCTION_TYPE, \ +- loongarch_builtin_avail_ ## AVAIL } ++#define LARCH_BUILTIN(INSN, NAME, BUILTIN_TYPE, FUNCTION_TYPE, AVAIL) \ ++ { \ ++ CODE_FOR_loongarch_##INSN, "__builtin_loongarch_" NAME, \ ++ BUILTIN_TYPE, FUNCTION_TYPE, \ ++ loongarch_builtin_avail_##AVAIL \ ++ } + + /* Define __builtin_loongarch_, which is a LARCH_BUILTIN_DIRECT function + mapped to instruction CODE_FOR_loongarch_, FUNCTION_TYPE and AVAIL + are as for LARCH_BUILTIN. */ +-#define DIRECT_BUILTIN(INSN, FUNCTION_TYPE, AVAIL) \ +- LARCH_BUILTIN (INSN, f, #INSN, LARCH_BUILTIN_DIRECT, FUNCTION_TYPE, AVAIL) ++#define DIRECT_BUILTIN(INSN, FUNCTION_TYPE, AVAIL) \ ++ LARCH_BUILTIN (INSN, #INSN, LARCH_BUILTIN_DIRECT, FUNCTION_TYPE, AVAIL) + + /* Define __builtin_loongarch_, which is a LARCH_BUILTIN_DIRECT_NO_TARGET + function mapped to instruction CODE_FOR_loongarch_, FUNCTION_TYPE + and AVAIL are as for LARCH_BUILTIN. */ +-#define DIRECT_NO_TARGET_BUILTIN(INSN, FUNCTION_TYPE, AVAIL) \ +- LARCH_BUILTIN (INSN, f, #INSN, LARCH_BUILTIN_DIRECT_NO_TARGET, \ +- FUNCTION_TYPE, AVAIL) ++#define DIRECT_NO_TARGET_BUILTIN(INSN, FUNCTION_TYPE, AVAIL) \ ++ LARCH_BUILTIN (INSN, #INSN, LARCH_BUILTIN_DIRECT_NO_TARGET, \ ++ FUNCTION_TYPE, AVAIL) + + /* Define an LSX LARCH_BUILTIN_DIRECT function __builtin_lsx_ + for instruction CODE_FOR_lsx_. FUNCTION_TYPE is a builtin_description + field. */ + #define LSX_BUILTIN(INSN, FUNCTION_TYPE) \ +- { CODE_FOR_lsx_ ## INSN, LARCH_FP_COND_f, \ +- "__builtin_lsx_" #INSN, LARCH_BUILTIN_DIRECT, \ ++ { CODE_FOR_lsx_ ## INSN, \ ++ "__builtin_lsx_" #INSN, LARCH_BUILTIN_DIRECT, \ + FUNCTION_TYPE, loongarch_builtin_avail_lsx } + + +@@ -226,7 +205,7 @@ AVAIL_ALL (lasx, TARGET_LASX) + for instruction CODE_FOR_lsx_. FUNCTION_TYPE is a builtin_description + field. */ + #define LSX_BUILTIN_TEST_BRANCH(INSN, FUNCTION_TYPE) \ +- { CODE_FOR_lsx_ ## INSN, LARCH_FP_COND_f, \ ++ { CODE_FOR_lsx_ ## INSN, \ + "__builtin_lsx_" #INSN, LARCH_BUILTIN_LSX_TEST_BRANCH, \ + FUNCTION_TYPE, loongarch_builtin_avail_lsx } + +@@ -234,7 +213,7 @@ AVAIL_ALL (lasx, TARGET_LASX) + for instruction CODE_FOR_lsx_. FUNCTION_TYPE is a builtin_description + field. */ + #define LSX_NO_TARGET_BUILTIN(INSN, FUNCTION_TYPE) \ +- { CODE_FOR_lsx_ ## INSN, LARCH_FP_COND_f, \ ++ { CODE_FOR_lsx_ ## INSN, \ + "__builtin_lsx_" #INSN, LARCH_BUILTIN_DIRECT_NO_TARGET, \ + FUNCTION_TYPE, loongarch_builtin_avail_lsx } + +@@ -242,7 +221,7 @@ AVAIL_ALL (lasx, TARGET_LASX) + for instruction CODE_FOR_lasx_. FUNCTION_TYPE is a builtin_description + field. */ + #define LASX_BUILTIN(INSN, FUNCTION_TYPE) \ +- { CODE_FOR_lasx_ ## INSN, LARCH_FP_COND_f, \ ++ { CODE_FOR_lasx_ ## INSN, \ + "__builtin_lasx_" #INSN, LARCH_BUILTIN_LASX, \ + FUNCTION_TYPE, loongarch_builtin_avail_lasx } + +@@ -250,7 +229,7 @@ AVAIL_ALL (lasx, TARGET_LASX) + for instruction CODE_FOR_lasx_. FUNCTION_TYPE is a builtin_description + field. 
*/ + #define LASX_NO_TARGET_BUILTIN(INSN, FUNCTION_TYPE) \ +- { CODE_FOR_lasx_ ## INSN, LARCH_FP_COND_f, \ ++ { CODE_FOR_lasx_ ## INSN, \ + "__builtin_lasx_" #INSN, LARCH_BUILTIN_DIRECT_NO_TARGET, \ + FUNCTION_TYPE, loongarch_builtin_avail_lasx } + +@@ -258,65 +237,10 @@ AVAIL_ALL (lasx, TARGET_LASX) + for instruction CODE_FOR_lasx_. FUNCTION_TYPE is a builtin_description + field. */ + #define LASX_BUILTIN_TEST_BRANCH(INSN, FUNCTION_TYPE) \ +- { CODE_FOR_lasx_ ## INSN, LARCH_FP_COND_f, \ ++ { CODE_FOR_lasx_ ## INSN, \ + "__builtin_lasx_" #INSN, LARCH_BUILTIN_LASX_TEST_BRANCH, \ + FUNCTION_TYPE, loongarch_builtin_avail_lasx } + +-/* LoongArch BASE instructions define CODE_FOR_loongarch_xxx */ +-#define CODE_FOR_loongarch_fmax_sf CODE_FOR_smaxsf3 +-#define CODE_FOR_loongarch_fmax_df CODE_FOR_smaxdf3 +-#define CODE_FOR_loongarch_fmin_sf CODE_FOR_sminsf3 +-#define CODE_FOR_loongarch_fmin_df CODE_FOR_smindf3 +-#define CODE_FOR_loongarch_fmaxa_sf CODE_FOR_smaxasf3 +-#define CODE_FOR_loongarch_fmaxa_df CODE_FOR_smaxadf3 +-#define CODE_FOR_loongarch_fmina_sf CODE_FOR_sminasf3 +-#define CODE_FOR_loongarch_fmina_df CODE_FOR_sminadf3 +-#define CODE_FOR_loongarch_fclass_s CODE_FOR_fclass_s +-#define CODE_FOR_loongarch_fclass_d CODE_FOR_fclass_d +-#define CODE_FOR_loongarch_frint_s CODE_FOR_frint_s +-#define CODE_FOR_loongarch_frint_d CODE_FOR_frint_d +-#define CODE_FOR_loongarch_bytepick_w CODE_FOR_bytepick_w +-#define CODE_FOR_loongarch_bytepick_d CODE_FOR_bytepick_d +-#define CODE_FOR_loongarch_bitrev_4b CODE_FOR_bitrev_4b +-#define CODE_FOR_loongarch_bitrev_8b CODE_FOR_bitrev_8b +- +-/* LoongArch support crc */ +-#define CODE_FOR_loongarch_crc_w_b_w CODE_FOR_crc_w_b_w +-#define CODE_FOR_loongarch_crc_w_h_w CODE_FOR_crc_w_h_w +-#define CODE_FOR_loongarch_crc_w_w_w CODE_FOR_crc_w_w_w +-#define CODE_FOR_loongarch_crc_w_d_w CODE_FOR_crc_w_d_w +-#define CODE_FOR_loongarch_crcc_w_b_w CODE_FOR_crcc_w_b_w +-#define CODE_FOR_loongarch_crcc_w_h_w CODE_FOR_crcc_w_h_w +-#define CODE_FOR_loongarch_crcc_w_w_w CODE_FOR_crcc_w_w_w +-#define CODE_FOR_loongarch_crcc_w_d_w CODE_FOR_crcc_w_d_w +- +-/* Privileged state instruction */ +-#define CODE_FOR_loongarch_cpucfg CODE_FOR_cpucfg +-#define CODE_FOR_loongarch_asrtle_d CODE_FOR_asrtle_d +-#define CODE_FOR_loongarch_asrtgt_d CODE_FOR_asrtgt_d +-#define CODE_FOR_loongarch_csrrd CODE_FOR_csrrd +-#define CODE_FOR_loongarch_dcsrrd CODE_FOR_dcsrrd +-#define CODE_FOR_loongarch_csrwr CODE_FOR_csrwr +-#define CODE_FOR_loongarch_dcsrwr CODE_FOR_dcsrwr +-#define CODE_FOR_loongarch_csrxchg CODE_FOR_csrxchg +-#define CODE_FOR_loongarch_dcsrxchg CODE_FOR_dcsrxchg +-#define CODE_FOR_loongarch_iocsrrd_b CODE_FOR_iocsrrd_b +-#define CODE_FOR_loongarch_iocsrrd_h CODE_FOR_iocsrrd_h +-#define CODE_FOR_loongarch_iocsrrd_w CODE_FOR_iocsrrd_w +-#define CODE_FOR_loongarch_iocsrrd_d CODE_FOR_iocsrrd_d +-#define CODE_FOR_loongarch_iocsrwr_b CODE_FOR_iocsrwr_b +-#define CODE_FOR_loongarch_iocsrwr_h CODE_FOR_iocsrwr_h +-#define CODE_FOR_loongarch_iocsrwr_w CODE_FOR_iocsrwr_w +-#define CODE_FOR_loongarch_iocsrwr_d CODE_FOR_iocsrwr_d +-#define CODE_FOR_loongarch_lddir CODE_FOR_lddir +-#define CODE_FOR_loongarch_dlddir CODE_FOR_dlddir +-#define CODE_FOR_loongarch_ldpte CODE_FOR_ldpte +-#define CODE_FOR_loongarch_dldpte CODE_FOR_dldpte +-#define CODE_FOR_loongarch_cacop CODE_FOR_cacop +-#define CODE_FOR_loongarch_dcacop CODE_FOR_dcacop +-#define CODE_FOR_loongarch_dbar CODE_FOR_dbar +-#define CODE_FOR_loongarch_ibar CODE_FOR_ibar +- + /* LoongArch SX define CODE_FOR_lsx_xxx */ + #define 
CODE_FOR_lsx_vsadd_b CODE_FOR_ssaddv16qi3 + #define CODE_FOR_lsx_vsadd_h CODE_FOR_ssaddv8hi3 +@@ -389,6 +313,8 @@ AVAIL_ALL (lasx, TARGET_LASX) + #define CODE_FOR_lsx_vfmin_d CODE_FOR_sminv2df3 + #define CODE_FOR_lsx_vfsqrt_s CODE_FOR_sqrtv4sf2 + #define CODE_FOR_lsx_vfsqrt_d CODE_FOR_sqrtv2df2 ++#define CODE_FOR_lsx_vflogb_s CODE_FOR_logbv4sf2 ++#define CODE_FOR_lsx_vflogb_d CODE_FOR_logbv2df2 + #define CODE_FOR_lsx_vmax_b CODE_FOR_smaxv16qi3 + #define CODE_FOR_lsx_vmax_h CODE_FOR_smaxv8hi3 + #define CODE_FOR_lsx_vmax_w CODE_FOR_smaxv4si3 +@@ -654,6 +580,8 @@ AVAIL_ALL (lasx, TARGET_LASX) + #define CODE_FOR_lasx_xvfmin_d CODE_FOR_sminv4df3 + #define CODE_FOR_lasx_xvfsqrt_s CODE_FOR_sqrtv8sf2 + #define CODE_FOR_lasx_xvfsqrt_d CODE_FOR_sqrtv4df2 ++#define CODE_FOR_lasx_xvflogb_s CODE_FOR_logbv8sf2 ++#define CODE_FOR_lasx_xvflogb_d CODE_FOR_logbv4df2 + #define CODE_FOR_lasx_xvmax_b CODE_FOR_smaxv32qi3 + #define CODE_FOR_lasx_xvmax_h CODE_FOR_smaxv16hi3 + #define CODE_FOR_lasx_xvmax_w CODE_FOR_smaxv8si3 +@@ -771,6 +699,7 @@ AVAIL_ALL (lasx, TARGET_LASX) + #define CODE_FOR_lasx_xvfnmsub_d CODE_FOR_xvfnmsubv4df4_nmsub4 + + #define CODE_FOR_lasx_xvpermi_q CODE_FOR_lasx_xvpermi_q_v32qi ++#define CODE_FOR_lasx_xvpermi_d CODE_FOR_lasx_xvpermi_d_v4di + #define CODE_FOR_lasx_xbnz_v CODE_FOR_lasx_xbnz_v_b + #define CODE_FOR_lasx_xbz_v CODE_FOR_lasx_xbz_v_b + +@@ -857,36 +786,17 @@ static const struct loongarch_builtin_description loongarch_builtins[] = { + #define LARCH_MOVGR2FCSR 1 + DIRECT_NO_TARGET_BUILTIN (movgr2fcsr, LARCH_VOID_FTYPE_UQI_USI, hard_float), + +- DIRECT_NO_TARGET_BUILTIN (cacop, LARCH_VOID_FTYPE_USI_USI_SI, default), +- DIRECT_NO_TARGET_BUILTIN (dcacop, LARCH_VOID_FTYPE_USI_UDI_SI, default), ++ DIRECT_NO_TARGET_BUILTIN (cacop_w, LARCH_VOID_FTYPE_USI_USI_SI, default), ++ DIRECT_NO_TARGET_BUILTIN (cacop_d, LARCH_VOID_FTYPE_USI_UDI_SI, default), + DIRECT_NO_TARGET_BUILTIN (dbar, LARCH_VOID_FTYPE_USI, default), + DIRECT_NO_TARGET_BUILTIN (ibar, LARCH_VOID_FTYPE_USI, default), + +- DIRECT_BUILTIN (fmax_sf, LARCH_SF_FTYPE_SF_SF, hard_float), +- DIRECT_BUILTIN (fmax_df, LARCH_DF_FTYPE_DF_DF, hard_float), +- DIRECT_BUILTIN (fmin_sf, LARCH_SF_FTYPE_SF_SF, hard_float), +- DIRECT_BUILTIN (fmin_df, LARCH_DF_FTYPE_DF_DF, hard_float), +- DIRECT_BUILTIN (fmaxa_sf, LARCH_SF_FTYPE_SF_SF, hard_float), +- DIRECT_BUILTIN (fmaxa_df, LARCH_DF_FTYPE_DF_DF, hard_float), +- DIRECT_BUILTIN (fmina_sf, LARCH_SF_FTYPE_SF_SF, hard_float), +- DIRECT_BUILTIN (fmina_df, LARCH_DF_FTYPE_DF_DF, hard_float), +- DIRECT_BUILTIN (fclass_s, LARCH_SF_FTYPE_SF, hard_float), +- DIRECT_BUILTIN (fclass_d, LARCH_DF_FTYPE_DF, hard_float), +- DIRECT_BUILTIN (frint_s, LARCH_SF_FTYPE_SF, hard_float), +- DIRECT_BUILTIN (frint_d, LARCH_DF_FTYPE_DF, hard_float), +- DIRECT_BUILTIN (bytepick_w, LARCH_SI_FTYPE_SI_SI_QI, default), +- DIRECT_BUILTIN (bytepick_d, LARCH_DI_FTYPE_DI_DI_QI, default), +- DIRECT_BUILTIN (bitrev_4b, LARCH_SI_FTYPE_SI, default), +- DIRECT_BUILTIN (bitrev_8b, LARCH_DI_FTYPE_DI, default), +- DIRECT_BUILTIN (cpucfg, LARCH_USI_FTYPE_USI, default), +- DIRECT_BUILTIN (asrtle_d, LARCH_VOID_FTYPE_DI_DI, default), +- DIRECT_BUILTIN (asrtgt_d, LARCH_VOID_FTYPE_DI_DI, default), +- DIRECT_BUILTIN (dlddir, LARCH_DI_FTYPE_DI_UQI, default), +- DIRECT_BUILTIN (lddir, LARCH_SI_FTYPE_SI_UQI, default), +- DIRECT_NO_TARGET_BUILTIN (dldpte, LARCH_VOID_FTYPE_DI_UQI, default), +- DIRECT_NO_TARGET_BUILTIN (ldpte, LARCH_VOID_FTYPE_SI_UQI, default), ++ DIRECT_BUILTIN (lddir_d, LARCH_DI_FTYPE_DI_UQI, default), ++ DIRECT_BUILTIN (lddir_w, 
LARCH_SI_FTYPE_SI_UQI, default), ++ DIRECT_NO_TARGET_BUILTIN (ldpte_d, LARCH_VOID_FTYPE_DI_UQI, default), ++ DIRECT_NO_TARGET_BUILTIN (ldpte_w, LARCH_VOID_FTYPE_SI_UQI, default), + +- /* CRC Instrinsic */ ++ /* CRC Instrinsic */ + + DIRECT_BUILTIN (crc_w_b_w, LARCH_SI_FTYPE_QI_SI, default), + DIRECT_BUILTIN (crc_w_h_w, LARCH_SI_FTYPE_HI_SI, default), +@@ -897,12 +807,12 @@ static const struct loongarch_builtin_description loongarch_builtins[] = { + DIRECT_BUILTIN (crcc_w_w_w, LARCH_SI_FTYPE_SI_SI, default), + DIRECT_BUILTIN (crcc_w_d_w, LARCH_SI_FTYPE_DI_SI, default), + +- DIRECT_BUILTIN (csrrd, LARCH_USI_FTYPE_USI, default), +- DIRECT_BUILTIN (dcsrrd, LARCH_UDI_FTYPE_USI, default), +- DIRECT_BUILTIN (csrwr, LARCH_USI_FTYPE_USI_USI, default), +- DIRECT_BUILTIN (dcsrwr, LARCH_UDI_FTYPE_UDI_USI, default), +- DIRECT_BUILTIN (csrxchg, LARCH_USI_FTYPE_USI_USI_USI, default), +- DIRECT_BUILTIN (dcsrxchg, LARCH_UDI_FTYPE_UDI_UDI_USI, default), ++ DIRECT_BUILTIN (csrrd_w, LARCH_USI_FTYPE_USI, default), ++ DIRECT_BUILTIN (csrrd_d, LARCH_UDI_FTYPE_USI, default), ++ DIRECT_BUILTIN (csrwr_w, LARCH_USI_FTYPE_USI_USI, default), ++ DIRECT_BUILTIN (csrwr_d, LARCH_UDI_FTYPE_UDI_USI, default), ++ DIRECT_BUILTIN (csrxchg_w, LARCH_USI_FTYPE_USI_USI_USI, default), ++ DIRECT_BUILTIN (csrxchg_d, LARCH_UDI_FTYPE_UDI_UDI_USI, default), + DIRECT_BUILTIN (iocsrrd_b, LARCH_UQI_FTYPE_USI, default), + DIRECT_BUILTIN (iocsrrd_h, LARCH_UHI_FTYPE_USI, default), + DIRECT_BUILTIN (iocsrrd_w, LARCH_USI_FTYPE_USI, default), +@@ -912,6 +822,12 @@ static const struct loongarch_builtin_description loongarch_builtins[] = { + DIRECT_NO_TARGET_BUILTIN (iocsrwr_w, LARCH_VOID_FTYPE_USI_USI, default), + DIRECT_NO_TARGET_BUILTIN (iocsrwr_d, LARCH_VOID_FTYPE_UDI_USI, default), + ++ DIRECT_BUILTIN (cpucfg, LARCH_USI_FTYPE_USI, default), ++ DIRECT_NO_TARGET_BUILTIN (asrtle_d, LARCH_VOID_FTYPE_DI_DI, default), ++ DIRECT_NO_TARGET_BUILTIN (asrtgt_d, LARCH_VOID_FTYPE_DI_DI, default), ++ DIRECT_NO_TARGET_BUILTIN (syscall, LARCH_VOID_FTYPE_USI, default), ++ DIRECT_NO_TARGET_BUILTIN (break, LARCH_VOID_FTYPE_USI, default), ++ + /* Built-in functions for LSX. 
*/ + LSX_BUILTIN (vsll_b, LARCH_V16QI_FTYPE_V16QI_V16QI), + LSX_BUILTIN (vsll_h, LARCH_V8HI_FTYPE_V8HI_V8HI), +@@ -1439,14 +1355,14 @@ static const struct loongarch_builtin_description loongarch_builtins[] = { + LSX_BUILTIN (vftintrmh_l_s, LARCH_V2DI_FTYPE_V4SF), + LSX_BUILTIN (vftintrnel_l_s, LARCH_V2DI_FTYPE_V4SF), + LSX_BUILTIN (vftintrneh_l_s, LARCH_V2DI_FTYPE_V4SF), +- LSX_BUILTIN (vfrintrne_s, LARCH_V4SI_FTYPE_V4SF), +- LSX_BUILTIN (vfrintrne_d, LARCH_V2DI_FTYPE_V2DF), +- LSX_BUILTIN (vfrintrz_s, LARCH_V4SI_FTYPE_V4SF), +- LSX_BUILTIN (vfrintrz_d, LARCH_V2DI_FTYPE_V2DF), +- LSX_BUILTIN (vfrintrp_s, LARCH_V4SI_FTYPE_V4SF), +- LSX_BUILTIN (vfrintrp_d, LARCH_V2DI_FTYPE_V2DF), +- LSX_BUILTIN (vfrintrm_s, LARCH_V4SI_FTYPE_V4SF), +- LSX_BUILTIN (vfrintrm_d, LARCH_V2DI_FTYPE_V2DF), ++ LSX_BUILTIN (vfrintrne_s, LARCH_V4SF_FTYPE_V4SF), ++ LSX_BUILTIN (vfrintrne_d, LARCH_V2DF_FTYPE_V2DF), ++ LSX_BUILTIN (vfrintrz_s, LARCH_V4SF_FTYPE_V4SF), ++ LSX_BUILTIN (vfrintrz_d, LARCH_V2DF_FTYPE_V2DF), ++ LSX_BUILTIN (vfrintrp_s, LARCH_V4SF_FTYPE_V4SF), ++ LSX_BUILTIN (vfrintrp_d, LARCH_V2DF_FTYPE_V2DF), ++ LSX_BUILTIN (vfrintrm_s, LARCH_V4SF_FTYPE_V4SF), ++ LSX_BUILTIN (vfrintrm_d, LARCH_V2DF_FTYPE_V2DF), + LSX_NO_TARGET_BUILTIN (vstelm_b, LARCH_VOID_FTYPE_V16QI_CVPOINTER_SI_UQI), + LSX_NO_TARGET_BUILTIN (vstelm_h, LARCH_VOID_FTYPE_V8HI_CVPOINTER_SI_UQI), + LSX_NO_TARGET_BUILTIN (vstelm_w, LARCH_VOID_FTYPE_V4SI_CVPOINTER_SI_UQI), +@@ -2152,14 +2068,14 @@ static const struct loongarch_builtin_description loongarch_builtins[] = { + LASX_BUILTIN (xvftintrml_l_s, LARCH_V4DI_FTYPE_V8SF), + LASX_BUILTIN (xvftintrneh_l_s, LARCH_V4DI_FTYPE_V8SF), + LASX_BUILTIN (xvftintrnel_l_s, LARCH_V4DI_FTYPE_V8SF), +- LASX_BUILTIN (xvfrintrne_s, LARCH_V8SI_FTYPE_V8SF), +- LASX_BUILTIN (xvfrintrne_d, LARCH_V4DI_FTYPE_V4DF), +- LASX_BUILTIN (xvfrintrz_s, LARCH_V8SI_FTYPE_V8SF), +- LASX_BUILTIN (xvfrintrz_d, LARCH_V4DI_FTYPE_V4DF), +- LASX_BUILTIN (xvfrintrp_s, LARCH_V8SI_FTYPE_V8SF), +- LASX_BUILTIN (xvfrintrp_d, LARCH_V4DI_FTYPE_V4DF), +- LASX_BUILTIN (xvfrintrm_s, LARCH_V8SI_FTYPE_V8SF), +- LASX_BUILTIN (xvfrintrm_d, LARCH_V4DI_FTYPE_V4DF), ++ LASX_BUILTIN (xvfrintrne_s, LARCH_V8SF_FTYPE_V8SF), ++ LASX_BUILTIN (xvfrintrne_d, LARCH_V4DF_FTYPE_V4DF), ++ LASX_BUILTIN (xvfrintrz_s, LARCH_V8SF_FTYPE_V8SF), ++ LASX_BUILTIN (xvfrintrz_d, LARCH_V4DF_FTYPE_V4DF), ++ LASX_BUILTIN (xvfrintrp_s, LARCH_V8SF_FTYPE_V8SF), ++ LASX_BUILTIN (xvfrintrp_d, LARCH_V4DF_FTYPE_V4DF), ++ LASX_BUILTIN (xvfrintrm_s, LARCH_V8SF_FTYPE_V8SF), ++ LASX_BUILTIN (xvfrintrm_d, LARCH_V4DF_FTYPE_V4DF), + LASX_BUILTIN (xvld, LARCH_V32QI_FTYPE_CVPOINTER_SI), + LASX_NO_TARGET_BUILTIN (xvst, LARCH_VOID_FTYPE_V32QI_CVPOINTER_SI), + LASX_NO_TARGET_BUILTIN (xvstelm_b, LARCH_VOID_FTYPE_V32QI_CVPOINTER_SI_UQI), +@@ -2391,6 +2307,27 @@ static const struct loongarch_builtin_description loongarch_builtins[] = { + LASX_BUILTIN (xvssrarni_du_q, LARCH_UV4DI_FTYPE_UV4DI_V4DI_USI), + }; + ++/* Index I is the function declaration for loongarch_builtins[I], or null if ++ the function isn't defined on this target. */ ++static GTY (()) tree loongarch_builtin_decls[ARRAY_SIZE (loongarch_builtins)]; ++/* Get the index I of the function declaration for loongarch_builtin_decls[I] ++ using the instruction code or return null if not defined for the target. */ ++static GTY (()) int loongarch_get_builtin_decl_index[NUM_INSN_CODES]; ++ ++/* Return a type for 'const volatile void*'. 
*/ ++ ++static tree ++loongarch_build_cvpointer_type (void) ++{ ++ static tree cache; ++ ++ if (cache == NULL_TREE) ++ cache = build_pointer_type (build_qualified_type (void_type_node, ++ TYPE_QUAL_CONST ++ | TYPE_QUAL_VOLATILE)); ++ return cache; ++} ++ + + /* MODE is a vector mode whose elements have type TYPE. Return the type + of the vector itself. */ +@@ -2411,26 +2348,12 @@ loongarch_builtin_vector_type (tree type, machine_mode mode) + return types[mode_index]; + } + +-/* Return a type for 'const volatile void *'. */ +- +-static tree +-loongarch_build_cvpointer_type (void) +-{ +- static tree cache; +- +- if (cache == NULL_TREE) +- cache = build_pointer_type (build_qualified_type +- (void_type_node, +- TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE)); +- return cache; +-} +- + /* Source-level argument types. */ + #define LARCH_ATYPE_VOID void_type_node + #define LARCH_ATYPE_INT integer_type_node + #define LARCH_ATYPE_POINTER ptr_type_node + #define LARCH_ATYPE_CVPOINTER loongarch_build_cvpointer_type () +-#define LARCH_ATYPE_BOOLEAN boolean_type_node ++#define LARCH_ATYPE_BOOLEAN boolean_type_node + /* Standard mode-based argument types. */ + #define LARCH_ATYPE_QI intQI_type_node + #define LARCH_ATYPE_UQI unsigned_intQI_type_node +@@ -2495,8 +2418,7 @@ loongarch_build_cvpointer_type (void) + + /* LARCH_FTYPE_ATYPESN takes N LARCH_FTYPES-like type codes and lists + their associated LARCH_ATYPEs. */ +-#define LARCH_FTYPE_ATYPES1(A, B) \ +- LARCH_ATYPE_##A, LARCH_ATYPE_##B ++#define LARCH_FTYPE_ATYPES1(A, B) LARCH_ATYPE_##A, LARCH_ATYPE_##B + + #define LARCH_FTYPE_ATYPES2(A, B, C) \ + LARCH_ATYPE_##A, LARCH_ATYPE_##B, LARCH_ATYPE_##C +@@ -2508,13 +2430,6 @@ loongarch_build_cvpointer_type (void) + LARCH_ATYPE_##A, LARCH_ATYPE_##B, LARCH_ATYPE_##C, LARCH_ATYPE_##D, \ + LARCH_ATYPE_##E + +-/* Index I is the function declaration for loongarch_builtins[I], or null if the +- function isn't defined on this target. */ +-static GTY(()) tree loongarch_builtin_decls[ARRAY_SIZE (loongarch_builtins)]; +-/* Get the index I of the function declaration for loongarch_builtin_decls[I] +- using the instruction code or return null if not defined for the target. */ +-static GTY(()) int loongarch_get_builtin_decl_index[NUM_INSN_CODES]; +- + /* Return the function type associated with function prototype TYPE. */ + + static tree +@@ -2525,11 +2440,10 @@ loongarch_build_function_type (enum loongarch_function_type type) + if (types[(int) type] == NULL_TREE) + switch (type) + { +-#define DEF_LARCH_FTYPE(NUM, ARGS) \ +- case LARCH_FTYPE_NAME##NUM ARGS: \ +- types[(int) type] \ +- = build_function_type_list (LARCH_FTYPE_ATYPES##NUM ARGS, \ +- NULL_TREE); \ ++#define DEF_LARCH_FTYPE(NUM, ARGS) \ ++ case LARCH_FTYPE_NAME##NUM ARGS: \ ++ types[(int) type] \ ++ = build_function_type_list (LARCH_FTYPE_ATYPES##NUM ARGS, NULL_TREE); \ + break; + #include "config/loongarch/loongarch-ftypes.def" + #undef DEF_LARCH_FTYPE +@@ -2547,6 +2461,7 @@ loongarch_init_builtins (void) + { + const struct loongarch_builtin_description *d; + unsigned int i; ++ tree type; + + /* Iterate through all of the bdesc arrays, initializing all of the + builtin functions. 
*/ +@@ -2555,10 +2470,10 @@ loongarch_init_builtins (void) + d = &loongarch_builtins[i]; + if (d->avail ()) + { ++ type = loongarch_build_function_type (d->function_type); + loongarch_builtin_decls[i] +- = add_builtin_function (d->name, +- loongarch_build_function_type (d->function_type), +- i, BUILT_IN_MD, NULL, NULL); ++ = add_builtin_function (d->name, type, i, BUILT_IN_MD, NULL, ++ NULL); + loongarch_get_builtin_decl_index[d->icode] = i; + } + } +@@ -2574,6 +2489,104 @@ loongarch_builtin_decl (unsigned int code, bool initialize_p ATTRIBUTE_UNUSED) + return loongarch_builtin_decls[code]; + } + ++/* Handler for an SLEEF-style interface to ++ a library with vectorized intrinsics. */ ++static tree ++loongarch_builtin_vectorized_libsleef (combined_fn fn, tree type_out, tree type_in) ++{ ++ char name[20]; ++ tree fntype, new_fndecl; ++ unsigned args = 1; ++ const char *bname; ++ machine_mode el_mode, in_mode; ++ int n, in_n; ++ ++ /* The SLEEF is suitable for unsafe math only. */ ++ if (!flag_unsafe_math_optimizations || !ISA_HAS_LSX) ++ return NULL_TREE; ++ ++ el_mode = TYPE_MODE (TREE_TYPE (type_out)); ++ n = TYPE_VECTOR_SUBPARTS (type_out); ++ in_mode = TYPE_MODE (TREE_TYPE (type_in)); ++ in_n = TYPE_VECTOR_SUBPARTS (type_in); ++ if (el_mode != in_mode ++ || n != in_n) ++ return NULL_TREE; ++ ++ switch (fn) ++ { ++ CASE_CFN_ATAN2: ++ CASE_CFN_POW: ++ args = 2; ++ gcc_fallthrough (); ++ ++ CASE_CFN_EXP: ++ CASE_CFN_LOG: ++ CASE_CFN_LOG1P: ++ CASE_CFN_LOG2: ++ CASE_CFN_LOG10: ++ CASE_CFN_TANH: ++ CASE_CFN_TAN: ++ CASE_CFN_ATAN: ++ CASE_CFN_ATANH: ++ CASE_CFN_CBRT: ++ CASE_CFN_SINH: ++ CASE_CFN_SIN: ++ CASE_CFN_ASINH: ++ CASE_CFN_ASIN: ++ CASE_CFN_COSH: ++ CASE_CFN_COS: ++ CASE_CFN_ACOSH: ++ CASE_CFN_ACOS: ++ break; ++ ++ default: ++ return NULL_TREE; ++ } ++ ++ tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn); ++ bname = IDENTIFIER_POINTER (DECL_NAME (fndecl)); ++ ++ if (args == 1) ++ { ++ if (n == 8 && el_mode == SFmode) ++ sprintf (name, "_ZGVdN8v_%s", bname+10); ++ else if (n == 4 && el_mode == DFmode) ++ sprintf (name, "_ZGVdN4v_%s", bname+10); ++ else if (n == 4 && el_mode == SFmode) ++ sprintf (name, "_ZGVbN4v_%s", bname+10); ++ else ++ sprintf (name, "_ZGVbN2v_%s", bname+10); ++ ++ fntype = build_function_type_list (type_out, type_in, NULL); ++ } ++ else if (args == 2) ++ { ++ if (n == 8 && el_mode == SFmode) ++ sprintf (name, "_ZGVdN8vv_%s", bname+10); ++ else if (n == 4 && el_mode == DFmode) ++ sprintf (name, "_ZGVdN4vv_%s", bname+10); ++ else if (n == 4 && el_mode == SFmode) ++ sprintf (name, "_ZGVbN4vv_%s", bname+10); ++ else ++ sprintf (name, "_ZGVbN2vv_%s", bname+10); ++ ++ fntype = build_function_type_list (type_out, type_in, type_in, NULL); ++ } ++ else ++ gcc_unreachable (); ++ ++ /* Build a function declaration for the vectorized function. */ ++ new_fndecl = build_decl (BUILTINS_LOCATION, ++ FUNCTION_DECL, get_identifier (name), fntype); ++ TREE_PUBLIC (new_fndecl) = 1; ++ DECL_EXTERNAL (new_fndecl) = 1; ++ DECL_IS_NOVOPS (new_fndecl) = 1; ++ TREE_READONLY (new_fndecl) = 1; ++ ++ return new_fndecl; ++} ++ + /* Implement TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION. 
*/ + + tree +@@ -2599,20 +2612,82 @@ loongarch_builtin_vectorized_function (unsigned int fn, tree type_out, tree type + + switch (fn) + { +- case BUILT_IN_SQRT: +- if (out_mode == DFmode && out_n == 2 +- && in_mode == DFmode && in_n == 2) +- return LARCH_GET_BUILTIN (lsx_vfsqrt_d); ++ CASE_CFN_CEIL: ++ if (out_mode == DFmode && in_mode == DFmode) ++ { ++ if (out_n == 2 && in_n == 2) ++ return LARCH_GET_BUILTIN (lsx_vfrintrp_d); ++ if (out_n == 4 && in_n == 4) ++ return LARCH_GET_BUILTIN (lasx_xvfrintrp_d); ++ } ++ if (out_mode == SFmode && in_mode == SFmode) ++ { ++ if (out_n == 4 && in_n == 4) ++ return LARCH_GET_BUILTIN (lsx_vfrintrp_s); ++ if (out_n == 8 && in_n == 8) ++ return LARCH_GET_BUILTIN (lasx_xvfrintrp_s); ++ } + break; +- case BUILT_IN_SQRTF: +- if (out_mode == SFmode && out_n == 4 +- && in_mode == SFmode && in_n == 4) +- return LARCH_GET_BUILTIN (lsx_vfsqrt_s); ++ ++ CASE_CFN_TRUNC: ++ if (out_mode == DFmode && in_mode == DFmode) ++ { ++ if (out_n == 2 && in_n == 2) ++ return LARCH_GET_BUILTIN (lsx_vfrintrz_d); ++ if (out_n == 4 && in_n == 4) ++ return LARCH_GET_BUILTIN (lasx_xvfrintrz_d); ++ } ++ if (out_mode == SFmode && in_mode == SFmode) ++ { ++ if (out_n == 4 && in_n == 4) ++ return LARCH_GET_BUILTIN (lsx_vfrintrz_s); ++ if (out_n == 8 && in_n == 8) ++ return LARCH_GET_BUILTIN (lasx_xvfrintrz_s); ++ } + break; ++ ++ CASE_CFN_RINT: ++ CASE_CFN_ROUND: ++ if (out_mode == DFmode && in_mode == DFmode) ++ { ++ if (out_n == 2 && in_n == 2) ++ return LARCH_GET_BUILTIN (lsx_vfrint_d); ++ if (out_n == 4 && in_n == 4) ++ return LARCH_GET_BUILTIN (lasx_xvfrint_d); ++ } ++ if (out_mode == SFmode && in_mode == SFmode) ++ { ++ if (out_n == 4 && in_n == 4) ++ return LARCH_GET_BUILTIN (lsx_vfrint_s); ++ if (out_n == 8 && in_n == 8) ++ return LARCH_GET_BUILTIN (lasx_xvfrint_s); ++ } ++ break; ++ ++ CASE_CFN_FLOOR: ++ if (out_mode == DFmode && in_mode == DFmode) ++ { ++ if (out_n == 2 && in_n == 2) ++ return LARCH_GET_BUILTIN (lsx_vfrintrm_d); ++ if (out_n == 4 && in_n == 4) ++ return LARCH_GET_BUILTIN (lasx_xvfrintrm_d); ++ } ++ if (out_mode == SFmode && in_mode == SFmode) ++ { ++ if (out_n == 4 && in_n == 4) ++ return LARCH_GET_BUILTIN (lsx_vfrintrm_s); ++ if (out_n == 8 && in_n == 8) ++ return LARCH_GET_BUILTIN (lasx_xvfrintrm_s); ++ } ++ break; ++ + default: + break; + } + ++ /* Dispatch to a handler for a vectorization library. 
*/ ++ if (loongarch_veclibabi_name && strcmp (loongarch_veclibabi_name, "sleef") == 0) ++ return loongarch_builtin_vectorized_libsleef (combined_fn (fn), type_out, type_in); + return NULL_TREE; + } + +@@ -2621,7 +2696,7 @@ loongarch_builtin_vectorized_function (unsigned int fn, tree type_out, tree type + + static void + loongarch_prepare_builtin_arg (struct expand_operand *op, tree exp, +- unsigned int argno) ++ unsigned int argno) + { + tree arg; + rtx value; +@@ -2649,11 +2724,10 @@ loongarch_gen_const_int_vector (machine_mode mode, HOST_WIDE_INT val) + + static rtx + loongarch_expand_builtin_insn (enum insn_code icode, unsigned int nops, +- struct expand_operand *ops, bool has_target_p) ++ struct expand_operand *ops, bool has_target_p) + { + machine_mode imode; + int rangelo = 0, rangehi = 0, error_opno = 0; +- rtx sireg; + + switch (icode) + { +@@ -3002,7 +3076,7 @@ loongarch_expand_builtin_insn (enum insn_code icode, unsigned int nops, + + static rtx + loongarch_expand_builtin_direct (enum insn_code icode, rtx target, tree exp, +- bool has_target_p) ++ bool has_target_p) + { + struct expand_operand ops[MAX_RECOG_OPERANDS]; + int opno, argno; +@@ -3069,7 +3143,8 @@ loongarch_expand_builtin_lsx_test_branch (enum insn_code icode, tree exp) + + rtx + loongarch_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, +- machine_mode mode, int ignore) ++ machine_mode mode ATTRIBUTE_UNUSED, ++ int ignore ATTRIBUTE_UNUSED) + { + tree fndecl; + unsigned int fcode, avail; +@@ -3097,6 +3172,7 @@ loongarch_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, + } + gcc_unreachable (); + } ++ + /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */ + + void +@@ -3112,32 +3188,32 @@ loongarch_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) + tree set_fcsr = loongarch_builtin_decls[LARCH_MOVGR2FCSR]; + tree get_fcsr_hold_call = build_call_expr (get_fcsr, 1, const0); + tree hold_assign_orig = build4 (TARGET_EXPR, LARCH_ATYPE_USI, +- fcsr_orig_var, get_fcsr_hold_call, +- NULL, NULL); ++ fcsr_orig_var, get_fcsr_hold_call, ++ NULL, NULL); + tree hold_mod_val = build2 (BIT_AND_EXPR, LARCH_ATYPE_USI, fcsr_orig_var, + build_int_cst (LARCH_ATYPE_USI, 0xffe0ffe0)); + tree hold_assign_mod = build4 (TARGET_EXPR, LARCH_ATYPE_USI, +- fcsr_mod_var, hold_mod_val, NULL, NULL); +- tree set_fcsr_hold_call = build_call_expr (set_fcsr, 2, const0, fcsr_mod_var); +- tree hold_all = build2 (COMPOUND_EXPR, LARCH_ATYPE_USI, +- hold_assign_orig, hold_assign_mod); +- *hold = build2 (COMPOUND_EXPR, void_type_node, hold_all, +- set_fcsr_hold_call); ++ fcsr_mod_var, hold_mod_val, NULL, NULL); ++ tree set_fcsr_hold_call = build_call_expr (set_fcsr, 2, const0, ++ fcsr_mod_var); ++ tree hold_all = build2 (COMPOUND_EXPR, LARCH_ATYPE_USI, hold_assign_orig, ++ hold_assign_mod); ++ *hold = build2 (COMPOUND_EXPR, void_type_node, hold_all, set_fcsr_hold_call); + + *clear = build_call_expr (set_fcsr, 2, const0, fcsr_mod_var); + + tree get_fcsr_update_call = build_call_expr (get_fcsr, 1, const0); + *update = build4 (TARGET_EXPR, LARCH_ATYPE_USI, exceptions_var, +- get_fcsr_update_call, NULL, NULL); +- tree set_fcsr_update_call = build_call_expr (set_fcsr, 2, const0, fcsr_orig_var); ++ get_fcsr_update_call, NULL, NULL); ++ tree set_fcsr_update_call = build_call_expr (set_fcsr, 2, const0, ++ fcsr_orig_var); + *update = build2 (COMPOUND_EXPR, void_type_node, *update, + set_fcsr_update_call); + tree atomic_feraiseexcept + = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT); +- tree 
int_exceptions_var = fold_convert (integer_type_node, +- exceptions_var); +- tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept, +- 1, int_exceptions_var); ++ tree int_exceptions_var = fold_convert (integer_type_node, exceptions_var); ++ tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept, 1, ++ int_exceptions_var); + *update = build2 (COMPOUND_EXPR, void_type_node, *update, + atomic_feraiseexcept_call); + } +@@ -3149,4 +3225,3 @@ loongarch_build_builtin_va_list (void) + { + return ptr_type_node; + } +- +diff --git a/gcc/config/loongarch/loongarch-c.c b/gcc/config/loongarch/loongarch-c.c +index 6eac43bdf..f8583f7aa 100644 +--- a/gcc/config/loongarch/loongarch-c.c ++++ b/gcc/config/loongarch/loongarch-c.c +@@ -1,22 +1,22 @@ + /* LoongArch-specific code for C family languages. +- Copyright (C) 2020-2021 Free Software Foundation, Inc. +- Contributed by Andrew Waterman (zhouyingkun@mail.loongson.cn). ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. ++ Contributed by Loongson Ltd. + +- This file is part of GCC. ++This file is part of GCC. + +- GCC is free software; you can redistribute it and/or modify +- it under the terms of the GNU General Public License as published by +- the Free Software Foundation; either version 3, or (at your option) +- any later version. ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. + +- GCC is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- GNU General Public License for more details. ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. + +- You should have received a copy of the GNU General Public License +- along with GCC; see the file COPYING3. If not see +- . */ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ + + #define IN_TARGET_CODE 1 + +@@ -31,7 +31,28 @@ + #define builtin_define(TXT) cpp_define (pfile, TXT) + #define builtin_assert(TXT) cpp_assert (pfile, TXT) + +-/* TODO: what is the pfile technique ??? !!! */ ++/* Define preprocessor macros for the -march and -mtune options. ++ PREFIX is either _LOONGARCH_ARCH or _LOONGARCH_TUNE, INFO is ++ the selected processor. If INFO's canonical name is "foo", ++ define PREFIX to be "foo", and define an additional macro ++ PREFIX_FOO. 
*/ ++#define LARCH_CPP_SET_PROCESSOR(PREFIX, CPU_TYPE) \ ++ do \ ++ { \ ++ char *macro, *p; \ ++ int cpu_type = (CPU_TYPE); \ ++ \ ++ macro = concat ((PREFIX), "_", \ ++ loongarch_cpu_strings[cpu_type], NULL); \ ++ for (p = macro; *p != 0; p++) \ ++ *p = TOUPPER (*p); \ ++ \ ++ builtin_define (macro); \ ++ builtin_define_with_value ((PREFIX), \ ++ loongarch_cpu_strings[cpu_type], 1); \ ++ free (macro); \ ++ } \ ++ while (0) + + void + loongarch_cpu_cpp_builtins (cpp_reader *pfile) +@@ -40,10 +61,43 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile) + builtin_assert ("cpu=loongarch"); + builtin_define ("__loongarch__"); + +- if (TARGET_FLOAT64) +- builtin_define ("__loongarch_fpr=64"); ++ LARCH_CPP_SET_PROCESSOR ("_LOONGARCH_ARCH", la_target.cpu_arch); ++ LARCH_CPP_SET_PROCESSOR ("_LOONGARCH_TUNE", la_target.cpu_tune); ++ ++ /* Base architecture / ABI. */ ++ if (TARGET_64BIT) ++ { ++ builtin_define ("__loongarch_grlen=64"); ++ builtin_define ("__loongarch64"); ++ } ++ ++ if (TARGET_ABI_LP64) ++ { ++ builtin_define ("_ABILP64=3"); ++ builtin_define ("_LOONGARCH_SIM=_ABILP64"); ++ builtin_define ("__loongarch_lp64"); ++ } ++ ++ /* These defines reflect the ABI in use, not whether the ++ FPU is directly accessible. */ ++ if (TARGET_DOUBLE_FLOAT_ABI) ++ builtin_define ("__loongarch_double_float=1"); ++ else if (TARGET_SINGLE_FLOAT_ABI) ++ builtin_define ("__loongarch_single_float=1"); ++ ++ if (TARGET_DOUBLE_FLOAT_ABI || TARGET_SINGLE_FLOAT_ABI) ++ builtin_define ("__loongarch_hard_float=1"); + else +- builtin_define ("__loongarch_fpr=32"); ++ builtin_define ("__loongarch_soft_float=1"); ++ ++ ++ /* ISA Extensions. */ ++ if (TARGET_DOUBLE_FLOAT) ++ builtin_define ("__loongarch_frlen=64"); ++ else if (TARGET_SINGLE_FLOAT) ++ builtin_define ("__loongarch_frlen=32"); ++ else ++ builtin_define ("__loongarch_frlen=0"); + + if (ISA_HAS_LSX) + { +@@ -62,74 +116,12 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile) + builtin_define ("__loongarch_simd_width=256"); + } + +- LARCH_CPP_SET_PROCESSOR ("_LOONGARCH_ARCH", loongarch_arch_info); +- LARCH_CPP_SET_PROCESSOR ("_LOONGARCH_TUNE", loongarch_tune_info); +- +- +- switch (loongarch_abi) +- { +- case ABILP32: +- builtin_define ("_ABILP32=1"); +- builtin_define ("_LOONGARCH_SIM=_ABILP32"); +- builtin_define ("__loongarch32"); +- break; +- +- case ABILPX32: +- builtin_define ("_ABILPX32=2"); +- builtin_define ("_LOONGARCH_SIM=_ABILPX32"); +- break; +- +- case ABILP64: +- builtin_define ("_ABILP64=3"); +- builtin_define ("_LOONGARCH_SIM=_ABILP64"); +- builtin_define ("__loongarch64"); +- break; +- } + ++ /* Native Data Sizes. */ + builtin_define_with_int_value ("_LOONGARCH_SZINT", INT_TYPE_SIZE); + builtin_define_with_int_value ("_LOONGARCH_SZLONG", LONG_TYPE_SIZE); + builtin_define_with_int_value ("_LOONGARCH_SZPTR", POINTER_SIZE); +- builtin_define_with_int_value ("_LOONGARCH_FPSET", +- 32 / MAX_FPRS_PER_FMT); +- builtin_define_with_int_value ("_LOONGARCH_SPFPSET", +- 32); +- +- /* These defines reflect the ABI in use, not whether the +- FPU is directly accessible. */ +- if (TARGET_NO_FLOAT) +- builtin_define ("__loongarch_no_float"); +- else if (TARGET_HARD_FLOAT_ABI) +- builtin_define ("__loongarch_hard_float"); +- else +- builtin_define ("__loongarch_soft_float"); ++ builtin_define_with_int_value ("_LOONGARCH_FPSET", 32); ++ builtin_define_with_int_value ("_LOONGARCH_SPFPSET", 32); + +- if (TARGET_SINGLE_FLOAT) +- builtin_define ("__loongarch_single_float"); +- +- /* Macros dependent on the C dialect. 
*/ +- if (preprocessing_asm_p ()) +- { +- builtin_define_std ("LANGUAGE_ASSEMBLY"); +- builtin_define ("_LANGUAGE_ASSEMBLY"); +- } +- else if (c_dialect_cxx ()) +- { +- builtin_define ("_LANGUAGE_C_PLUS_PLUS"); +- builtin_define ("__LANGUAGE_C_PLUS_PLUS"); +- builtin_define ("__LANGUAGE_C_PLUS_PLUS__"); +- } +- else +- { +- builtin_define_std ("LANGUAGE_C"); +- builtin_define ("_LANGUAGE_C"); +- } +- +- if (c_dialect_objc ()) +- { +- builtin_define ("_LANGUAGE_OBJECTIVE_C"); +- builtin_define ("__LANGUAGE_OBJECTIVE_C"); +- /* Bizarre, but retained for backwards compatibility. */ +- builtin_define_std ("LANGUAGE_C"); +- builtin_define ("_LANGUAGE_C"); +- } + } +diff --git a/gcc/config/loongarch/loongarch-cpu.c b/gcc/config/loongarch/loongarch-cpu.c +new file mode 100644 +index 000000000..ce2e649c8 +--- /dev/null ++++ b/gcc/config/loongarch/loongarch-cpu.c +@@ -0,0 +1,291 @@ ++/* Definitions for LoongArch CPU properties. ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. ++ Contributed by Loongson Ltd. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#define IN_TARGET_CODE 1 ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++#include "diagnostic-core.h" ++ ++#include "loongarch-def.h" ++#include "loongarch-opts.h" ++#include "loongarch-cpu.h" ++#include "loongarch-str.h" ++ ++/* Native CPU detection with "cpucfg" */ ++#define N_CPUCFG_WORDS 0x15 ++static uint32_t cpucfg_cache[N_CPUCFG_WORDS] = { 0 }; ++static const int cpucfg_useful_idx[] = {0, 1, 2, 16, 17, 18, 19}; ++ ++static uint32_t ++read_cpucfg_word (int wordno) ++{ ++ /* To make cross-compiler shut up. */ ++ (void) wordno; ++ uint32_t ret = 0; ++ ++ #ifdef __loongarch__ ++ __asm__ ("cpucfg %0,%1\n\t" :"=r"(ret) :"r"(wordno)); ++ #endif ++ ++ return ret; ++} ++ ++void ++cache_cpucfg (void) ++{ ++ for (unsigned int i = 0; i < sizeof (cpucfg_useful_idx) / sizeof (int); i++) ++ { ++ cpucfg_cache[cpucfg_useful_idx[i]] ++ = read_cpucfg_word (cpucfg_useful_idx[i]); ++ } ++} ++ ++uint32_t ++get_native_prid (void) ++{ ++ /* Fill loongarch_cpu_default_config[CPU_NATIVE] with cpucfg data, ++ see "Loongson Architecture Reference Manual" ++ (Volume 1, Section 2.2.10.5) */ ++ return cpucfg_cache[0]; ++} ++ ++const char* ++get_native_prid_str (void) ++{ ++ static char prid_str[9]; ++ sprintf (prid_str, "%08x", cpucfg_cache[0]); ++ return (const char*) prid_str; ++} ++ ++ ++/* Fill property tables for CPU_NATIVE. */ ++void ++fill_native_cpu_config (struct loongarch_target *tgt) ++{ ++ int arch_native_p = tgt->cpu_arch == CPU_NATIVE; ++ int tune_native_p = tgt->cpu_tune == CPU_NATIVE; ++ int native_cpu_type = CPU_NATIVE; ++ ++ /* Nothing needs to be done unless "-march/tune=native" ++ is given or implied. */ ++ if (!arch_native_p && !tune_native_p) ++ return; ++ ++ /* Fill cpucfg_cache with the "cpucfg" instruction. 
*/ ++ cache_cpucfg (); ++ ++ /* Fill: tgt->cpu_arch | tgt->cpu_tune ++ With: processor ID (PRID) ++ At: cpucfg_words[0][31:0] */ ++ ++ switch (cpucfg_cache[0] & 0x00ffff00) ++ { ++ case 0x0014d000: /* LA664 */ ++ native_cpu_type = CPU_LA664; ++ break; ++ ++ case 0x0014c000: /* LA464 */ ++ native_cpu_type = CPU_LA464; ++ break; ++ ++ case 0x0014b000: /* LA364 */ ++ native_cpu_type = CPU_LA364; ++ break; ++ ++ case 0x0014a000: /* LA264 */ ++ native_cpu_type = CPU_LA264; ++ break; ++ ++ default: ++ /* Unknown PRID. */ ++ if (tune_native_p) ++ inform (UNKNOWN_LOCATION, "unknown processor ID %<0x%x%>, " ++ "some tuning parameters will fall back to default", ++ cpucfg_cache[0]); ++ break; ++ } ++ ++ /* if -march=native */ ++ if (arch_native_p) ++ { ++ int tmp; ++ tgt->cpu_arch = native_cpu_type; ++ ++ /* Fill: loongarch_cpu_default_isa[tgt->cpu_arch].base ++ With: base architecture (ARCH) ++ At: cpucfg_words[1][1:0] */ ++ ++ #define PRESET_ARCH (loongarch_cpu_default_isa[tgt->cpu_arch].base) ++ switch (cpucfg_cache[1] & 0x3) ++ { ++ case 0x02: ++ tmp = ISA_BASE_LA64V100; ++ break; ++ ++ default: ++ fatal_error (UNKNOWN_LOCATION, ++ "unknown native base architecture %<0x%x%>, %qs failed", ++ (unsigned int) (cpucfg_cache[1] & 0x3), ++ "-m" OPTSTR_ARCH "=" STR_CPU_NATIVE); ++ } ++ ++ /* Check consistency with PRID presets. */ ++ if (native_cpu_type != CPU_NATIVE && tmp != PRESET_ARCH) ++ warning (0, "base architecture %qs differs from PRID preset %qs", ++ loongarch_isa_base_strings[tmp], ++ loongarch_isa_base_strings[PRESET_ARCH]); ++ ++ /* Use the native value anyways. */ ++ PRESET_ARCH = tmp; ++ ++ /* Fill: loongarch_cpu_default_isa[tgt->cpu_arch].fpu ++ With: FPU type (FP, FP_SP, FP_DP) ++ At: cpucfg_words[2][2:0] */ ++ ++ #define PRESET_FPU (loongarch_cpu_default_isa[tgt->cpu_arch].fpu) ++ switch (cpucfg_cache[2] & 0x7) ++ { ++ case 0x07: ++ tmp = ISA_EXT_FPU64; ++ break; ++ ++ case 0x03: ++ tmp = ISA_EXT_FPU32; ++ break; ++ ++ case 0x00: ++ tmp = ISA_EXT_NONE; ++ break; ++ ++ default: ++ fatal_error (UNKNOWN_LOCATION, ++ "unknown native FPU type %<0x%x%>, %qs failed", ++ (unsigned int) (cpucfg_cache[2] & 0x7), ++ "-m" OPTSTR_ARCH "=" STR_CPU_NATIVE); ++ } ++ ++ /* Check consistency with PRID presets. */ ++ if (native_cpu_type != CPU_NATIVE && tmp != PRESET_FPU) ++ warning (0, "floating-point unit %qs differs from PRID preset %qs", ++ loongarch_isa_ext_strings[tmp], ++ loongarch_isa_ext_strings[PRESET_FPU]); ++ ++ /* Use the native value anyways. */ ++ PRESET_FPU = tmp; ++ ++ ++ /* Fill: loongarch_cpu_default_isa[CPU_NATIVE].simd ++ With: SIMD extension type (LSX, LASX) ++ At: cpucfg_words[2][7:6] */ ++ ++ #define PRESET_SIMD (loongarch_cpu_default_isa[tgt->cpu_arch].simd) ++ switch (cpucfg_cache[2] & 0xc0) ++ { ++ case 0xc0: ++ tmp = ISA_EXT_SIMD_LASX; ++ break; ++ ++ case 0x40: ++ tmp = ISA_EXT_SIMD_LSX; ++ break; ++ ++ case 0x80: ++ warning (0, "unknown SIMD extension " ++ "(%qs disabled while %qs is enabled), disabling SIMD", ++ loongarch_isa_ext_strings[ISA_EXT_SIMD_LSX], ++ loongarch_isa_ext_strings[ISA_EXT_SIMD_LASX]); ++ ++ case 0x00: ++ tmp = 0; ++ break; ++ } ++ ++ /* Check consistency with PRID presets. */ ++ /* ++ if (native_cpu_type != CPU_NATIVE && tmp != PRESET_SIMD) ++ warning (0, "SIMD extension %qs differs from PRID preset %qs", ++ loongarch_isa_ext_strings[tmp], ++ loongarch_isa_ext_strings[PRESET_SIMD]); ++ */ ++ ++ /* Use the native value anyways. 
*/ ++ PRESET_SIMD = tmp; ++ } ++ ++ if (tune_native_p) ++ { ++ tgt->cpu_tune = native_cpu_type; ++ ++ /* Fill: loongarch_cpu_cache[tgt->cpu_tune] ++ With: cache size info ++ At: cpucfg_words[16:20][31:0] */ ++ ++ #define PRESET_CACHE (loongarch_cpu_cache[tgt->cpu_tune]) ++ struct loongarch_cache native_cache; ++ int l1d_present = 0, l1u_present = 0; ++ int l2d_present = 0; ++ uint32_t l1_szword, l2_szword; ++ ++ l1u_present |= cpucfg_cache[16] & 3; /* bit[1:0]: unified l1 */ ++ l1d_present |= cpucfg_cache[16] & 4; /* bit[2:2]: l1d */ ++ l1_szword = l1d_present ? 18 : (l1u_present ? 17 : 0); ++ l1_szword = l1_szword ? cpucfg_cache[l1_szword]: 0; ++ ++ l2d_present |= cpucfg_cache[16] & 24; /* bit[4:3]: unified l2 */ ++ l2d_present |= cpucfg_cache[16] & 128; /* bit[7:7]: l2d */ ++ l2_szword = l2d_present ? cpucfg_cache[19]: 0; ++ ++ native_cache.l1d_line_size ++ = 1 << ((l1_szword & 0x7f000000) >> 24); /* bit[30:24]: log2(line) */ ++ ++ native_cache.l1d_size ++ = (1 << ((l1_szword & 0x00ff0000) >> 16)) /* bit[23:16]: log2(idx) */ ++ * ((l1_szword & 0x0000ffff) + 1) /* bit[15:0]: sets - 1 */ ++ * (1 << ((l1_szword & 0x7f000000) >> 24)) /* bit[30:24]: log2(line) */ ++ >> 10; /* in kibibytes */ ++ ++ native_cache.l2d_size ++ = (1 << ((l2_szword & 0x00ff0000) >> 16)) /* bit[23:16]: log2(idx) */ ++ * ((l2_szword & 0x0000ffff) + 1) /* bit[15:0]: sets - 1 */ ++ * (1 << ((l2_szword & 0x7f000000) >> 24)) /* bit[30:24]: log2(linesz) */ ++ >> 10; /* in kibibytes */ ++ ++ /* ++ if (native_cpu_type != CPU_NATIVE && ( ++ native_cache.l1d_line_size != PRESET_CACHE.l1d_line_size || ++ native_cache.l1d_size != PRESET_CACHE.l1d_size || ++ native_cache.l2d_size != PRESET_CACHE.l2d_size)) ++ warning (0, "native cache info (%) " ++ "differs from PRID preset (%)", ++ native_cache.l1d_size, native_cache.l2d_size, ++ native_cache.l1d_line_size, ++ PRESET_CACHE.l1d_size, PRESET_CACHE.l2d_size, ++ PRESET_CACHE.l1d_line_size); ++ */ ++ ++ /* Use the native value anyways. */ ++ PRESET_CACHE.l1d_line_size = native_cache.l1d_line_size; ++ PRESET_CACHE.l1d_size = native_cache.l1d_size; ++ PRESET_CACHE.l2d_size = native_cache.l2d_size; ++ } ++} +diff --git a/gcc/config/loongarch/loongarch-d.c b/gcc/config/loongarch/loongarch-cpu.h +similarity index 59% +rename from gcc/config/loongarch/loongarch-d.c +rename to gcc/config/loongarch/loongarch-cpu.h +index 971e5d33e..08d018372 100644 +--- a/gcc/config/loongarch/loongarch-d.c ++++ b/gcc/config/loongarch/loongarch-cpu.h +@@ -1,5 +1,7 @@ +-/* Subroutines for the D front end on the LARCH architecture. +- Copyright (C) 2017 Free Software Foundation, Inc. ++/* Definitions for loongarch native cpu property detection routines. ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. ++ ++This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by +@@ -15,17 +17,15 @@ You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +-#include "config.h" ++#ifndef LOONGARCH_CPU_H ++#define LOONGARCH_CPU_H ++ + #include "system.h" +-#include "coretypes.h" +-#include "tm.h" +-#include "d/d-target.h" +-#include "d/d-target-def.h" +- +-/* Implement TARGET_D_CPU_VERSIONS for LARCH targets. */ +- +-void +-loongarch_d_target_versions (void) +-{ +- // need to be improved !! 
+-} ++#include "loongarch-def.h" ++ ++void cache_cpucfg (void); ++void fill_native_cpu_config (struct loongarch_target *tgt); ++uint32_t get_native_prid (void); ++const char* get_native_prid_str (void); ++ ++#endif /* LOONGARCH_CPU_H */ +diff --git a/gcc/config/loongarch/loongarch-cpus.def b/gcc/config/loongarch/loongarch-cpus.def +deleted file mode 100644 +index 7ce2508e3..000000000 +--- a/gcc/config/loongarch/loongarch-cpus.def ++++ /dev/null +@@ -1,38 +0,0 @@ +-/* LARCH CPU names. +- Copyright (C) 1989-2018 Free Software Foundation, Inc. +- +-This file is part of GCC. +- +-GCC is free software; you can redistribute it and/or modify +-it under the terms of the GNU General Public License as published by +-the Free Software Foundation; either version 3, or (at your option) +-any later version. +- +-GCC is distributed in the hope that it will be useful, +-but WITHOUT ANY WARRANTY; without even the implied warranty of +-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-GNU General Public License for more details. +- +-You should have received a copy of the GNU General Public License +-along with GCC; see the file COPYING3. If not see +-. */ +- +-/* A table describing all the processors GCC knows about. The first +- mention of an ISA level is taken as the canonical name for that +- ISA. +- +- To ease comparison, please keep this table in the same order +- as GAS's loongarch_cpu_info_table. Please also make sure that +- LARCH_ISA_LEVEL_SPEC and LARCH_ARCH_FLOAT_SPEC handle all -march +- options correctly. +- +- Before including this file, define a macro: +- +- LARCH_CPU (NAME, CPU, ISA, FLAGS) +- +- where the arguments are the fields of struct loongarch_cpu_info. */ +- +-/* Entries for generic ISAs. */ +-LARCH_CPU ("loongarch64", PROCESSOR_LOONGARCH64, 0, 0) +-LARCH_CPU ("la464", PROCESSOR_LA464, 0, 0) +- +diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c +new file mode 100644 +index 000000000..dde7a5dba +--- /dev/null ++++ b/gcc/config/loongarch/loongarch-def.c +@@ -0,0 +1,232 @@ ++/* LoongArch static properties. ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. ++ Contributed by Loongson Ltd. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#include "loongarch-def.h" ++#include "loongarch-str.h" ++ ++/* CPU property tables. 
*/ ++const char* ++loongarch_cpu_strings[N_TUNE_TYPES] = { ++ [CPU_NATIVE] = STR_CPU_NATIVE, ++ [CPU_ABI_DEFAULT] = STR_CPU_ABI_DEFAULT, ++ [CPU_LOONGARCH64] = STR_CPU_LOONGARCH64, ++ [CPU_LA464] = STR_CPU_LA464, ++ [CPU_LA364] = STR_CPU_LA364, ++ [CPU_LA264] = STR_CPU_LA264, ++ [CPU_LA664] = STR_CPU_LA664, ++}; ++ ++struct loongarch_isa ++loongarch_cpu_default_isa[N_ARCH_TYPES] = { ++ [CPU_LOONGARCH64] = { ++ .base = ISA_BASE_LA64V100, ++ .fpu = ISA_EXT_FPU64, ++ .simd = 0, ++ }, ++ [CPU_LA464] = { ++ .base = ISA_BASE_LA64V100, ++ .fpu = ISA_EXT_FPU64, ++ .simd = ISA_EXT_SIMD_LASX, ++ }, ++ [CPU_LA364] = { ++ .base = ISA_BASE_LA64V100, ++ .fpu = ISA_EXT_FPU64, ++ .simd = ISA_EXT_SIMD_LSX, ++ }, ++ [CPU_LA264] = { ++ .base = ISA_BASE_LA64V100, ++ .fpu = ISA_EXT_FPU64, ++ .simd = ISA_EXT_SIMD_LSX, ++ }, ++ [CPU_LA664] = { ++ .base = ISA_BASE_LA64V100, ++ .fpu = ISA_EXT_FPU64, ++ .simd = ISA_EXT_SIMD_LASX, ++ }, ++}; ++ ++struct loongarch_cache ++loongarch_cpu_cache[N_TUNE_TYPES] = { ++ [CPU_LOONGARCH64] = { ++ .l1d_line_size = 64, ++ .l1d_size = 64, ++ .l2d_size = 256, ++ .simultaneous_prefetches = 4, ++ }, ++ [CPU_LA464] = { ++ .l1d_line_size = 64, ++ .l1d_size = 64, ++ .l2d_size = 256, ++ .simultaneous_prefetches = 4, ++ }, ++ [CPU_LA364] = { ++ .l1d_line_size = 64, ++ .l1d_size = 64, ++ .l2d_size = 0, ++ .simultaneous_prefetches = 4, ++ }, ++ [CPU_LA264] = { ++ .l1d_line_size = 64, ++ .l1d_size = 32, ++ .l2d_size = 0, ++ .simultaneous_prefetches = 4, ++ }, ++ [CPU_LA664] = { ++ .l1d_line_size = 64, ++ .l1d_size = 64, ++ .l2d_size = 256, ++ .simultaneous_prefetches = 4, ++ }, ++}; ++ ++/* RTX costs */ ++/* Default RTX cost initializer. */ ++#define COSTS_N_INSNS(N) ((N) * 4) ++#define DEFAULT_COSTS \ ++ .fp_add = COSTS_N_INSNS (1), \ ++ .fp_mult_sf = COSTS_N_INSNS (2), \ ++ .fp_mult_df = COSTS_N_INSNS (4), \ ++ .fp_div_sf = COSTS_N_INSNS (6), \ ++ .fp_div_df = COSTS_N_INSNS (8), \ ++ .int_mult_si = COSTS_N_INSNS (1), \ ++ .int_mult_di = COSTS_N_INSNS (1), \ ++ .int_div_si = COSTS_N_INSNS (4), \ ++ .int_div_di = COSTS_N_INSNS (6), \ ++ .branch_cost = 6, \ ++ .memory_latency = 4 ++ ++/* The following properties cannot be looked up directly using "cpucfg". ++ So it is necessary to provide a default value for "unknown native" ++ tune targets (i.e. -mtune=native while PRID does not correspond to ++ any known "-mtune" type). */ ++ ++struct loongarch_rtx_cost_data ++loongarch_cpu_rtx_cost_data[N_TUNE_TYPES] = { ++ [CPU_NATIVE] = { ++ DEFAULT_COSTS ++ }, ++ [CPU_LOONGARCH64] = { ++ DEFAULT_COSTS ++ }, ++ [CPU_LA464] = { ++ DEFAULT_COSTS ++ }, ++ [CPU_LA364] = { ++ DEFAULT_COSTS ++ }, ++ [CPU_LA264] = { ++ DEFAULT_COSTS ++ }, ++ [CPU_LA664] = { ++ DEFAULT_COSTS ++ }, ++}; ++ ++/* RTX costs to use when optimizing for size. 
*/ ++const struct loongarch_rtx_cost_data ++loongarch_rtx_cost_optimize_size = { ++ .fp_add = 4, ++ .fp_mult_sf = 4, ++ .fp_mult_df = 4, ++ .fp_div_sf = 4, ++ .fp_div_df = 4, ++ .int_mult_si = 4, ++ .int_mult_di = 4, ++ .int_div_si = 4, ++ .int_div_di = 4, ++ .branch_cost = 2, ++ .memory_latency = 4, ++}; ++ ++int ++loongarch_cpu_issue_rate[N_TUNE_TYPES] = { ++ [CPU_NATIVE] = 4, ++ [CPU_LOONGARCH64] = 4, ++ [CPU_LA464] = 4, ++ [CPU_LA364] = 3, ++ [CPU_LA264] = 2, ++ [CPU_LA664] = 6, ++}; ++ ++int ++loongarch_cpu_multipass_dfa_lookahead[N_TUNE_TYPES] = { ++ [CPU_NATIVE] = 4, ++ [CPU_LOONGARCH64] = 4, ++ [CPU_LA464] = 4, ++ [CPU_LA364] = 4, ++ [CPU_LA264] = 4, ++ [CPU_LA664] = 4, ++}; ++ ++/* Wiring string definitions from loongarch-str.h to global arrays ++ with standard index values from loongarch-opts.h, so we can ++ print config-related messages and do ABI self-spec filtering ++ from the driver in a self-consistent manner. */ ++ ++const char* ++loongarch_isa_base_strings[N_ISA_BASE_TYPES] = { ++ [ISA_BASE_LA64V100] = STR_ISA_BASE_LA64V100, ++}; ++ ++const char* ++loongarch_isa_ext_strings[N_ISA_EXT_TYPES] = { ++ [ISA_EXT_NONE] = STR_NONE, ++ [ISA_EXT_FPU32] = STR_ISA_EXT_FPU32, ++ [ISA_EXT_FPU64] = STR_ISA_EXT_FPU64, ++ [ISA_EXT_SIMD_LSX] = STR_ISA_EXT_LSX, ++ [ISA_EXT_SIMD_LASX] = STR_ISA_EXT_LASX, ++}; ++ ++const char* ++loongarch_abi_base_strings[N_ABI_BASE_OPTS] = { ++ [ABI_BASE_LP64D] = STR_ABI_BASE_LP64D, ++ [ABI_BASE_LP64F] = STR_ABI_BASE_LP64F, ++ [ABI_BASE_LP64S] = STR_ABI_BASE_LP64S, ++ [ABI_BASE_LP64] = STR_ABI_BASE_LP64, ++}; ++ ++const char* ++loongarch_abi_ext_strings[N_ABI_EXT_TYPES] = { ++ [ABI_EXT_BASE] = STR_ABI_EXT_BASE, ++}; ++ ++const char* ++loongarch_cmodel_strings[] = { ++ [CMODEL_NORMAL] = STR_CMODEL_NORMAL, ++ [CMODEL_TINY] = STR_CMODEL_TINY, ++ [CMODEL_TINY_STATIC] = STR_CMODEL_TS, ++ [CMODEL_LARGE] = STR_CMODEL_LARGE, ++ [CMODEL_EXTREME] = STR_CMODEL_EXTREME, ++}; ++ ++ ++/* ABI-related definitions. */ ++const struct loongarch_isa ++abi_minimal_isa[N_ABI_BASE_TYPES][N_ABI_EXT_TYPES] = { ++ [ABI_BASE_LP64D] = { ++ [ABI_EXT_BASE] = {.base = ISA_BASE_LA64V100, .fpu = ISA_EXT_FPU64, .simd = 0}, ++ }, ++ [ABI_BASE_LP64F] = { ++ [ABI_EXT_BASE] = {.base = ISA_BASE_LA64V100, .fpu = ISA_EXT_FPU32, .simd = 0}, ++ }, ++ [ABI_BASE_LP64S] = { ++ [ABI_EXT_BASE] = {.base = ISA_BASE_LA64V100, .fpu = ISA_EXT_NONE, .simd = 0}, ++ }, ++}; +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +new file mode 100644 +index 000000000..45d9ac16c +--- /dev/null ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -0,0 +1,161 @@ ++/* LoongArch definitions. ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. ++ Contributed by Loongson Ltd. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++/* Definition of standard codes for: ++ - base architecture types (isa_base), ++ - ISA extensions (isa_ext), ++ - base ABI types (abi_base), ++ - ABI extension types (abi_ext). 
++ ++ - code models (cmodel) ++ - other command-line switches (switch) ++ ++ These values are primarily used for implementing option handling ++ logic in "loongarch.opt", "loongarch-driver.c" and "loongarch-opt.c". ++ ++ As for the result of this option handling process, the following ++ scheme is adopted to represent the final configuration: ++ ++ - The target ABI is encoded with a tuple (abi_base, abi_ext) ++ using the code defined below. ++ ++ - The target ISA is encoded with a "struct loongarch_isa" defined ++ in loongarch-cpu.h. ++ ++ - The target microarchitecture is represented with a cpu model ++ index defined in loongarch-cpu.h. ++*/ ++ ++#ifndef LOONGARCH_DEF_H ++#define LOONGARCH_DEF_H ++ ++#include "loongarch-tune.h" ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* enum isa_base */ ++extern const char* loongarch_isa_base_strings[]; ++#define ISA_BASE_LA64V100 0 ++#define N_ISA_BASE_TYPES 1 ++ ++/* enum isa_ext_* */ ++extern const char* loongarch_isa_ext_strings[]; ++#define ISA_EXT_NONE 0 ++#define ISA_EXT_FPU32 1 ++#define ISA_EXT_FPU64 2 ++#define N_ISA_EXT_FPU_TYPES 3 ++#define ISA_EXT_SIMD_LSX 3 ++#define ISA_EXT_SIMD_LASX 4 ++#define N_ISA_EXT_TYPES 5 ++ ++/* enum abi_base */ ++extern const char* loongarch_abi_base_strings[]; ++#define ABI_BASE_LP64D 0 ++#define ABI_BASE_LP64F 1 ++#define ABI_BASE_LP64S 2 ++#define N_ABI_BASE_TYPES 3 ++#define ABI_BASE_LP64 3 ++#define N_ABI_BASE_OPTS 4 ++ ++#define IS_LP64_ABI_BASE(C) \ ++ (C == ABI_BASE_LP64D || C == ABI_BASE_LP64F || C == ABI_BASE_LP64S) ++ ++#define TO_LP64_ABI_BASE(C) (C) ++ ++#define ABI_FPU_64(abi_base) \ ++ (abi_base == ABI_BASE_LP64D) ++#define ABI_FPU_32(abi_base) \ ++ (abi_base == ABI_BASE_LP64F) ++#define ABI_FPU_NONE(abi_base) \ ++ (abi_base == ABI_BASE_LP64S) ++ ++ ++/* enum abi_ext */ ++extern const char* loongarch_abi_ext_strings[]; ++#define ABI_EXT_BASE 0 ++#define N_ABI_EXT_TYPES 1 ++ ++/* enum cmodel */ ++extern const char* loongarch_cmodel_strings[]; ++#define CMODEL_NORMAL 0 ++#define CMODEL_TINY 1 ++#define CMODEL_TINY_STATIC 2 ++#define CMODEL_LARGE 3 ++#define CMODEL_EXTREME 4 ++#define N_CMODEL_TYPES 5 ++ ++/* The common default value for variables whose assignments ++ are triggered by command-line options. */ ++ ++#define M_OPT_UNSET -1 ++#define M_OPT_ABSENT(opt_enum) ((opt_enum) == M_OPT_UNSET) ++ ++ ++/* Internal representation of the target. */ ++struct loongarch_isa ++{ ++ int base; /* ISA_BASE_ */ ++ int fpu; /* ISA_EXT_FPU_ */ ++ int simd; /* ISA_EXT_SIMD_ */ ++}; ++ ++struct loongarch_abi ++{ ++ int base; /* ABI_BASE_ */ ++ int ext; /* ABI_EXT_ */ ++}; ++ ++struct loongarch_target ++{ ++ struct loongarch_isa isa; ++ struct loongarch_abi abi; ++ int cpu_arch; /* CPU_ */ ++ int cpu_tune; /* same */ ++ int cmodel; /* CMODEL_ */ ++}; ++ ++/* CPU properties. 
*/ ++/* index */ ++#define CPU_NATIVE 0 ++#define CPU_ABI_DEFAULT 1 ++#define CPU_LOONGARCH64 2 ++#define CPU_LA464 3 ++#define CPU_LA364 4 ++#define CPU_LA264 5 ++#define CPU_LA664 6 ++#define N_ARCH_TYPES 7 ++#define N_TUNE_TYPES 7 ++#define CPU_NONE 8 ++ ++/* parallel tables */ ++extern const char* loongarch_cpu_strings[]; ++extern struct loongarch_isa loongarch_cpu_default_isa[]; ++extern int loongarch_cpu_issue_rate[]; ++extern int loongarch_cpu_multipass_dfa_lookahead[]; ++ ++extern struct loongarch_cache loongarch_cpu_cache[]; ++extern struct loongarch_rtx_cost_data loongarch_cpu_rtx_cost_data[]; ++ ++#ifdef __cplusplus ++} ++#endif ++#endif /* LOONGARCH_DEF_H */ +diff --git a/gcc/config/loongarch/loongarch-driver.c b/gcc/config/loongarch/loongarch-driver.c +new file mode 100644 +index 000000000..1f56df84f +--- /dev/null ++++ b/gcc/config/loongarch/loongarch-driver.c +@@ -0,0 +1,206 @@ ++/* Subroutines for the gcc driver. ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. ++ Contributed by Loongson Ltd. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#define IN_TARGET_CODE 1 ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++#include "obstack.h" ++#include "diagnostic-core.h" ++#include "opts.h" ++ ++#include "loongarch-opts.h" ++#include "loongarch-driver.h" ++ ++/* This flag is set to 1 if we believe that the user might be avoiding ++ linking (implicitly) against something from the startfile search paths. */ ++static int no_link = 0; ++ ++/* Use the public obstack from the gcc driver (defined in gcc.c). ++ This is for allocating space for the returned string. 
*/ ++extern struct obstack opts_obstack; ++ ++const char* ++la_driver_init (int argc ATTRIBUTE_UNUSED, const char **argv ATTRIBUTE_UNUSED) ++{ ++ /* Initialize all fields of la_target to -1 */ ++ loongarch_init_target (&la_target, M_OPT_UNSET, M_OPT_UNSET, M_OPT_UNSET, ++ M_OPT_UNSET, M_OPT_UNSET, M_OPT_UNSET, M_OPT_UNSET); ++ return ""; ++} ++ ++const char* ++driver_set_no_link (int argc, const char **argv) ++{ ++ no_link = 1; ++ return ""; ++} ++ ++const char* ++driver_set_m_parm (int argc, const char **argv) ++{ ++ gcc_assert (argc == 2); ++ ++#define LARCH_DRIVER_PARSE_PARM(OPT_IDX, NAME, OPTSTR_LIST, \ ++ OPT_IDX_LO, OPT_IDX_HI) \ ++ if (strcmp (argv[0], OPTSTR_##NAME) == 0) \ ++ for (int i = (OPT_IDX_LO); i < (OPT_IDX_HI); i++) \ ++ { \ ++ if ((OPTSTR_LIST)[i] != 0) \ ++ if (strcmp (argv[1], (OPTSTR_LIST)[i]) == 0) \ ++ { \ ++ (OPT_IDX) = i; \ ++ return 0; \ ++ } \ ++ } ++ ++ LARCH_DRIVER_PARSE_PARM (la_target.abi.base, ABI_BASE, \ ++ loongarch_abi_base_strings, 0, N_ABI_BASE_OPTS) ++ ++ LARCH_DRIVER_PARSE_PARM (la_target.isa.fpu, ISA_EXT_FPU, \ ++ loongarch_isa_ext_strings, 0, N_ISA_EXT_FPU_TYPES) ++ ++ LARCH_DRIVER_PARSE_PARM (la_target.isa.simd, ISA_EXT_SIMD, \ ++ loongarch_isa_ext_strings, 0, N_ISA_EXT_TYPES) ++ ++ LARCH_DRIVER_PARSE_PARM (la_target.cpu_arch, ARCH, \ ++ loongarch_cpu_strings, 0, N_ARCH_TYPES) ++ ++ LARCH_DRIVER_PARSE_PARM (la_target.cpu_tune, TUNE, \ ++ loongarch_cpu_strings, 0, N_TUNE_TYPES) ++ ++ LARCH_DRIVER_PARSE_PARM (la_target.cmodel, CMODEL, \ ++ loongarch_cmodel_strings, 0, N_CMODEL_TYPES) ++ ++ gcc_unreachable (); ++} ++ ++static void ++driver_record_deferred_opts (struct loongarch_flags *flags) ++{ ++ unsigned int i; ++ cl_deferred_option *opt; ++ vec *v = (vec *) la_deferred_options; ++ ++ gcc_assert (flags); ++ ++ /* Initialize flags */ ++ flags->flt = M_OPT_UNSET; ++ flags->flt_str = NULL; ++ flags->sx[0] = flags->sx[1] = 0; ++ ++ int sx_flag_idx = 0; ++ ++ if (v) ++ FOR_EACH_VEC_ELT (*v, i, opt) ++ { ++ switch (opt->opt_index) ++ { ++ case OPT_mlsx: ++ flags->sx[sx_flag_idx++] = ISA_EXT_SIMD_LSX * (opt->value ? 1 : -1); ++ break; ++ ++ case OPT_mlasx: ++ flags->sx[sx_flag_idx++] = ISA_EXT_SIMD_LASX * (opt->value ? 1 : -1); ++ break; ++ ++ case OPT_msoft_float: ++ flags->flt = ISA_EXT_NONE; ++ flags->flt_str = OPTSTR_SOFT_FLOAT; ++ break; ++ ++ case OPT_msingle_float: ++ flags->flt = ISA_EXT_FPU32; ++ flags->flt_str = OPTSTR_SINGLE_FLOAT; ++ break; ++ ++ case OPT_mdouble_float: ++ flags->flt = ISA_EXT_FPU64; ++ flags->flt_str = OPTSTR_DOUBLE_FLOAT; ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ gcc_assert (sx_flag_idx <= 2); ++ } ++} ++ ++const char* ++driver_get_normalized_m_opts (int argc, const char **argv ATTRIBUTE_UNUSED) ++{ ++ if (argc != 0) ++ return " %eget_normalized_m_opts requires no argument.\n"; ++ ++ struct loongarch_flags flags; ++ driver_record_deferred_opts (&flags); ++ loongarch_config_target (&la_target, &flags, !no_link /* follow_multilib_list */); ++ ++ /* Output normalized option strings. */ ++ obstack_blank (&opts_obstack, 0); ++ ++#undef APPEND_LTR ++#define APPEND_LTR(S) \ ++ obstack_grow (&opts_obstack, (const void*) (S), \ ++ sizeof ((S)) / sizeof (char) -1) ++ ++#undef APPEND_VAL ++#define APPEND_VAL(S) \ ++ obstack_grow (&opts_obstack, (const void*) (S), strlen ((S))) ++ ++#undef APPEND_OPT ++#define APPEND_OPT(NAME) \ ++ APPEND_LTR (" %. 
*/ ++ ++#ifndef LOONGARCH_DRIVER_H ++#define LOONGARCH_DRIVER_H ++ ++#include "loongarch-str.h" ++ ++extern const char* ++la_driver_init (int argc, const char **argv); ++ ++extern const char* ++driver_set_m_parm (int argc, const char **argv); ++ ++extern const char* ++driver_set_no_link (int argc, const char **argv); ++ ++extern const char* ++driver_get_normalized_m_opts (int argc, const char **argv); ++ ++#define EXTRA_SPEC_FUNCTIONS \ ++ { "driver_init", la_driver_init }, \ ++ { "set_m_parm", driver_set_m_parm }, \ ++ { "set_no_link", driver_set_no_link }, \ ++ { "get_normalized_m_opts", driver_get_normalized_m_opts }, ++ ++/* Pre-process ABI-related options. */ ++#define LA_SET_PARM_SPEC(NAME) \ ++ " %{m" OPTSTR_##NAME "=*: %:set_m_parm(" OPTSTR_##NAME " %*)}" \ ++ ++#define DRIVER_HANDLE_MACHINE_OPTIONS \ ++ " %:driver_init()" \ ++ " %{c|S|E|nostdlib: %:set_no_link()}" \ ++ " %{nostartfiles: %{nodefaultlibs: %:set_no_link()}}" \ ++ LA_SET_PARM_SPEC (ABI_BASE) \ ++ LA_SET_PARM_SPEC (ARCH) \ ++ LA_SET_PARM_SPEC (TUNE) \ ++ LA_SET_PARM_SPEC (ISA_EXT_FPU) \ ++ LA_SET_PARM_SPEC (ISA_EXT_SIMD) \ ++ LA_SET_PARM_SPEC (CMODEL) \ ++ " %:get_normalized_m_opts()" ++ ++#define DRIVER_SELF_SPECS \ ++ DRIVER_HANDLE_MACHINE_OPTIONS ++ ++/* ABI spec strings. */ ++#define ABI_GRLEN_SPEC \ ++ "%{mabi=lp64*:64}" \ ++ ++#define ABI_SPEC \ ++ "%{mabi=lp64d:lp64d}" \ ++ "%{mabi=lp64f:lp64f}" \ ++ "%{mabi=lp64s:lp64s}" \ ++ ++#endif /* LOONGARCH_DRIVER_H */ +diff --git a/gcc/config/loongarch/loongarch-ftypes.def b/gcc/config/loongarch/loongarch-ftypes.def +index a10a025ba..1ef4e2dc8 100644 +--- a/gcc/config/loongarch/loongarch-ftypes.def ++++ b/gcc/config/loongarch/loongarch-ftypes.def +@@ -1,5 +1,7 @@ +-/* Definitions of prototypes for LARCH built-in functions. -*- C -*- +- Copyright (C) 2007-2018 Free Software Foundation, Inc. ++/* Definitions of prototypes for LoongArch built-in functions. ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. ++ Contributed by Loongson Co. Ltd. ++ Based on MIPS target for GNU compiler. + + This file is part of GCC. + +@@ -18,11 +20,11 @@ along with GCC; see the file COPYING3. If not see + . */ + + /* Invoke DEF_LARCH_FTYPE (NARGS, LIST) for each prototype used by +- LARCH built-in functions, where: ++ LoongArch built-in functions, where: + + NARGS is the number of arguments. + LIST contains the return-type code followed by the codes for each +- argument type. ++ argument type. + + Argument- and return-type codes are either modes or one of the following: + +@@ -30,65 +32,55 @@ along with GCC; see the file COPYING3. If not see + INT for integer_type_node + POINTER for ptr_type_node + +- (we don't use PTR because that's a ANSI-compatibillity macro). ++ (we don't use PTR because that's a ANSI-compatibility macro). + + Please keep this list lexicographically sorted by the LIST argument. */ +-DEF_LARCH_FTYPE (1, (DF, DF)) +-DEF_LARCH_FTYPE (2, (DF, DF, DF)) +-DEF_LARCH_FTYPE (1, (DF, V2DF)) +-DEF_LARCH_FTYPE (1, (DF, V4DF)) + +-DEF_LARCH_FTYPE (1, (DI, DI)) +-DEF_LARCH_FTYPE (1, (DI, SI)) +-DEF_LARCH_FTYPE (1, (DI, UQI)) +-DEF_LARCH_FTYPE (1, (UDI, USI)) ++/* Non-vector builtin types. 
*/ ++ + DEF_LARCH_FTYPE (1, (UQI, USI)) +-DEF_LARCH_FTYPE (1, (USI, UQI)) + DEF_LARCH_FTYPE (1, (UHI, USI)) +-DEF_LARCH_FTYPE (2, (DI, DI, DI)) +-DEF_LARCH_FTYPE (2, (DI, DI, SI)) +-DEF_LARCH_FTYPE (2, (DI, DI, UQI)) ++DEF_LARCH_FTYPE (1, (USI, USI)) ++DEF_LARCH_FTYPE (1, (UDI, USI)) ++DEF_LARCH_FTYPE (1, (USI, UQI)) ++DEF_LARCH_FTYPE (1, (VOID, USI)) ++ ++DEF_LARCH_FTYPE (2, (VOID, UQI, USI)) ++DEF_LARCH_FTYPE (2, (VOID, UHI, USI)) ++DEF_LARCH_FTYPE (2, (VOID, USI, USI)) ++DEF_LARCH_FTYPE (2, (VOID, UDI, USI)) + DEF_LARCH_FTYPE (2, (VOID, DI, UQI)) + DEF_LARCH_FTYPE (2, (VOID, SI, UQI)) ++DEF_LARCH_FTYPE (2, (VOID, DI, DI)) ++DEF_LARCH_FTYPE (2, (SI, SI, UQI)) ++DEF_LARCH_FTYPE (2, (DI, DI, UQI)) ++DEF_LARCH_FTYPE (2, (SI, QI, SI)) ++DEF_LARCH_FTYPE (2, (SI, HI, SI)) ++DEF_LARCH_FTYPE (2, (SI, SI, SI)) ++DEF_LARCH_FTYPE (2, (SI, DI, SI)) ++DEF_LARCH_FTYPE (2, (USI, USI, USI)) + DEF_LARCH_FTYPE (2, (UDI, UDI, USI)) +-DEF_LARCH_FTYPE (3, (DI, DI, SI, SI)) +-DEF_LARCH_FTYPE (3, (DI, DI, USI, USI)) +-DEF_LARCH_FTYPE (3, (DI, DI, DI, QI)) ++ ++DEF_LARCH_FTYPE (3, (VOID, USI, USI, SI)) ++DEF_LARCH_FTYPE (3, (VOID, USI, UDI, SI)) ++DEF_LARCH_FTYPE (3, (USI, USI, USI, USI)) + DEF_LARCH_FTYPE (3, (UDI, UDI, UDI, USI)) ++ ++/* Vector builtin types. */ ++ ++DEF_LARCH_FTYPE (1, (DF, V2DF)) ++DEF_LARCH_FTYPE (1, (DF, V4DF)) + DEF_LARCH_FTYPE (3, (DI, DI, V2HI, V2HI)) + DEF_LARCH_FTYPE (3, (DI, DI, V4QI, V4QI)) +-DEF_LARCH_FTYPE (2, (DI, POINTER, SI)) +-DEF_LARCH_FTYPE (2, (DI, SI, SI)) +-DEF_LARCH_FTYPE (2, (DI, USI, USI)) + DEF_LARCH_FTYPE (2, (DI, V2DI, UQI)) + DEF_LARCH_FTYPE (2, (DI, V4DI, UQI)) + +-DEF_LARCH_FTYPE (2, (INT, DF, DF)) +-DEF_LARCH_FTYPE (2, (INT, SF, SF)) + DEF_LARCH_FTYPE (2, (INT, V2SF, V2SF)) + DEF_LARCH_FTYPE (4, (INT, V2SF, V2SF, V2SF, V2SF)) + +-DEF_LARCH_FTYPE (1, (SF, SF)) +-DEF_LARCH_FTYPE (2, (SF, SF, SF)) + DEF_LARCH_FTYPE (1, (SF, V2SF)) + DEF_LARCH_FTYPE (1, (SF, V4SF)) + +-DEF_LARCH_FTYPE (2, (SI, DI, SI)) +-DEF_LARCH_FTYPE (2, (SI, POINTER, SI)) +-DEF_LARCH_FTYPE (1, (SI, SI)) +-DEF_LARCH_FTYPE (1, (USI, USI)) +-DEF_LARCH_FTYPE (1, (SI, UDI)) +-DEF_LARCH_FTYPE (2, (QI, QI, QI)) +-DEF_LARCH_FTYPE (2, (HI, HI, HI)) +-DEF_LARCH_FTYPE (2, (SI, QI, SI)) +-DEF_LARCH_FTYPE (2, (SI, HI, SI)) +-DEF_LARCH_FTYPE (2, (SI, SI, SI)) +-DEF_LARCH_FTYPE (2, (SI, SI, UQI)) +-DEF_LARCH_FTYPE (2, (USI, USI, USI)) +-DEF_LARCH_FTYPE (3, (SI, SI, SI, SI)) +-DEF_LARCH_FTYPE (3, (SI, SI, SI, QI)) +-DEF_LARCH_FTYPE (3, (USI, USI, USI, USI)) +-DEF_LARCH_FTYPE (1, (SI, UQI)) + DEF_LARCH_FTYPE (1, (SI, UV16QI)) + DEF_LARCH_FTYPE (1, (SI, UV32QI)) + DEF_LARCH_FTYPE (1, (SI, UV2DI)) +@@ -106,9 +98,7 @@ DEF_LARCH_FTYPE (2, (SI, V4QI, V4QI)) + DEF_LARCH_FTYPE (2, (SI, V4SI, UQI)) + DEF_LARCH_FTYPE (2, (SI, V8SI, UQI)) + DEF_LARCH_FTYPE (2, (SI, V8HI, UQI)) +-DEF_LARCH_FTYPE (1, (SI, VOID)) + +-DEF_LARCH_FTYPE (2, (UDI, UDI, UDI)) + DEF_LARCH_FTYPE (2, (USI, V32QI, UQI)) + DEF_LARCH_FTYPE (2, (UDI, UV2SI, UV2SI)) + DEF_LARCH_FTYPE (2, (USI, V8SI, UQI)) +@@ -119,8 +109,6 @@ DEF_LARCH_FTYPE (2, (UDI, V4DI, UQI)) + DEF_LARCH_FTYPE (2, (USI, V16QI, UQI)) + DEF_LARCH_FTYPE (2, (USI, V4SI, UQI)) + DEF_LARCH_FTYPE (2, (USI, V8HI, UQI)) +-DEF_LARCH_FTYPE (1, (USI, VOID)) +- + DEF_LARCH_FTYPE (2, (UV16QI, UV16QI, UQI)) + DEF_LARCH_FTYPE (2, (UV16QI, UV16QI, USI)) + DEF_LARCH_FTYPE (2, (UV16QI, UV16QI, UV16QI)) +@@ -476,19 +464,6 @@ DEF_LARCH_FTYPE (2, (V8QI, V4HI, V4HI)) + DEF_LARCH_FTYPE (1, (V8QI, V8QI)) + DEF_LARCH_FTYPE (2, (V8QI, V8QI, V8QI)) + +-DEF_LARCH_FTYPE (2, (VOID, SI, CVPOINTER)) 
+-DEF_LARCH_FTYPE (2, (VOID, SI, SI)) +-DEF_LARCH_FTYPE (2, (VOID, DI, DI)) +-DEF_LARCH_FTYPE (2, (VOID, UQI, SI)) +-DEF_LARCH_FTYPE (1, (VOID, USI)) +-DEF_LARCH_FTYPE (2, (VOID, USI, UQI)) +-DEF_LARCH_FTYPE (1, (VOID, UHI)) +-DEF_LARCH_FTYPE (2, (VOID, UQI, USI)) +-DEF_LARCH_FTYPE (2, (VOID, UHI, USI)) +-DEF_LARCH_FTYPE (2, (VOID, USI, USI)) +-DEF_LARCH_FTYPE (2, (VOID, UDI, USI)) +-DEF_LARCH_FTYPE (3, (VOID, USI, USI, SI)) +-DEF_LARCH_FTYPE (3, (VOID, USI, UDI, SI)) + DEF_LARCH_FTYPE (3, (VOID, V16QI, CVPOINTER, SI)) + DEF_LARCH_FTYPE (3, (VOID, V16QI, CVPOINTER, DI)) + DEF_LARCH_FTYPE (3, (VOID, V32QI, CVPOINTER, SI)) +@@ -648,36 +623,36 @@ DEF_LARCH_FTYPE (3, (V4SI, V4SI, UV16QI, V16QI)) + DEF_LARCH_FTYPE (3, (UV4SI, UV4SI, UV16QI, UV16QI)) + + +-DEF_LARCH_FTYPE(2,(V4DI,V16HI,V16HI)) +-DEF_LARCH_FTYPE(2,(V4DI,UV4SI,V4SI)) +-DEF_LARCH_FTYPE(2,(V8SI,UV16HI,V16HI)) +-DEF_LARCH_FTYPE(2,(V16HI,UV32QI,V32QI)) +-DEF_LARCH_FTYPE(2,(V4DI,UV8SI,V8SI)) +-DEF_LARCH_FTYPE(3,(V4DI,V4DI,V16HI,V16HI)) +-DEF_LARCH_FTYPE(2,(UV32QI,V32QI,UV32QI)) +-DEF_LARCH_FTYPE(2,(UV16HI,V16HI,UV16HI)) +-DEF_LARCH_FTYPE(2,(UV8SI,V8SI,UV8SI)) +-DEF_LARCH_FTYPE(2,(UV4DI,V4DI,UV4DI)) +-DEF_LARCH_FTYPE(3,(V4DI,V4DI,UV4DI,V4DI)) +-DEF_LARCH_FTYPE(3,(V4DI,V4DI,UV8SI,V8SI)) +-DEF_LARCH_FTYPE(3,(V8SI,V8SI,UV16HI,V16HI)) +-DEF_LARCH_FTYPE(3,(V16HI,V16HI,UV32QI,V32QI)) +-DEF_LARCH_FTYPE(2,(V4DI,UV4DI,V4DI)) +-DEF_LARCH_FTYPE(2,(V8SI,V32QI,V32QI)) +-DEF_LARCH_FTYPE(2,(UV4DI,UV16HI,UV16HI)) +-DEF_LARCH_FTYPE(2,(V4DI,UV16HI,V16HI)) +-DEF_LARCH_FTYPE(3,(V8SI,V8SI,V32QI,V32QI)) +-DEF_LARCH_FTYPE(3,(UV8SI,UV8SI,UV32QI,UV32QI)) +-DEF_LARCH_FTYPE(3,(UV4DI,UV4DI,UV16HI,UV16HI)) +-DEF_LARCH_FTYPE(3,(V8SI,V8SI,UV32QI,V32QI)) +-DEF_LARCH_FTYPE(3,(V4DI,V4DI,UV16HI,V16HI)) +-DEF_LARCH_FTYPE(2,(UV8SI,UV32QI,UV32QI)) +-DEF_LARCH_FTYPE(2,(V8SI,UV32QI,V32QI)) +- +-DEF_LARCH_FTYPE(4,(VOID,V16QI,CVPOINTER,SI,UQI)) +-DEF_LARCH_FTYPE(4,(VOID,V8HI,CVPOINTER,SI,UQI)) +-DEF_LARCH_FTYPE(4,(VOID,V4SI,CVPOINTER,SI,UQI)) +-DEF_LARCH_FTYPE(4,(VOID,V2DI,CVPOINTER,SI,UQI)) ++DEF_LARCH_FTYPE (2, (V4DI, V16HI, V16HI)) ++DEF_LARCH_FTYPE (2, (V4DI, UV4SI, V4SI)) ++DEF_LARCH_FTYPE (2, (V8SI, UV16HI, V16HI)) ++DEF_LARCH_FTYPE (2, (V16HI, UV32QI, V32QI)) ++DEF_LARCH_FTYPE (2, (V4DI, UV8SI, V8SI)) ++DEF_LARCH_FTYPE (3, (V4DI, V4DI, V16HI, V16HI)) ++DEF_LARCH_FTYPE (2, (UV32QI, V32QI, UV32QI)) ++DEF_LARCH_FTYPE (2, (UV16HI, V16HI, UV16HI)) ++DEF_LARCH_FTYPE (2, (UV8SI, V8SI, UV8SI)) ++DEF_LARCH_FTYPE (2, (UV4DI, V4DI, UV4DI)) ++DEF_LARCH_FTYPE (3, (V4DI, V4DI, UV4DI, V4DI)) ++DEF_LARCH_FTYPE (3, (V4DI, V4DI, UV8SI, V8SI)) ++DEF_LARCH_FTYPE (3, (V8SI, V8SI, UV16HI, V16HI)) ++DEF_LARCH_FTYPE (3, (V16HI, V16HI, UV32QI, V32QI)) ++DEF_LARCH_FTYPE (2, (V4DI, UV4DI, V4DI)) ++DEF_LARCH_FTYPE (2, (V8SI, V32QI, V32QI)) ++DEF_LARCH_FTYPE (2, (UV4DI, UV16HI, UV16HI)) ++DEF_LARCH_FTYPE (2, (V4DI, UV16HI, V16HI)) ++DEF_LARCH_FTYPE (3, (V8SI, V8SI, V32QI, V32QI)) ++DEF_LARCH_FTYPE (3, (UV8SI, UV8SI, UV32QI, UV32QI)) ++DEF_LARCH_FTYPE (3, (UV4DI, UV4DI, UV16HI, UV16HI)) ++DEF_LARCH_FTYPE (3, (V8SI, V8SI, UV32QI, V32QI)) ++DEF_LARCH_FTYPE (3, (V4DI, V4DI, UV16HI, V16HI)) ++DEF_LARCH_FTYPE (2, (UV8SI, UV32QI, UV32QI)) ++DEF_LARCH_FTYPE (2, (V8SI, UV32QI, V32QI)) ++ ++DEF_LARCH_FTYPE (4, (VOID, V16QI, CVPOINTER, SI, UQI)) ++DEF_LARCH_FTYPE (4, (VOID, V8HI, CVPOINTER, SI, UQI)) ++DEF_LARCH_FTYPE (4, (VOID, V4SI, CVPOINTER, SI, UQI)) ++DEF_LARCH_FTYPE (4, (VOID, V2DI, CVPOINTER, SI, UQI)) + + DEF_LARCH_FTYPE (2, (DI, V16QI, UQI)) + DEF_LARCH_FTYPE (2, (DI, V8HI, UQI)) +@@ -699,16 +674,16 
@@ DEF_LARCH_FTYPE (3, (UV16HI, UV16HI, V16HI, USI)) + DEF_LARCH_FTYPE (3, (UV8SI, UV8SI, V8SI, USI)) + DEF_LARCH_FTYPE (3, (UV4DI, UV4DI, V4DI, USI)) + +-DEF_LARCH_FTYPE(4,(VOID,V32QI,CVPOINTER,SI,UQI)) +-DEF_LARCH_FTYPE(4,(VOID,V16HI,CVPOINTER,SI,UQI)) +-DEF_LARCH_FTYPE(4,(VOID,V8SI,CVPOINTER,SI,UQI)) +-DEF_LARCH_FTYPE(4,(VOID,V4DI,CVPOINTER,SI,UQI)) ++DEF_LARCH_FTYPE (4, (VOID, V32QI, CVPOINTER, SI, UQI)) ++DEF_LARCH_FTYPE (4, (VOID, V16HI, CVPOINTER, SI, UQI)) ++DEF_LARCH_FTYPE (4, (VOID, V8SI, CVPOINTER, SI, UQI)) ++DEF_LARCH_FTYPE (4, (VOID, V4DI, CVPOINTER, SI, UQI)) + +-DEF_LARCH_FTYPE (1, (BOOLEAN,V16QI)) +-DEF_LARCH_FTYPE(2,(V16QI,CVPOINTER,CVPOINTER)) +-DEF_LARCH_FTYPE(3,(VOID,V16QI,CVPOINTER,CVPOINTER)) +-DEF_LARCH_FTYPE(2,(V32QI,CVPOINTER,CVPOINTER)) +-DEF_LARCH_FTYPE(3,(VOID,V32QI,CVPOINTER,CVPOINTER)) ++DEF_LARCH_FTYPE (1, (BOOLEAN, V16QI)) ++DEF_LARCH_FTYPE (2, (V16QI, CVPOINTER, CVPOINTER)) ++DEF_LARCH_FTYPE (3, (VOID, V16QI, CVPOINTER, CVPOINTER)) ++DEF_LARCH_FTYPE (2, (V32QI, CVPOINTER, CVPOINTER)) ++DEF_LARCH_FTYPE (3, (VOID, V32QI, CVPOINTER, CVPOINTER)) + + DEF_LARCH_FTYPE (3, (V16QI, V16QI, SI, UQI)) + DEF_LARCH_FTYPE (3, (V2DI, V2DI, SI, UQI)) +diff --git a/gcc/config/loongarch/loongarch-modes.def b/gcc/config/loongarch/loongarch-modes.def +index fe5bc38d9..53392b484 100644 +--- a/gcc/config/loongarch/loongarch-modes.def ++++ b/gcc/config/loongarch/loongarch-modes.def +@@ -1,5 +1,7 @@ +-/* LARCH extra machine modes. +- Copyright (C) 2003-2018 Free Software Foundation, Inc. ++/* LoongArch extra machine modes. ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. ++ Contributed by Loongson Co. Ltd. ++ Based on MIPS target for GNU compiler. + + This file is part of GCC. + +diff --git a/gcc/config/loongarch/loongarch-opts.c b/gcc/config/loongarch/loongarch-opts.c +new file mode 100644 +index 000000000..cf11f67d1 +--- /dev/null ++++ b/gcc/config/loongarch/loongarch-opts.c +@@ -0,0 +1,725 @@ ++#define IN_TARGET_CODE 1 ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++#include "obstack.h" ++#include "diagnostic-core.h" ++ ++#include "loongarch-cpu.h" ++#include "loongarch-opts.h" ++#include "loongarch-str.h" ++#include "loongarch-def.h" ++ ++struct loongarch_target la_target; ++ ++/* ABI-related configuration. */ ++#define ABI_COUNT (sizeof(abi_priority_list)/sizeof(struct loongarch_abi)) ++static const struct loongarch_abi ++abi_priority_list[] = { ++ {ABI_BASE_LP64D, ABI_EXT_BASE}, ++ {ABI_BASE_LP64F, ABI_EXT_BASE}, ++ {ABI_BASE_LP64S, ABI_EXT_BASE}, ++}; ++ ++/* Initialize enabled_abi_types from TM_MULTILIB_LIST. 
*/ ++#ifdef LA_DISABLE_MULTILIB ++#define MULTILIB_LIST_LEN 1 ++#else ++#define MULTILIB_LIST_LEN (sizeof (tm_multilib_list) / sizeof (int) / 2) ++static const int tm_multilib_list[] = { TM_MULTILIB_LIST }; ++#endif ++static int enabled_abi_types[N_ABI_BASE_TYPES][N_ABI_EXT_TYPES] = { 0 }; ++ ++#define isa_required(ABI) (abi_minimal_isa[(ABI).base][(ABI).ext]) ++extern "C" const struct loongarch_isa ++abi_minimal_isa[N_ABI_BASE_TYPES][N_ABI_EXT_TYPES]; ++ ++static inline int ++is_multilib_enabled (struct loongarch_abi abi) ++{ ++ return enabled_abi_types[abi.base][abi.ext]; ++} ++ ++static void ++init_enabled_abi_types () ++{ ++#ifdef LA_DISABLE_MULTILIB ++ enabled_abi_types[DEFAULT_ABI_BASE][DEFAULT_ABI_EXT] = 1; ++#else ++ int abi_base, abi_ext; ++ for (unsigned int i = 0; i < MULTILIB_LIST_LEN; i++) ++ { ++ abi_base = tm_multilib_list[i << 1]; ++ abi_ext = tm_multilib_list[(i << 1) + 1]; ++ enabled_abi_types[abi_base][abi_ext] = 1; ++ } ++#endif ++} ++ ++/* String processing. */ ++static struct obstack msg_obstack; ++#define APPEND_STRING(STR) obstack_grow (&msg_obstack, STR, strlen(STR)); ++#define APPEND1(CH) obstack_1grow(&msg_obstack, CH); ++ ++static const char* abi_str (struct loongarch_abi abi); ++static const char* isa_str (const struct loongarch_isa *isa, char separator); ++static const char* arch_str (const struct loongarch_target *target); ++static const char* multilib_enabled_abi_list (); /* Misc */ ++static struct loongarch_abi isa_default_abi (const struct loongarch_isa *isa); ++static int isa_base_compat_p (const struct loongarch_isa *set1, ++ const struct loongarch_isa *set2); ++static int isa_fpu_compat_p (const struct loongarch_isa *set1, ++ const struct loongarch_isa *set2); ++static int abi_compat_p (const struct loongarch_isa *isa, ++ struct loongarch_abi abi); ++static int abi_default_cpu_arch (struct loongarch_abi abi, struct loongarch_isa *isa); ++ ++/* Mandatory configure-time defaults. */ ++#ifndef DEFAULT_ABI_BASE ++#error missing definition of DEFAULT_ABI_BASE in ${tm_defines}. ++#endif ++ ++#ifndef DEFAULT_ABI_EXT ++#error missing definition of DEFAULT_ABI_EXT in ${tm_defines}. ++#endif ++ ++#ifndef DEFAULT_CPU_ARCH ++#error missing definition of DEFAULT_CPU_ARCH in ${tm_defines}. ++#endif ++ ++/* Optional configure-time defaults. */ ++#ifdef DEFAULT_CPU_TUNE ++static int with_default_tune = 1; ++#else ++#define DEFAULT_CPU_TUNE -1 ++static int with_default_tune = 0; ++#endif ++ ++#ifdef DEFAULT_ISA_EXT_FPU ++static int with_default_fpu = 1; ++#else ++#define DEFAULT_ISA_EXT_FPU -1 ++static int with_default_fpu = 0; ++#endif ++ ++#ifdef DEFAULT_ISA_EXT_SIMD ++static int with_default_simd = 1; ++#else ++#define DEFAULT_ISA_EXT_SIMD -1 ++static int with_default_simd = 0; ++#endif ++ ++ ++/* Initialize loongarch_target from separate option variables. */ ++ ++void ++loongarch_init_target (struct loongarch_target *target, ++ int cpu_arch, int cpu_tune, int fpu, int simd, ++ int abi_base, int abi_ext, int cmodel) ++{ ++ if (!target) ++ return; ++ target->cpu_arch = cpu_arch; ++ target->cpu_tune = cpu_tune; ++ target->isa.fpu = fpu; ++ target->isa.simd = simd; ++ target->abi.base = abi_base; ++ target->abi.ext = abi_ext; ++ target->cmodel = cmodel; ++} ++ ++ ++/* Handle combinations of -m parameters ++ (see loongarch.opt and loongarch-opts.h). 
*/ ++ ++void ++loongarch_config_target (struct loongarch_target *target, ++ struct loongarch_flags *flags, ++ int follow_multilib_list_p) ++{ ++ struct loongarch_target t; ++ if (!target) ++ return; ++ ++ /* Initialization */ ++ init_enabled_abi_types (); ++ obstack_init (&msg_obstack); ++ ++ struct { ++ int arch, tune, fpu, simd, abi_base, abi_ext, cmodel, abi_flt; ++ } constrained = { ++ M_OPT_ABSENT (target->cpu_arch) ? 0 : 1, ++ M_OPT_ABSENT (target->cpu_tune) ? 0 : 1, ++ M_OPT_ABSENT (target->isa.fpu) ? 0 : 1, ++ M_OPT_ABSENT (target->isa.simd) ? 0 : 1, ++ M_OPT_ABSENT (target->abi.base) ? 0 : 1, ++ M_OPT_ABSENT (target->abi.ext) ? 0 : 1, ++ M_OPT_ABSENT (target->cmodel) ? 0 : 1, ++ M_OPT_ABSENT (target->abi.base) ? 0 : 1, ++ }; ++ ++ /* 1. Target ABI */ ++ if (constrained.abi_base && target->abi.base >= N_ABI_BASE_TYPES) ++ /* Special treatments for legacy options ("-mabi=lp64") ++ in GCC driver. */ ++ switch (target->abi.base) ++ { ++ case ABI_BASE_LP64: ++ t.abi.base = TO_LP64_ABI_BASE (DEFAULT_ABI_BASE); ++ constrained.abi_flt = 0; ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ else if (constrained.abi_base) ++ t.abi.base = target->abi.base; ++ else ++ t.abi.base = DEFAULT_ABI_BASE; ++ ++ t.abi.ext = constrained.abi_ext ? target->abi.ext : DEFAULT_ABI_EXT; ++ ++ /* Process -m*-float flags */ ++ if (flags && !M_OPT_ABSENT (flags->flt)) ++ { ++ /* Modifying the original "target" here makes it easier to write the ++ t.isa.fpu assignment below, because otherwise there would be three ++ levels of precedence (-m*-float / -mfpu / -march) to be handled ++ (now the first two are merged). */ ++ ++ target->isa.fpu = flags->flt; ++ constrained.fpu = 1; ++ ++ /* The target ISA is not ready yet, but (isa_required (t.abi) ++ + forced fpu) is enough for computing the forced base ABI. */ ++ ++ struct loongarch_isa force_isa = isa_required (t.abi); ++ force_isa.fpu = flags->flt; ++ ++ struct loongarch_abi force_abi; ++ force_abi.base = isa_default_abi (&force_isa).base; ++ ++ if (constrained.abi_base && constrained.abi_flt ++ && (t.abi.base != force_abi.base)) ++ { ++ force_abi.ext = t.abi.ext; ++ inform (UNKNOWN_LOCATION, ++ "%<-m%s%> overrides %<-m%s=%s%>, adjusting ABI to %qs", ++ flags->flt_str, OPTSTR_ABI_BASE, ++ loongarch_abi_base_strings[t.abi.base], ++ abi_str (force_abi)); ++ } ++ ++ t.abi.base = force_abi.base; ++ constrained.abi_flt = 1; ++ } ++ ++#ifdef LA_DISABLE_MULTILIB ++ if (follow_multilib_list_p) ++ if (t.abi.base != DEFAULT_ABI_BASE || t.abi.ext != DEFAULT_ABI_EXT) ++ { ++ static const struct loongarch_abi default_abi ++ = {DEFAULT_ABI_BASE, DEFAULT_ABI_EXT}; ++ ++ warning (0, "ABI changed (%qs to %qs) while multilib is disabled", ++ abi_str (default_abi), abi_str (t.abi)); ++ } ++#endif ++ ++ /* 2. Target CPU */ ++ t.cpu_arch = constrained.arch ? target->cpu_arch : DEFAULT_CPU_ARCH; ++ ++ /* If cpu_tune is not set using neither -mtune nor --with-tune, ++ the current cpu_arch is used as its default. */ ++ t.cpu_tune = constrained.tune ? target->cpu_tune ++ : (constrained.arch ? target->cpu_arch : ++ (with_default_tune ? DEFAULT_CPU_TUNE : DEFAULT_CPU_ARCH)); ++ ++ ++ /* Handle -march/tune=native */ ++#ifdef __loongarch__ ++ /* For native compilers, gather local CPU information ++ and fill the "CPU_NATIVE" index of arrays defined in ++ loongarch-cpu.c. 
*/ ++ ++ fill_native_cpu_config (&t); ++ ++#else ++ if (t.cpu_arch == CPU_NATIVE) ++ fatal_error (UNKNOWN_LOCATION, ++ "%qs does not work on a cross compiler", ++ "-m" OPTSTR_ARCH "=" STR_CPU_NATIVE); ++ ++ else if (t.cpu_tune == CPU_NATIVE) ++ fatal_error (UNKNOWN_LOCATION, ++ "%qs does not work on a cross compiler", ++ "-m" OPTSTR_TUNE "=" STR_CPU_NATIVE); ++#endif ++ ++ /* Handle -march/tune=abi-default */ ++ if (t.cpu_tune == CPU_ABI_DEFAULT) ++ t.cpu_tune = abi_default_cpu_arch (t.abi, NULL); ++ ++ if (t.cpu_arch == CPU_ABI_DEFAULT) ++ { ++ t.cpu_arch = abi_default_cpu_arch (t.abi, &(t.isa)); ++ loongarch_cpu_default_isa[t.cpu_arch] = t.isa; ++ } ++ ++ /* 3. Target base ISA */ ++config_target_isa: ++ ++ /* Get default ISA from "-march" or its default value. */ ++ t.isa = loongarch_cpu_default_isa[t.cpu_arch]; ++ ++ /* Apply incremental changes. */ ++ /* "-march=native" overrides the default FPU type. */ ++ ++ t.isa.fpu = constrained.fpu ? target->isa.fpu : ++ (constrained.arch ? t.isa.fpu : ++ (with_default_fpu ? DEFAULT_ISA_EXT_FPU : t.isa.fpu)); ++ ++ t.isa.simd = constrained.simd ? target->isa.simd : ++ (constrained.arch ? t.isa.simd : ++ (with_default_simd ? DEFAULT_ISA_EXT_SIMD : t.isa.simd)); ++ ++ /* apply -m[no-]lsx and -m[no-]lasx flags */ ++ if (flags) ++ for (int i = 0; i < 2; i++) ++ { ++ switch (SX_FLAG_TYPE (flags->sx[i])) ++ { ++ case ISA_EXT_SIMD_LSX: ++ constrained.simd = 1; ++ if (flags->sx[i] > 0 && t.isa.simd != ISA_EXT_SIMD_LASX) ++ t.isa.simd = ISA_EXT_SIMD_LSX; ++ else if (flags->sx[i] < 0) ++ t.isa.simd = ISA_EXT_NONE; ++ break; ++ ++ case ISA_EXT_SIMD_LASX: ++ constrained.simd = 1; ++ if (flags->sx[i] < 0 && t.isa.simd == ISA_EXT_SIMD_LASX) ++ t.isa.simd = ISA_EXT_SIMD_LSX; ++ else if (flags->sx[i] > 0) ++ t.isa.simd = ISA_EXT_SIMD_LASX; ++ break; ++ ++ case 0: ++ break; ++ ++ default: ++ gcc_unreachable(); ++ } ++ } ++ ++ /* All SIMD extensions imply a 64-bit FPU: ++ - silently adjust t.isa.fpu to "fpu64" if it is unconstrained. ++ - warn if -msingle-float / -msoft-float is on, ++ then disable SIMD extensions (done in driver) ++ - abort if -mfpu=0 / -mfpu=32 is forced. */ ++ ++ if (t.isa.simd != ISA_EXT_NONE && t.isa.fpu != ISA_EXT_FPU64) ++ { ++ if (!constrained.fpu) ++ { ++ /* As long as the arch-default "t.isa.simd" is set to non-zero ++ for an element "t" in loongarch_cpu_default_isa, "t.isa.fpu" ++ should be set to "ISA_EXT_FPU64" accordingly. Thus reaching ++ here must be the result of forcing -mlsx/-mlasx explicitly. */ ++ gcc_assert (constrained.simd); ++ ++ inform (UNKNOWN_LOCATION, ++ "enabing %qs promotes %<%s%s%> to %<%s%s%>", ++ loongarch_isa_ext_strings[t.isa.simd], ++ OPTSTR_ISA_EXT_FPU, loongarch_isa_ext_strings[t.isa.fpu], ++ OPTSTR_ISA_EXT_FPU, loongarch_isa_ext_strings[ISA_EXT_FPU64]); ++ ++ t.isa.fpu = ISA_EXT_FPU64; ++ } ++ else if (flags && (flags->flt == ISA_EXT_NONE || flags->flt == ISA_EXT_FPU32)) ++ { ++ if (constrained.simd) ++ inform (UNKNOWN_LOCATION, ++ "%qs is disabled by %<-m%s%>, because it requires %<%s%s%>", ++ loongarch_isa_ext_strings[t.isa.simd], flags->flt_str, ++ OPTSTR_ISA_EXT_FPU, loongarch_isa_ext_strings[ISA_EXT_FPU64]); ++ ++ t.isa.simd = ISA_EXT_NONE; ++ } ++ else ++ { ++ /* -mfpu=0 / -mfpu=32 is set. */ ++ if (constrained.simd) ++ fatal_error (UNKNOWN_LOCATION, ++ "%<-m%s=%s%> conflicts with %qs, which requires %<%s%s%>", ++ OPTSTR_ISA_EXT_FPU, loongarch_isa_ext_strings[t.isa.fpu], ++ loongarch_isa_ext_strings[t.isa.simd], ++ OPTSTR_ISA_EXT_FPU, loongarch_isa_ext_strings[ISA_EXT_FPU64]); ++ ++ /* Same as above. 
*/ ++ t.isa.simd = ISA_EXT_NONE; ++ } ++ } ++ ++ ++ /* 4. ABI-ISA compatibility */ ++ /* Note: ++ - There IS a unique default -march value for each ABI type ++ (config.gcc: triplet -> abi -> default arch). ++ ++ - If the base ABI is incompatible with the default arch, ++ try using the default -march it implies (and mark it ++ as "constrained" this time), then re-apply step 3. */ ++ ++ struct loongarch_abi abi_tmp; ++ const struct loongarch_isa* isa_min; ++ ++ abi_tmp = t.abi; ++ isa_min = &isa_required (abi_tmp); ++ ++ if (isa_base_compat_p (&t.isa, isa_min)); /* OK */ ++ else if (!constrained.arch) ++ { ++ /* Base architecture can only be implied by -march, ++ so we adjust that first if it is not constrained. */ ++ int fallback_arch = abi_default_cpu_arch (t.abi, NULL); ++ ++ if (t.cpu_arch == CPU_NATIVE) ++ warning (0, "your native CPU architecture (%qs) " ++ "does not support %qs ABI, falling back to %<-m%s=%s%>", ++ arch_str (&t), abi_str (t.abi), OPTSTR_ARCH, ++ loongarch_cpu_strings[fallback_arch]); ++ else ++ warning (0, "default CPU architecture (%qs) " ++ "does not support %qs ABI, falling back to %<-m%s=%s%>", ++ arch_str (&t), abi_str (t.abi), OPTSTR_ARCH, ++ loongarch_cpu_strings[fallback_arch]); ++ ++ t.cpu_arch = fallback_arch; ++ constrained.arch = 1; ++ goto config_target_isa; ++ } ++ else if (!constrained.abi_base) ++ { ++ /* If -march is given while -mabi is not, ++ try selecting another base ABI type. */ ++ abi_tmp.base = isa_default_abi (&t.isa).base; ++ } ++ else ++ goto fatal; ++ ++ if (isa_fpu_compat_p (&t.isa, isa_min)); /* OK */ ++ else if (!constrained.fpu) ++ t.isa.fpu = isa_min->fpu; ++ else if (!constrained.abi_base) ++ /* If -march is compatible with the default ABI ++ while -mfpu is not. */ ++ abi_tmp.base = isa_default_abi (&t.isa).base; ++ else ++ goto fatal; ++ ++ if (0) ++fatal: ++ fatal_error (UNKNOWN_LOCATION, ++ "unable to implement ABI %qs with instruction set %qs", ++ abi_str (t.abi), isa_str (&t.isa, '/')); ++ ++ ++ /* Using the fallback ABI. */ ++ if (abi_tmp.base != t.abi.base || abi_tmp.ext != t.abi.ext) ++ { ++ /* This flag is only set in the GCC driver. */ ++ if (follow_multilib_list_p) ++ { ++ ++ /* Continue falling back until we find a feasible ABI type ++ enabled by TM_MULTILIB_LIST. */ ++ if (!is_multilib_enabled (abi_tmp)) ++ { ++ for (unsigned int i = 0; i < ABI_COUNT; i++) ++ { ++ if (is_multilib_enabled (abi_priority_list[i]) ++ && abi_compat_p (&t.isa, abi_priority_list[i])) ++ { ++ abi_tmp = abi_priority_list[i]; ++ ++ warning (0, "ABI %qs cannot be implemented due to " ++ "limited instruction set %qs, " ++ "falling back to %qs", abi_str (t.abi), ++ isa_str (&t.isa, '/'), abi_str (abi_tmp)); ++ ++ goto fallback; ++ } ++ } ++ ++ /* Otherwise, keep using abi_tmp with a warning. 
*/ ++#ifdef LA_DISABLE_MULTILIB ++ warning (0, "instruction set %qs cannot implement " ++ "default ABI %qs, falling back to %qs", ++ isa_str (&t.isa, '/'), abi_str (t.abi), ++ abi_str (abi_tmp)); ++#else ++ warning (0, "no multilib-enabled ABI (%qs) can be implemented " ++ "with instruction set %qs, falling back to %qs", ++ multilib_enabled_abi_list (), ++ isa_str (&t.isa, '/'), abi_str (abi_tmp)); ++#endif ++ } ++ } ++ ++fallback: ++ t.abi = abi_tmp; ++ } ++ else if (follow_multilib_list_p) ++ { ++ if (!is_multilib_enabled (t.abi)) ++ { ++ inform (UNKNOWN_LOCATION, ++ "ABI %qs is not enabled at configure-time, " ++ "the linker might report an error", abi_str (t.abi)); ++ ++ inform (UNKNOWN_LOCATION, "ABI with startfiles: %s", ++ multilib_enabled_abi_list ()); ++ } ++ } ++ ++ ++ /* 5. Target code model */ ++ t.cmodel = constrained.cmodel ? target->cmodel : CMODEL_NORMAL; ++ ++ /* Cleanup and return. */ ++ obstack_free (&msg_obstack, NULL); ++ *target = t; ++} ++ ++/* Returns the default ABI for the given instruction set. */ ++static inline struct loongarch_abi ++isa_default_abi (const struct loongarch_isa *isa) ++{ ++ struct loongarch_abi abi; ++ ++ switch (isa->fpu) ++ { ++ case ISA_EXT_FPU64: ++ if (isa->base == ISA_BASE_LA64V100) ++ abi.base = ABI_BASE_LP64D; ++ break; ++ ++ case ISA_EXT_FPU32: ++ if (isa->base == ISA_BASE_LA64V100) ++ abi.base = ABI_BASE_LP64F; ++ break; ++ ++ case ISA_EXT_NONE: ++ if (isa->base == ISA_BASE_LA64V100) ++ abi.base = ABI_BASE_LP64S; ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ abi.ext = ABI_EXT_BASE; ++ return abi; ++} ++ ++/* Check if set2 is a subset of set1. */ ++static inline int ++isa_base_compat_p (const struct loongarch_isa *set1, ++ const struct loongarch_isa *set2) ++{ ++ switch (set2->base) ++ { ++ case ISA_BASE_LA64V100: ++ return (set1->base == ISA_BASE_LA64V100); ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ ++static inline int ++isa_fpu_compat_p (const struct loongarch_isa *set1, ++ const struct loongarch_isa *set2) ++{ ++ switch (set2->fpu) ++ { ++ case ISA_EXT_FPU64: ++ return set1->fpu == ISA_EXT_FPU64; ++ ++ case ISA_EXT_FPU32: ++ return set1->fpu == ISA_EXT_FPU32 || set1->fpu == ISA_EXT_FPU64; ++ ++ case ISA_EXT_NONE: ++ return 1; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++} ++ ++static inline int ++abi_compat_p (const struct loongarch_isa *isa, struct loongarch_abi abi) ++{ ++ int compatible = 1; ++ const struct loongarch_isa *isa2 = &isa_required (abi); ++ ++ /* Append conditionals for new ISA components below. */ ++ compatible = compatible && isa_base_compat_p (isa, isa2); ++ compatible = compatible && isa_fpu_compat_p (isa, isa2); ++ return compatible; ++} ++ ++/* The behavior of this function should be consistent ++ with config.gcc. */ ++static int ++abi_default_cpu_arch (struct loongarch_abi abi, ++ struct loongarch_isa *isa) ++{ ++ static struct loongarch_isa tmp; ++ if (!isa) ++ isa = &tmp; ++ ++ if (abi.ext == ABI_EXT_BASE) ++ switch (abi.base) ++ { ++ case ABI_BASE_LP64D: ++ case ABI_BASE_LP64F: ++ case ABI_BASE_LP64S: ++ *isa = isa_required (abi); ++ return CPU_LOONGARCH64; ++ } ++ gcc_unreachable (); ++} ++ ++static const char* ++abi_str (struct loongarch_abi abi) ++{ ++ /* "/base" can be omitted. 
*/ ++ if (abi.ext == ABI_EXT_BASE) ++ return (const char*) ++ obstack_copy0 (&msg_obstack, loongarch_abi_base_strings[abi.base], ++ strlen (loongarch_abi_base_strings[abi.base])); ++ else ++ { ++ APPEND_STRING (loongarch_abi_base_strings[abi.base]) ++ APPEND1 ('/') ++ APPEND_STRING (loongarch_abi_ext_strings[abi.ext]) ++ APPEND1 ('\0') ++ ++ return XOBFINISH (&msg_obstack, const char *); ++ } ++} ++ ++static const char* ++isa_str (const struct loongarch_isa *isa, char separator) ++{ ++ APPEND_STRING (loongarch_isa_base_strings[isa->base]) ++ APPEND1 (separator) ++ ++ if (isa->fpu == ISA_EXT_NONE) ++ { ++ APPEND_STRING ("no" OPTSTR_ISA_EXT_FPU) ++ } ++ else ++ { ++ APPEND_STRING (OPTSTR_ISA_EXT_FPU) ++ APPEND_STRING (loongarch_isa_ext_strings[isa->fpu]) ++ } ++ ++ switch (isa->simd) ++ { ++ case ISA_EXT_SIMD_LSX: ++ case ISA_EXT_SIMD_LASX: ++ APPEND1 (separator); ++ APPEND_STRING (loongarch_isa_ext_strings[isa->simd]); ++ break; ++ ++ default: ++ gcc_assert (isa->simd == 0); ++ } ++ APPEND1 ('\0') ++ ++ /* Add more here. */ ++ ++ return XOBFINISH (&msg_obstack, const char *); ++} ++ ++static const char* ++arch_str (const struct loongarch_target *target) ++{ ++ if (target->cpu_arch == CPU_NATIVE) ++ { ++ /* Describe a native CPU with unknown PRID. */ ++ const char* isa_string = isa_str (&target->isa, ','); ++ APPEND_STRING ("PRID: 0x") ++ APPEND_STRING (get_native_prid_str ()) ++ APPEND_STRING (", ISA features: ") ++ APPEND_STRING (isa_string) ++ } ++ else ++ APPEND_STRING (loongarch_cpu_strings[target->cpu_arch]); ++ ++ APPEND1 ('\0') ++ return XOBFINISH (&msg_obstack, const char *); ++} ++ ++static const char* ++multilib_enabled_abi_list () ++{ ++ int enabled_abi_idx[MULTILIB_LIST_LEN] = { 0 }; ++ const char* enabled_abi_str[MULTILIB_LIST_LEN] = { NULL }; ++ unsigned int j = 0; ++ ++ for (unsigned int i = 0; i < ABI_COUNT && j < MULTILIB_LIST_LEN; i++) ++ { ++ if (enabled_abi_types[abi_priority_list[i].base] ++ [abi_priority_list[i].ext]) ++ { ++ enabled_abi_idx[j++] = i; ++ } ++ } ++ ++ for (unsigned int k = 0; k < j; k++) ++ { ++ enabled_abi_str[k] = abi_str (abi_priority_list[enabled_abi_idx[k]]); ++ } ++ ++ for (unsigned int k = 0; k < j - 1; k++) ++ { ++ APPEND_STRING (enabled_abi_str[k]) ++ APPEND1 (',') ++ APPEND1 (' ') ++ } ++ APPEND_STRING (enabled_abi_str[j - 1]) ++ APPEND1 ('\0') ++ ++ return XOBFINISH (&msg_obstack, const char *); ++} ++ ++/* option status feedback for "gcc --help=target -Q" */ ++void ++loongarch_update_gcc_opt_status (struct loongarch_target *target, ++ struct gcc_options *opts, ++ struct gcc_options *opts_set) ++{ ++ (void) opts_set; ++ ++ /* status of -mabi */ ++ opts->x_la_opt_abi_base = target->abi.base; ++ ++ opts->x_target_flags |= ++ IS_LP64_ABI_BASE (target->abi.base) ? MASK_LP64 : 0; ++ ++ /* status of -march and -mtune */ ++ opts->x_la_opt_cpu_arch = target->cpu_arch; ++ opts->x_la_opt_cpu_tune = target->cpu_tune; ++ ++ /* status of -mfpu and -msimd */ ++ opts->x_la_opt_fpu = target->isa.fpu; ++ opts->x_la_opt_simd = target->isa.simd; ++} +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index 21639fa74..33eb8b2da 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -1,5 +1,6 @@ +-/* Definitions for option handling for LARCH. +- Copyright (C) 1989-2018 Free Software Foundation, Inc. ++/* Definitions for loongarch-specific option handling. ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. ++ Contributed by Loongson Ltd. + + This file is part of GCC. 
+ +@@ -17,18 +18,81 @@ You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +-#ifndef LARCH_OPTS_H +-#define LARCH_OPTS_H ++#ifndef LOONGARCH_OPTS_H ++#define LOONGARCH_OPTS_H + +-#define LARCH_ARCH_OPTION_NATIVE -1 ++#include "loongarch-def.h" + ++/* Target configuration */ ++extern struct loongarch_target la_target; + +-enum loongarch_code_model { +- LARCH_CMODEL_NORMAL, +- LARCH_CMODEL_TINY, +- LARCH_CMODEL_TINY_STATIC, +- LARCH_CMODEL_LARGE, +- LARCH_CMODEL_EXTREME ++/* Flag status */ ++struct loongarch_flags { ++ int flt; const char* flt_str; ++#define SX_FLAG_TYPE(x) ((x) < 0 ? -(x) : (x)) ++ int sx[2]; + }; + ++#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) ++ ++/* Initialize loongarch_target from separate option variables. */ ++void ++loongarch_init_target (struct loongarch_target *target, ++ int cpu_arch, int cpu_tune, int fpu, int simd, ++ int abi_base, int abi_ext, int cmodel); ++ ++ ++/* Handler for "-m" option combinations, ++ shared by the driver and the compiler proper. */ ++void ++loongarch_config_target (struct loongarch_target *target, ++ struct loongarch_flags *flags, ++ int follow_multilib_list_p); ++ ++/* option status feedback for "gcc --help=target -Q" */ ++void ++loongarch_update_gcc_opt_status (struct loongarch_target *target, ++ struct gcc_options *opts, ++ struct gcc_options *opts_set); + #endif ++ ++ ++/* Macros for common conditional expressions used in loongarch.{c,h,md} */ ++#define TARGET_CMODEL_NORMAL (la_target.cmodel == CMODEL_NORMAL) ++#define TARGET_CMODEL_TINY (la_target.cmodel == CMODEL_TINY) ++#define TARGET_CMODEL_TINY_STATIC (la_target.cmodel == CMODEL_TINY_STATIC) ++#define TARGET_CMODEL_LARGE (la_target.cmodel == CMODEL_LARGE) ++#define TARGET_CMODEL_EXTREME (la_target.cmodel == CMODEL_EXTREME) ++ ++#define TARGET_HARD_FLOAT (la_target.isa.fpu != ISA_EXT_NONE) ++#define TARGET_HARD_FLOAT_ABI (la_target.abi.base == ABI_BASE_LP64D \ ++ || la_target.abi.base == ABI_BASE_LP64F) ++ ++#define TARGET_SOFT_FLOAT (la_target.isa.fpu == ISA_EXT_NONE) ++#define TARGET_SOFT_FLOAT_ABI (la_target.abi.base == ABI_BASE_LP64S) ++#define TARGET_SINGLE_FLOAT (la_target.isa.fpu == ISA_EXT_FPU32) ++#define TARGET_SINGLE_FLOAT_ABI (la_target.abi.base == ABI_BASE_LP64F) ++#define TARGET_DOUBLE_FLOAT (la_target.isa.fpu == ISA_EXT_FPU64) ++#define TARGET_DOUBLE_FLOAT_ABI (la_target.abi.base == ABI_BASE_LP64D) ++ ++#define TARGET_64BIT (la_target.isa.base == ISA_BASE_LA64V100) ++#define TARGET_ABI_LP64 (la_target.abi.base == ABI_BASE_LP64D \ ++ || la_target.abi.base == ABI_BASE_LP64F \ ++ || la_target.abi.base == ABI_BASE_LP64S) ++ ++#define ISA_HAS_LSX (la_target.isa.simd == ISA_EXT_SIMD_LSX \ ++ || la_target.isa.simd == ISA_EXT_SIMD_LASX) ++#define ISA_HAS_LASX (la_target.isa.simd == ISA_EXT_SIMD_LASX) ++ ++ ++/* TARGET_ macros for use in *.md template conditionals */ ++#define TARGET_uARCH_LA464 (la_target.cpu_tune == CPU_LA464) ++#define TARGET_uARCH_LA364 (la_target.cpu_tune == CPU_LA364) ++#define TARGET_uARCH_LA264 (la_target.cpu_tune == CPU_LA264) ++#define TARGET_uARCH_LA664 (la_target.cpu_tune == CPU_LA664) ++ ++/* Note: optimize_size may vary across functions, ++ while -m[no]-memcpy imposes a global constraint. 
*/ ++#define TARGET_DO_OPTIMIZE_BLOCK_MOVE_P loongarch_do_optimize_block_move_p() ++ ++#endif /* LOONGARCH_OPTS_H */ +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index c36fdd37d..498d80514 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -1,9 +1,7 @@ +-/* Prototypes of target machine for GNU compiler. LARCH version. ++/* Prototypes of target machine for GNU compiler. LoongArch version. + Copyright (C) 1989-2018 Free Software Foundation, Inc. +- Contributed by A. Lichnewsky (lich@inria.inria.fr). +- Changed by Michael Meissner (meissner@osf.org). +- 64-bit r4000 support by Ian Lance Taylor (ian@cygnus.com) and +- Brendan Eich (brendan@microunity.com). ++ Contributed by Loongson Ltd. ++ Based on MIPS target for GNU compiler. + + This file is part of GCC. + +@@ -21,24 +19,8 @@ You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +-#ifndef GCC_LARCH_PROTOS_H +-#define GCC_LARCH_PROTOS_H +- +-/* Describes how a symbol is used. +- +- SYMBOL_CONTEXT_CALL +- The symbol is used as the target of a call instruction. +- +- SYMBOL_CONTEXT_LEA +- The symbol is used in a load-address operation. +- +- SYMBOL_CONTEXT_MEM +- The symbol is used as the address in a MEM. */ +-enum loongarch_symbol_context { +- SYMBOL_CONTEXT_CALL, +- SYMBOL_CONTEXT_LEA, +- SYMBOL_CONTEXT_MEM +-}; ++#ifndef GCC_LOONGARCH_PROTOS_H ++#define GCC_LOONGARCH_PROTOS_H + + /* Classifies a SYMBOL_REF, LABEL_REF or UNSPEC address. + +@@ -57,67 +39,30 @@ enum loongarch_symbol_type { + SYMBOL_GOT_DISP, + SYMBOL_TLS, + SYMBOL_TLSGD, +- SYMBOL_TLSLDM, ++ SYMBOL_TLSLDM + }; + #define NUM_SYMBOL_TYPES (SYMBOL_TLSLDM + 1) + +-/* Classifies a type of call. +- +- LARCH_CALL_NORMAL +- A normal call or call_value pattern. +- +- LARCH_CALL_SIBCALL +- A sibcall or sibcall_value pattern. +- +- LARCH_CALL_EPILOGUE +- A call inserted in the epilogue. */ +-enum loongarch_call_type { +- LARCH_CALL_NORMAL, +- LARCH_CALL_SIBCALL, +- LARCH_CALL_EPILOGUE +-}; +- +-/* Controls the conditions under which certain instructions are split. +- +- SPLIT_IF_NECESSARY +- Only perform splits that are necessary for correctness +- (because no unsplit version exists). +- +- SPLIT_FOR_SPEED +- Perform splits that are necessary for correctness or +- beneficial for code speed. +- +- SPLIT_FOR_SIZE +- Perform splits that are necessary for correctness or +- beneficial for code size. */ +-enum loongarch_split_type { +- SPLIT_IF_NECESSARY, +- SPLIT_FOR_SPEED, +- SPLIT_FOR_SIZE +-}; + extern const char *const loongarch_fp_conditions[16]; + +-extern const char *loongarch_output_gpr_save (unsigned); ++/* Routines implemented in loongarch.c. 
*/ ++extern rtx loongarch_emit_move (rtx, rtx); + extern HOST_WIDE_INT loongarch_initial_elimination_offset (int, int); + extern void loongarch_expand_prologue (void); + extern void loongarch_expand_epilogue (bool); + extern bool loongarch_can_use_return_insn (void); +-extern rtx loongarch_function_value (const_tree, const_tree, enum machine_mode); +-extern bool loongarch_symbolic_constant_p (rtx, enum loongarch_symbol_context, +- enum loongarch_symbol_type *); ++ ++extern bool loongarch_symbolic_constant_p (rtx, enum loongarch_symbol_type *); + extern int loongarch_regno_mode_ok_for_base_p (int, machine_mode, bool); +-extern bool loongarch_stack_address_p (rtx, machine_mode); + extern int loongarch_address_insns (rtx, machine_mode, bool); + extern int loongarch_const_insns (rtx); + extern int loongarch_split_const_insns (rtx); + extern int loongarch_split_128bit_const_insns (rtx); + extern int loongarch_load_store_insns (rtx, rtx_insn *); + extern int loongarch_idiv_insns (machine_mode); +-extern rtx loongarch_emit_move (rtx, rtx); + #ifdef RTX_CODE + extern void loongarch_emit_binary (enum rtx_code, rtx, rtx, rtx); + #endif +-extern rtx loongarch_pic_base_register (rtx); + extern bool loongarch_split_symbol (rtx, rtx, machine_mode, rtx *); + extern rtx loongarch_unspec_address (rtx, enum loongarch_symbol_type); + extern rtx loongarch_strip_unspec_address (rtx); +@@ -126,9 +71,9 @@ extern bool loongarch_legitimize_move (machine_mode, rtx, rtx); + extern rtx loongarch_legitimize_call_address (rtx); + + extern rtx loongarch_subword (rtx, bool); +-extern bool loongarch_split_move_p (rtx, rtx, enum loongarch_split_type); +-extern void loongarch_split_move (rtx, rtx, enum loongarch_split_type, rtx); +-extern bool loongarch_split_move_insn_p (rtx, rtx, rtx); ++extern bool loongarch_split_move_p (rtx, rtx); ++extern void loongarch_split_move (rtx, rtx, rtx); ++extern bool loongarch_split_move_insn_p (rtx, rtx); + extern void loongarch_split_move_insn (rtx, rtx, rtx); + extern void loongarch_split_128bit_move (rtx, rtx); + extern bool loongarch_split_128bit_move_p (rtx, rtx); +@@ -139,50 +84,29 @@ extern void loongarch_split_lsx_insert_d (rtx, rtx, rtx, rtx); + extern void loongarch_split_lsx_fill_d (rtx, rtx); + extern const char *loongarch_output_move (rtx, rtx); + extern bool loongarch_cfun_has_cprestore_slot_p (void); +-extern bool loongarch_cprestore_address_p (rtx, bool); + #ifdef RTX_CODE + extern void loongarch_expand_scc (rtx *); + extern bool loongarch_expand_int_vec_cmp (rtx *); + extern bool loongarch_expand_fp_vec_cmp (rtx *); + extern void loongarch_expand_conditional_branch (rtx *); +-extern void loongarch_expand_conditional_move (rtx *); ++extern bool loongarch_expand_conditional_move_la464 (rtx *); + extern void loongarch_expand_conditional_trap (rtx); + #endif +-extern bool loongarch_get_pic_call_symbol (rtx *, int); + extern void loongarch_set_return_address (rtx, rtx); + extern bool loongarch_move_by_pieces_p (unsigned HOST_WIDE_INT, unsigned int); +-extern bool loongarch_store_by_pieces_p (unsigned HOST_WIDE_INT, unsigned int); + extern bool loongarch_expand_block_move (rtx, rtx, rtx); + +-extern void loongarch_init_cumulative_args (CUMULATIVE_ARGS *, tree); + extern bool loongarch_expand_ext_as_unaligned_load (rtx, rtx, HOST_WIDE_INT, +- HOST_WIDE_INT, bool); ++ HOST_WIDE_INT, bool); + extern bool loongarch_expand_ins_as_unaligned_store (rtx, rtx, HOST_WIDE_INT, +- HOST_WIDE_INT); +-extern bool loongarch_mem_fits_mode_p (machine_mode mode, rtx x); ++ HOST_WIDE_INT); + extern 
HOST_WIDE_INT loongarch_debugger_offset (rtx, HOST_WIDE_INT); + +-extern void loongarch_push_asm_switch (struct loongarch_asm_switch *); +-extern void loongarch_pop_asm_switch (struct loongarch_asm_switch *); + extern void loongarch_output_external (FILE *, tree, const char *); + extern void loongarch_output_ascii (FILE *, const char *, size_t); +-extern void loongarch_output_aligned_decl_common (FILE *, tree, const char *, +- unsigned HOST_WIDE_INT, +- unsigned int); +-extern void loongarch_declare_common_object (FILE *, const char *, +- const char *, unsigned HOST_WIDE_INT, +- unsigned int, bool); +-extern void loongarch_declare_object (FILE *, const char *, const char *, +- const char *, ...) ATTRIBUTE_PRINTF_4; +-extern void loongarch_declare_object_name (FILE *, const char *, tree); +-extern void loongarch_finish_declare_object (FILE *, tree, int, int); +-extern void loongarch_set_text_contents_type (FILE *, const char *, +- unsigned long, bool); +- + extern bool loongarch_small_data_pattern_p (rtx); + extern rtx loongarch_rewrite_small_data (rtx); + extern rtx loongarch_return_addr (int, rtx); +-extern bool loongarch_must_initialize_gp_p (void); + + extern bool loongarch_const_vector_same_val_p (rtx, machine_mode); + extern bool loongarch_const_vector_same_bytes_p (rtx, machine_mode); +@@ -194,26 +118,27 @@ extern bool loongarch_const_vector_bitimm_clr_p (rtx, machine_mode); + extern rtx loongarch_lsx_vec_parallel_const_half (machine_mode, bool); + extern rtx loongarch_gen_const_int_vector (machine_mode, HOST_WIDE_INT); + extern enum reg_class loongarch_secondary_reload_class (enum reg_class, +- machine_mode, +- rtx, bool); ++ machine_mode, ++ rtx, bool); + extern int loongarch_class_max_nregs (enum reg_class, machine_mode); + + extern machine_mode loongarch_hard_regno_caller_save_mode (unsigned int, +- unsigned int, +- machine_mode); ++ unsigned int, ++ machine_mode); + extern int loongarch_adjust_insn_length (rtx_insn *, int); + extern const char *loongarch_output_conditional_branch (rtx_insn *, rtx *, +- const char *, const char *); +-extern const char *loongarch_output_order_conditional_branch (rtx_insn *, rtx *, +- bool); +-extern const char *loongarch_output_equal_conditional_branch (rtx_insn *, rtx *, +- bool); ++ const char *, ++ const char *); ++extern const char *loongarch_output_order_conditional_branch (rtx_insn *, ++ rtx *, ++ bool); ++extern const char *loongarch_output_equal_conditional_branch (rtx_insn *, ++ rtx *, ++ bool); + extern const char *loongarch_output_division (const char *, rtx *); + extern const char *loongarch_lsx_output_division (const char *, rtx *); + extern const char *loongarch_output_probe_stack_range (rtx, rtx, rtx); + extern bool loongarch_hard_regno_rename_ok (unsigned int, unsigned int); +-extern bool loongarch_linked_madd_p (rtx_insn *, rtx_insn *); +-extern bool loongarch_store_data_bypass_p (rtx_insn *, rtx_insn *); + extern int loongarch_dspalu_bypass_p (rtx, rtx); + extern rtx loongarch_prefetch_cookie (rtx, rtx); + +@@ -226,9 +151,6 @@ extern const char *current_section_name (void); + extern unsigned int current_section_flags (void); + extern bool loongarch_use_ins_ext_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT); + +-extern bool and_operands_ok (machine_mode, rtx, rtx); +-extern bool loongarch_fmadd_bypass (rtx_insn *, rtx_insn *); +- + union loongarch_gen_fn_ptrs + { + rtx (*fn_8) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx); +@@ -239,25 +161,26 @@ union loongarch_gen_fn_ptrs + }; + + extern void loongarch_expand_atomic_qihi (union 
loongarch_gen_fn_ptrs, +- rtx, rtx, rtx, rtx, rtx); ++ rtx, rtx, rtx, rtx, rtx); + + extern void loongarch_expand_vector_init (rtx, rtx); + extern void loongarch_expand_vec_unpack (rtx op[2], bool, bool); ++extern void loongarch_expand_vec_perm (rtx, rtx, rtx, rtx); ++extern void loongarch_expand_vec_perm_1 (rtx[]); ++extern void loongarch_expand_vector_extract (rtx, rtx, int); ++extern void loongarch_expand_vector_reduc (rtx (*)(rtx, rtx, rtx), rtx, rtx); + + extern int loongarch_ldst_scaled_shift (machine_mode); + extern bool loongarch_signed_immediate_p (unsigned HOST_WIDE_INT, int, int); + extern bool loongarch_unsigned_immediate_p (unsigned HOST_WIDE_INT, int, int); +-extern bool loongarch_load_store_pair_p (bool, rtx *); +-extern bool loongarch_movep_target_p (rtx, rtx); + extern bool loongarch_12bit_offset_address_p (rtx, machine_mode); + extern bool loongarch_14bit_shifted_offset_address_p (rtx, machine_mode); ++extern bool loongarch_base_index_address_p (rtx, machine_mode); + extern bool loongarch_9bit_offset_address_p (rtx, machine_mode); +-extern bool lwsp_swsp_address_p (rtx, machine_mode); + extern rtx loongarch_expand_thread_pointer (rtx); + + extern bool loongarch_eh_uses (unsigned int); + extern bool loongarch_epilogue_uses (unsigned int); +-extern int loongarch_trampoline_code_size (void); + extern bool loongarch_load_store_bonding_p (rtx *, machine_mode, bool); + extern bool loongarch_la464_128_store_p (rtx[]); + extern bool loongarch_la464_128_load_p (rtx[]); +@@ -270,10 +193,6 @@ typedef rtx (*mulsidi3_gen_fn) (rtx, rtx, rtx); + extern void loongarch_register_frame_header_opt (void); + extern void loongarch_expand_vec_cond_expr (machine_mode, machine_mode, rtx *); + +-extern void loongarch_declare_function_name(FILE *, const char *, tree); +-/* Routines implemented in loongarch-d.c */ +-extern void loongarch_d_target_versions (void); +- + /* Routines implemented in loongarch-c.c. */ + void loongarch_cpu_cpp_builtins (cpp_reader *); + +@@ -281,10 +200,12 @@ extern void loongarch_init_builtins (void); + extern void loongarch_atomic_assign_expand_fenv (tree *, tree *, tree *); + extern tree loongarch_builtin_decl (unsigned int, bool); + extern rtx loongarch_expand_builtin (tree, rtx, rtx subtarget ATTRIBUTE_UNUSED, +- machine_mode, int); ++ machine_mode, int); + extern tree loongarch_builtin_vectorized_function (unsigned int, tree, tree); + extern rtx loongarch_gen_const_int_vector_shuffle (machine_mode, int); + extern tree loongarch_build_builtin_va_list (void); +- + extern rtx loongarch_build_signbit_mask (machine_mode, bool, bool); ++extern void loongarch_emit_swrsqrtsf (rtx, rtx, machine_mode, bool); ++extern void loongarch_emit_swdivsf (rtx, rtx, rtx, machine_mode); ++extern rtx loongarch_prefetch_cookie (rtx, rtx); + #endif /* ! GCC_LARCH_PROTOS_H */ +diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h +new file mode 100644 +index 000000000..aca3d667b +--- /dev/null ++++ b/gcc/config/loongarch/loongarch-str.h +@@ -0,0 +1,68 @@ ++/* Generated automatically by "genstr" from "loongarch-strings". ++ Please do not edit this file directly. ++ ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. ++ Contributed by Loongson Ltd. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. 
++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#ifndef LOONGARCH_STR_H ++#define LOONGARCH_STR_H ++ ++#define OPTSTR_ARCH "arch" ++#define OPTSTR_TUNE "tune" ++ ++#define STR_CPU_NATIVE "native" ++#define STR_CPU_ABI_DEFAULT "abi-default" ++#define STR_CPU_LOONGARCH64 "loongarch64" ++#define STR_CPU_LA464 "la464" ++#define STR_CPU_LA364 "la364" ++#define STR_CPU_LA264 "la264" ++#define STR_CPU_LA664 "la664" ++ ++#define STR_ISA_BASE_LA64V100 "la64" ++ ++#define OPTSTR_ISA_EXT_FPU "fpu" ++#define STR_NONE "none" ++#define STR_ISA_EXT_FPU0 "0" ++#define STR_ISA_EXT_FPU32 "32" ++#define STR_ISA_EXT_FPU64 "64" ++ ++#define OPTSTR_SOFT_FLOAT "soft-float" ++#define OPTSTR_SINGLE_FLOAT "single-float" ++#define OPTSTR_DOUBLE_FLOAT "double-float" ++ ++#define OPTSTR_ISA_EXT_SIMD "simd" ++#define STR_ISA_EXT_LSX "lsx" ++#define STR_ISA_EXT_LASX "lasx" ++ ++#define OPTSTR_ABI_BASE "abi" ++#define STR_ABI_BASE_LP64D "lp64d" ++#define STR_ABI_BASE_LP64F "lp64f" ++#define STR_ABI_BASE_LP64S "lp64s" ++#define STR_ABI_BASE_LP64 "lp64" ++ ++#define STR_ABI_EXT_BASE "base" ++ ++#define OPTSTR_CMODEL "cmodel" ++#define STR_CMODEL_NORMAL "normal" ++#define STR_CMODEL_TINY "tiny" ++#define STR_CMODEL_TS "tiny-static" ++#define STR_CMODEL_LARGE "large" ++#define STR_CMODEL_EXTREME "extreme" ++ ++#endif /* LOONGARCH_STR_H */ +diff --git a/gcc/config/loongarch/loongarch-tables.opt b/gcc/config/loongarch/loongarch-tables.opt +deleted file mode 100644 +index 80794b564..000000000 +--- a/gcc/config/loongarch/loongarch-tables.opt ++++ /dev/null +@@ -1,34 +0,0 @@ +-; -*- buffer-read-only: t -*- +-; Generated automatically by genopt.sh from loongarch-cpus.def. +- +-; Copyright (C) 2011-2018 Free Software Foundation, Inc. +-; +-; This file is part of GCC. +-; +-; GCC is free software; you can redistribute it and/or modify it under +-; the terms of the GNU General Public License as published by the Free +-; Software Foundation; either version 3, or (at your option) any later +-; version. +-; +-; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +-; WARRANTY; without even the implied warranty of MERCHANTABILITY or +-; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +-; for more details. +-; +-; You should have received a copy of the GNU General Public License +-; along with GCC; see the file COPYING3. If not see +-; . +- +-Enum +-Name(loongarch_arch_opt_value) Type(int) +-Known LARCH CPUs (for use with the -march= and -mtune= options): +- +-EnumValue +-Enum(loongarch_arch_opt_value) String(native) Value(LARCH_ARCH_OPTION_NATIVE) DriverOnly +- +-EnumValue +-Enum(loongarch_arch_opt_value) String(loongarch64) Value(0) Canonical +- +-EnumValue +-Enum(loongarch_arch_opt_value) String(la464) Value(1) Canonical +- +diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h +new file mode 100644 +index 000000000..bb01f2d98 +--- /dev/null ++++ b/gcc/config/loongarch/loongarch-tune.h +@@ -0,0 +1,51 @@ ++/* Definitions for microarchitecture-related data structures. ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. ++ Contributed by Loongson Ltd. ++ ++This file is part of GCC. 
++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#ifndef LOONGARCH_TUNE_H ++#define LOONGARCH_TUNE_H ++ ++/* RTX costs of various operations on the different architectures. */ ++struct loongarch_rtx_cost_data ++{ ++ unsigned short fp_add; ++ unsigned short fp_mult_sf; ++ unsigned short fp_mult_df; ++ unsigned short fp_div_sf; ++ unsigned short fp_div_df; ++ unsigned short int_mult_si; ++ unsigned short int_mult_di; ++ unsigned short int_div_si; ++ unsigned short int_div_di; ++ unsigned short branch_cost; ++ unsigned short memory_latency; ++}; ++ ++/* Costs to use when optimizing for size. */ ++extern const struct loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size; ++ ++/* Cache size record of known processor models. */ ++struct loongarch_cache { ++ int l1d_line_size; /* bytes */ ++ int l1d_size; /* KiB */ ++ int l2d_size; /* kiB */ ++ int simultaneous_prefetches; /* number of parallel prefetch */ ++}; ++ ++#endif /* LOONGARCH_TUNE_H */ +diff --git a/gcc/config/loongarch/loongarch.c b/gcc/config/loongarch/loongarch.c +index e556f81e4..a1dde5a0f 100644 +--- a/gcc/config/loongarch/loongarch.c ++++ b/gcc/config/loongarch/loongarch.c +@@ -1,9 +1,7 @@ +-/* Subroutines used for LARCH code generation. +- Copyright (C) 1989-2018 Free Software Foundation, Inc. +- Contributed by A. Lichnewsky, lich@inria.inria.fr. +- Changes by Michael Meissner, meissner@osf.org. +- 64-bit r4000 support by Ian Lance Taylor, ian@cygnus.com, and +- Brendan Eich, brendan@microunity.com. ++/* Subroutines used for LoongArch code generation. ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. ++ Contributed by Loongson Technology Co. Ltd.. ++ Based on MIPS and RISC-V target for GNU compiler. + + This file is part of GCC. + +@@ -63,8 +61,14 @@ along with GCC; see the file COPYING3. If not see + #include "target-globals.h" + #include "tree-pass.h" + #include "context.h" ++#include "shrink-wrap.h" + #include "builtins.h" + #include "rtl-iter.h" ++#include "cfgloop.h" ++#include "gimple-iterator.h" ++#include "tree-vectorizer.h" ++#include "params.h" ++#include "opts.h" + + /* This file should be included last. */ + #include "target-def.h" +@@ -76,48 +80,20 @@ along with GCC; see the file COPYING3. If not see + && XINT (X, 1) < UNSPEC_ADDRESS_FIRST + NUM_SYMBOL_TYPES) + + /* Extract the symbol or label from UNSPEC wrapper X. */ +-#define UNSPEC_ADDRESS(X) \ +- XVECEXP (X, 0, 0) ++#define UNSPEC_ADDRESS(X) XVECEXP (X, 0, 0) + + /* Extract the symbol type from UNSPEC wrapper X. */ + #define UNSPEC_ADDRESS_TYPE(X) \ + ((enum loongarch_symbol_type) (XINT (X, 1) - UNSPEC_ADDRESS_FIRST)) + +-/* The maximum distance between the top of the stack frame and the +- value $sp has when we save and restore registers. +-*/ +-#define LARCH_MAX_FIRST_STACK_STEP 0x7f0 +- + /* True if INSN is a loongarch.md pattern or asm statement. */ + /* ??? This test exists through the compiler, perhaps it should be +- moved to rtl.h. */ ++ moved to rtl.h. 
*/ + #define USEFUL_INSN_P(INSN) \ + (NONDEBUG_INSN_P (INSN) \ + && GET_CODE (PATTERN (INSN)) != USE \ + && GET_CODE (PATTERN (INSN)) != CLOBBER) + +-/* If INSN is a delayed branch sequence, return the first instruction +- in the sequence, otherwise return INSN itself. */ +-#define SEQ_BEGIN(INSN) \ +- (INSN_P (INSN) && GET_CODE (PATTERN (INSN)) == SEQUENCE \ +- ? as_a (XVECEXP (PATTERN (INSN), 0, 0)) \ +- : (INSN)) +- +-/* Likewise for the last instruction in a delayed branch sequence. */ +-#define SEQ_END(INSN) \ +- (INSN_P (INSN) && GET_CODE (PATTERN (INSN)) == SEQUENCE \ +- ? as_a (XVECEXP (PATTERN (INSN), \ +- 0, \ +- XVECLEN (PATTERN (INSN), 0) - 1)) \ +- : (INSN)) +- +-/* Execute the following loop body with SUBINSN set to each instruction +- between SEQ_BEGIN (INSN) and SEQ_END (INSN) inclusive. */ +-#define FOR_EACH_SUBINSN(SUBINSN, INSN) \ +- for ((SUBINSN) = SEQ_BEGIN (INSN); \ +- (SUBINSN) != NEXT_INSN (SEQ_END (INSN)); \ +- (SUBINSN) = NEXT_INSN (SUBINSN)) +- + /* True if bit BIT is set in VALUE. */ + #define BITSET_P(VALUE, BIT) (((VALUE) & (1 << (BIT))) != 0) + +@@ -127,54 +103,25 @@ along with GCC; see the file COPYING3. If not see + A natural register + offset address. The register satisfies + loongarch_valid_base_register_p and the offset is a const_arith_operand. + ++ ADDRESS_REG_REG ++ A base register indexed by (optionally scaled) register. ++ + ADDRESS_CONST_INT + A signed 16-bit constant address. + + ADDRESS_SYMBOLIC: + A constant symbolic address. */ +-enum loongarch_address_type { ++enum loongarch_address_type ++{ + ADDRESS_REG, ++ ADDRESS_REG_REG, + ADDRESS_CONST_INT, + ADDRESS_SYMBOLIC + }; + + +-/* A class used to control a comdat-style stub that we output in each +- translation unit that needs it. */ +-class loongarch_one_only_stub { +-public: +- virtual ~loongarch_one_only_stub () {} +- +- /* Return the name of the stub. */ +- virtual const char *get_name () = 0; +- +- /* Output the body of the function to asm_out_file. */ +- virtual void output_body () = 0; +-}; +- +-/* Tuning information that is automatically derived from other sources +- (such as the scheduler). */ +-static struct { +- /* The architecture and tuning settings that this structure describes. */ +- enum processor arch; +- enum processor tune; +- +- /* True if the structure has been initialized. */ +- bool initialized_p; +- +-} loongarch_tuning_info; +- +-/* Information about an address described by loongarch_address_type. +- +- ADDRESS_CONST_INT +- No fields are used. +- +- ADDRESS_REG +- REG is the base register and OFFSET is the constant offset. +- +- ADDRESS_SYMBOLIC +- SYMBOL_TYPE is the type of symbol that the address references. */ +-struct loongarch_address_info { ++struct loongarch_address_info ++{ + enum loongarch_address_type type; + rtx reg; + rtx offset; +@@ -184,224 +131,82 @@ struct loongarch_address_info { + /* Method to load immediate number fields. + + METHOD_NORMAL: +- load immediate number 0-31 bit ++ Load bit 0-31 of the immediate number. + + METHOD_LU32I: +- load imm 32-51 bit ++ Load bit 32-51 of the immediate number. + + METHOD_LU52I: +- load imm 52-63 bit ++ load bit 52-63 of the immediate number. + + METHOD_INSV: +- imm 0xfff00000fffffxxx ++ immediates like 0xfff00000fffffxxx + */ +-enum loongarch_load_imm_method { ++enum loongarch_load_imm_method ++{ + METHOD_NORMAL, + METHOD_LU32I, + METHOD_LU52I, + METHOD_INSV + }; + +-/* One stage in a constant building sequence. 
These sequences have +- the form: +- +- A = VALUE[0] +- A = A CODE[1] VALUE[1] +- A = A CODE[2] VALUE[2] +- ... +- +- where A is an accumulator, each CODE[i] is a binary rtl operation +- and each VALUE[i] is a constant integer. CODE[0] is undefined. */ +-struct loongarch_integer_op { ++struct loongarch_integer_op ++{ + enum rtx_code code; +- unsigned HOST_WIDE_INT value; ++ HOST_WIDE_INT value; + enum loongarch_load_imm_method method; + }; + + /* The largest number of operations needed to load an integer constant. +- The worst accepted case for 64-bit constants is LUI,ORI,SLL,ORI,SLL,ORI. +- When the lowest bit is clear, we can try, but reject a sequence with +- an extra SLL at the end. */ +-#define LARCH_MAX_INTEGER_OPS 9 +- +-/* Costs of various operations on the different architectures. */ +- +-struct loongarch_rtx_cost_data +-{ +- unsigned short fp_add; +- unsigned short fp_mult_sf; +- unsigned short fp_mult_df; +- unsigned short fp_div_sf; +- unsigned short fp_div_df; +- unsigned short int_mult_si; +- unsigned short int_mult_di; +- unsigned short int_div_si; +- unsigned short int_div_di; +- unsigned short branch_cost; +- unsigned short memory_latency; +-}; +- +-/* Global variables for machine-dependent things. */ +- +-/* The -G setting, or the configuration's default small-data limit if +- no -G option is given. */ +-static unsigned int loongarch_small_data_threshold; +- +-/* The number of file directives written by loongarch_output_filename. */ +-int num_source_filenames; +- +-/* The name that appeared in the last .file directive written by +- loongarch_output_filename, or "" if loongarch_output_filename hasn't +- written anything yet. */ +-const char *current_function_file = ""; ++ The worst accepted case for 64-bit constants is LU12I.W,LU32I.D,LU52I.D,ORI ++ or LU12I.W,LU32I.D,LU52I.D,ADDI.D DECL_ASSEMBLER_NAME. */ ++#define LARCH_MAX_INTEGER_OPS 4 + + /* Arrays that map GCC register numbers to debugger register numbers. */ +-int loongarch_dbx_regno[FIRST_PSEUDO_REGISTER]; + int loongarch_dwarf_regno[FIRST_PSEUDO_REGISTER]; + +-/* The current instruction-set architecture. */ +-enum processor loongarch_arch; +-const struct loongarch_cpu_info *loongarch_arch_info; +- +-/* The processor that we should tune the code for. */ +-enum processor loongarch_tune; +-const struct loongarch_cpu_info *loongarch_tune_info; +- +-/* The ISA level associated with loongarch_arch. */ +-int loongarch_isa; +- +-/* The ISA revision level. */ +-int loongarch_isa_rev; +- +-/* Which cost information to use. */ +-static const struct loongarch_rtx_cost_data *loongarch_cost; +- + /* Index [M][R] is true if register R is allowed to hold a value of mode M. */ +-static bool loongarch_hard_regno_mode_ok_p[MAX_MACHINE_MODE][FIRST_PSEUDO_REGISTER]; ++static bool loongarch_hard_regno_mode_ok_p[MAX_MACHINE_MODE] ++ [FIRST_PSEUDO_REGISTER]; + + /* Index C is true if character C is a valid PRINT_OPERAND punctation + character. */ + static bool loongarch_print_operand_punct[256]; + +-static GTY (()) int loongarch_output_filename_first_time = 1; +- +-/* loongarch_use_pcrel_pool_p[X] is true if symbols of type X should be +- forced into a PC-relative constant pool. */ +-bool loongarch_use_pcrel_pool_p[NUM_SYMBOL_TYPES]; +- +-/* Cached value of can_issue_more. This is cached in loongarch_variable_issue hook +- and returned from loongarch_sched_reorder2. */ ++/* Cached value of can_issue_more. This is cached in loongarch_variable_issue ++ hook and returned from loongarch_sched_reorder2. 
*/ + static int cached_can_issue_more; + + /* Index R is the smallest register class that contains register R. */ + const enum reg_class loongarch_regno_to_class[FIRST_PSEUDO_REGISTER] = { +- GR_REGS, GR_REGS, GR_REGS, GR_REGS, +- JALR_REGS, JALR_REGS, JALR_REGS, JALR_REGS, +- JALR_REGS, JALR_REGS, JALR_REGS, JALR_REGS, +- SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, +- SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, +- SIBCALL_REGS, GR_REGS, GR_REGS, JALR_REGS, +- JALR_REGS, JALR_REGS, JALR_REGS, JALR_REGS, +- JALR_REGS, JALR_REGS, JALR_REGS, JALR_REGS, +- +- FP_REGS, FP_REGS, FP_REGS, FP_REGS, +- FP_REGS, FP_REGS, FP_REGS, FP_REGS, +- FP_REGS, FP_REGS, FP_REGS, FP_REGS, +- FP_REGS, FP_REGS, FP_REGS, FP_REGS, +- FP_REGS, FP_REGS, FP_REGS, FP_REGS, +- FP_REGS, FP_REGS, FP_REGS, FP_REGS, +- FP_REGS, FP_REGS, FP_REGS, FP_REGS, +- FP_REGS, FP_REGS, FP_REGS, FP_REGS, +- ST_REGS, ST_REGS, ST_REGS, ST_REGS, +- ST_REGS, ST_REGS, ST_REGS, ST_REGS, +- FRAME_REGS, FRAME_REGS +-}; +- +-static tree loongarch_handle_interrupt_attr (tree *, tree, tree, int, bool *); +-static tree loongarch_handle_use_shadow_register_set_attr (tree *, tree, tree, int, +- bool *); +- +-/* The value of TARGET_ATTRIBUTE_TABLE. */ +-static const struct attribute_spec loongarch_attribute_table[] = { +- /* { name, min_len, max_len, decl_req, type_req, fn_type_req, +- affects_type_identity, handler, exclude } */ +- { "long_call", 0, 0, false, true, true, false, NULL, NULL }, +- { "short_call", 0, 0, false, true, true, false, NULL, NULL }, +- { "far", 0, 0, false, true, true, false, NULL, NULL }, +- { "near", 0, 0, false, true, true, false, NULL, NULL }, +- { "nocompression", 0, 0, true, false, false, false, NULL, NULL }, +- /* Allow functions to be specified as interrupt handlers */ +- { "interrupt", 0, 1, false, true, true, false, loongarch_handle_interrupt_attr, +- NULL }, +- { "use_shadow_register_set", 0, 1, false, true, true, false, +- loongarch_handle_use_shadow_register_set_attr, NULL }, +- { "keep_interrupts_masked", 0, 0, false, true, true, false, NULL, NULL }, +- { "use_debug_exception_return", 0, 0, false, true, true, false, NULL, NULL }, +- { NULL, 0, 0, false, false, false, false, NULL, NULL } +-}; +- +-/* A table describing all the processors GCC knows about; see +- loongarch-cpus.def for details. */ +-static const struct loongarch_cpu_info loongarch_cpu_info_table[] = { +-#define LARCH_CPU(NAME, CPU, ISA, FLAGS) \ +- { NAME, CPU, ISA, FLAGS }, +-#include "loongarch-cpus.def" +-#undef LARCH_CPU +-}; +- +-/* Default costs. If these are used for a processor we should look +- up the actual costs. */ +-#define DEFAULT_COSTS COSTS_N_INSNS (6), /* fp_add */ \ +- COSTS_N_INSNS (7), /* fp_mult_sf */ \ +- COSTS_N_INSNS (8), /* fp_mult_df */ \ +- COSTS_N_INSNS (23), /* fp_div_sf */ \ +- COSTS_N_INSNS (36), /* fp_div_df */ \ +- COSTS_N_INSNS (10), /* int_mult_si */ \ +- COSTS_N_INSNS (10), /* int_mult_di */ \ +- COSTS_N_INSNS (69), /* int_div_si */ \ +- COSTS_N_INSNS (69), /* int_div_di */ \ +- 2, /* branch_cost */ \ +- 4 /* memory_latency */ +- +-/* Floating-point costs for processors without an FPU. Just assume that +- all floating-point libcalls are very expensive. */ +-#define SOFT_FP_COSTS COSTS_N_INSNS (256), /* fp_add */ \ +- COSTS_N_INSNS (256), /* fp_mult_sf */ \ +- COSTS_N_INSNS (256), /* fp_mult_df */ \ +- COSTS_N_INSNS (256), /* fp_div_sf */ \ +- COSTS_N_INSNS (256) /* fp_div_df */ +- +-/* Costs to use when optimizing for size. 
*/ +-static const struct loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size = { +- COSTS_N_INSNS (1), /* fp_add */ +- COSTS_N_INSNS (1), /* fp_mult_sf */ +- COSTS_N_INSNS (1), /* fp_mult_df */ +- COSTS_N_INSNS (1), /* fp_div_sf */ +- COSTS_N_INSNS (1), /* fp_div_df */ +- COSTS_N_INSNS (1), /* int_mult_si */ +- COSTS_N_INSNS (1), /* int_mult_di */ +- COSTS_N_INSNS (1), /* int_div_si */ +- COSTS_N_INSNS (1), /* int_div_di */ +- 2, /* branch_cost */ +- 4 /* memory_latency */ ++ GR_REGS, GR_REGS, GR_REGS, GR_REGS, ++ JIRL_REGS, JIRL_REGS, JIRL_REGS, JIRL_REGS, ++ JIRL_REGS, JIRL_REGS, JIRL_REGS, JIRL_REGS, ++ SIBCALL_REGS, JIRL_REGS, SIBCALL_REGS, SIBCALL_REGS, ++ SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, ++ SIBCALL_REGS, GR_REGS, GR_REGS, JIRL_REGS, ++ JIRL_REGS, JIRL_REGS, JIRL_REGS, JIRL_REGS, ++ JIRL_REGS, JIRL_REGS, JIRL_REGS, JIRL_REGS, ++ ++ FP_REGS, FP_REGS, FP_REGS, FP_REGS, ++ FP_REGS, FP_REGS, FP_REGS, FP_REGS, ++ FP_REGS, FP_REGS, FP_REGS, FP_REGS, ++ FP_REGS, FP_REGS, FP_REGS, FP_REGS, ++ FP_REGS, FP_REGS, FP_REGS, FP_REGS, ++ FP_REGS, FP_REGS, FP_REGS, FP_REGS, ++ FP_REGS, FP_REGS, FP_REGS, FP_REGS, ++ FP_REGS, FP_REGS, FP_REGS, FP_REGS, ++ FCC_REGS, FCC_REGS, FCC_REGS, FCC_REGS, ++ FCC_REGS, FCC_REGS, FCC_REGS, FCC_REGS, ++ FRAME_REGS, FRAME_REGS + }; + +-/* Costs to use when optimizing for speed, indexed by processor. */ +-static const struct loongarch_rtx_cost_data +- loongarch_rtx_cost_data[NUM_PROCESSOR_VALUES] = { +- { /* loongarch */ +- DEFAULT_COSTS +- }, +- { /* loongarch64 */ +- DEFAULT_COSTS +- }, +- { /* la464 */ +- DEFAULT_COSTS +- } +-}; ++/* Which cost information to use. */ ++static const struct loongarch_rtx_cost_data *loongarch_cost; + + /* Information about a single argument. */ +-struct loongarch_arg_info { ++struct loongarch_arg_info ++{ + /* True if the argument is at least partially passed on the stack. */ + bool stack_p; + +@@ -419,21 +224,6 @@ struct loongarch_arg_info { + unsigned int fpr_offset; + }; + +- +-/* Emit a move from SRC to DEST. Assume that the move expanders can +- handle all moves if !can_create_pseudo_p (). The distinction is +- important because, unlike emit_move_insn, the move expanders know +- how to force Pmode objects into the constant pool even when the +- constant pool address is not itself legitimate. */ +- +-rtx +-loongarch_emit_move (rtx dest, rtx src) +-{ +- return (can_create_pseudo_p () +- ? emit_move_insn (dest, src) +- : emit_move_insn_1 (dest, src)); +-} +- + /* Implement TARGET_FUNCTION_ARG_BOUNDARY. Every parameter gets at + least PARM_BOUNDARY bits of alignment, but will be given anything up + to PREFERRED_STACK_BOUNDARY bits if the type requires it. */ +@@ -470,7 +260,8 @@ loongarch_pass_mode_in_fpr_p (machine_mode mode) + return 0; + } + +-typedef struct { ++typedef struct ++{ + const_tree type; + HOST_WIDE_INT offset; + } loongarch_aggregate_field; +@@ -480,18 +271,18 @@ typedef struct { + + static int + loongarch_flatten_aggregate_field (const_tree type, +- loongarch_aggregate_field fields[2], +- int n, HOST_WIDE_INT offset, +- const int use_vecarg_p) ++ loongarch_aggregate_field fields[2], int n, ++ HOST_WIDE_INT offset, ++ const int use_vecarg_p) + { + switch (TREE_CODE (type)) + { + case RECORD_TYPE: +- /* Can't handle incomplete types nor sizes that are not fixed. */ +- if (!COMPLETE_TYPE_P (type) +- || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST +- || !tree_fits_uhwi_p (TYPE_SIZE (type))) +- return -1; ++ /* Can't handle incomplete types nor sizes that are not fixed. 
*/ ++ if (!COMPLETE_TYPE_P (type) ++ || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST ++ || !tree_fits_uhwi_p (TYPE_SIZE (type))) ++ return -1; + + for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f)) + if (TREE_CODE (f) == FIELD_DECL) +@@ -500,7 +291,8 @@ loongarch_flatten_aggregate_field (const_tree type, + return -1; + + HOST_WIDE_INT pos = offset + int_byte_position (f); +- n = loongarch_flatten_aggregate_field (TREE_TYPE (f), fields, n, pos, 0); ++ n = loongarch_flatten_aggregate_field (TREE_TYPE (f), fields, n, ++ pos, 0); + if (n < 0) + return -1; + } +@@ -513,7 +305,8 @@ loongarch_flatten_aggregate_field (const_tree type, + tree index = TYPE_DOMAIN (type); + tree elt_size = TYPE_SIZE_UNIT (TREE_TYPE (type)); + int n_subfields = loongarch_flatten_aggregate_field (TREE_TYPE (type), +- subfields, 0, offset, 0); ++ subfields, 0, ++ offset, 0); + + /* Can't handle incomplete types nor sizes that are not fixed. */ + if (n_subfields <= 0 +@@ -528,7 +321,7 @@ loongarch_flatten_aggregate_field (const_tree type, + return -1; + + n_elts = 1 + tree_to_uhwi (TYPE_MAX_VALUE (index)) +- - tree_to_uhwi (TYPE_MIN_VALUE (index)); ++ - tree_to_uhwi (TYPE_MIN_VALUE (index)); + gcc_assert (n_elts >= 0); + + for (HOST_WIDE_INT i = 0; i < n_elts; i++) +@@ -566,11 +359,11 @@ loongarch_flatten_aggregate_field (const_tree type, + } + + default: +- if (n < 2 ++ if ((n < 2 + && ((SCALAR_FLOAT_TYPE_P (type) + && GET_MODE_SIZE (TYPE_MODE (type)) <= UNITS_PER_FP_ARG) + || (INTEGRAL_TYPE_P (type) +- && GET_MODE_SIZE (TYPE_MODE (type)) <= UNITS_PER_WORD)) ++ && GET_MODE_SIZE (TYPE_MODE (type)) <= UNITS_PER_WORD))) + || (use_vecarg_p && VECTOR_TYPE_P (type) + && ((ISA_HAS_LSX && GET_MODE_SIZE (TYPE_MODE (type)) <= UNITS_PER_LSX_REG) + || (ISA_HAS_LASX && GET_MODE_SIZE (TYPE_MODE (type)) <= UNITS_PER_LASX_REG)))) +@@ -589,8 +382,8 @@ loongarch_flatten_aggregate_field (const_tree type, + + static int + loongarch_flatten_aggregate_argument (const_tree type, +- loongarch_aggregate_field fields[2], +- const int use_vecarg_p) ++ loongarch_aggregate_field fields[2], ++ const int use_vecarg_p) + { + if (!type || !((TREE_CODE (type) == RECORD_TYPE) + || (use_vecarg_p && TREE_CODE (type) == VECTOR_TYPE))) +@@ -603,9 +396,9 @@ loongarch_flatten_aggregate_argument (const_tree type, + two floating-point registers. If so, populate FIELDS accordingly. */ + + static unsigned +-loongarch_pass_aggregate_in_fpr_pair_p (const_tree type, +- loongarch_aggregate_field fields[2], +- const int use_vecarg_p) ++loongarch_pass_aggregate_num_fpr (const_tree type, ++ loongarch_aggregate_field fields[2], ++ const int use_vecarg_p) + { + int n = loongarch_flatten_aggregate_argument (type, fields, use_vecarg_p); + +@@ -616,13 +409,13 @@ loongarch_pass_aggregate_in_fpr_pair_p (const_tree type, + return n > 0 ? n : 0; + } + +-/* See whether TYPE is a record whose fields should be returned in one or ++/* See whether TYPE is a record whose fields should be returned in one + floating-point register and one integer register. If so, populate + FIELDS accordingly. */ + + static bool + loongarch_pass_aggregate_in_fpr_and_gpr_p (const_tree type, +- loongarch_aggregate_field fields[2]) ++ loongarch_aggregate_field fields[2]) + { + unsigned num_int = 0, num_float = 0; + int n = loongarch_flatten_aggregate_argument (type, fields, 0); +@@ -640,20 +433,21 @@ loongarch_pass_aggregate_in_fpr_and_gpr_p (const_tree type, + when the value has mode VALUE_MODE and the type has TYPE_MODE. 
The + two modes may be different for structures like: + +- struct __attribute__((packed)) foo { float f; } ++ struct __attribute__((packed)) foo { float f; } + +- where the SFmode value "f" is passed in REGNO but the struct itself +- has mode BLKmode. */ ++ where the SFmode value "f" is passed in REGNO but the struct itself ++ has mode BLKmode. */ + + static rtx + loongarch_pass_fpr_single (machine_mode type_mode, unsigned regno, +- machine_mode value_mode) ++ machine_mode value_mode, ++ HOST_WIDE_INT offset) + { + rtx x = gen_rtx_REG (value_mode, regno); + + if (type_mode != value_mode) + { +- x = gen_rtx_EXPR_LIST (VOIDmode, x, const0_rtx); ++ x = gen_rtx_EXPR_LIST (VOIDmode, x, GEN_INT (offset)); + x = gen_rtx_PARALLEL (type_mode, gen_rtvec (1, x)); + } + return x; +@@ -666,19 +460,16 @@ loongarch_pass_fpr_single (machine_mode type_mode, unsigned regno, + + static rtx + loongarch_pass_fpr_pair (machine_mode mode, unsigned regno1, +- machine_mode mode1, HOST_WIDE_INT offset1, +- unsigned regno2, machine_mode mode2, +- HOST_WIDE_INT offset2) ++ machine_mode mode1, HOST_WIDE_INT offset1, ++ unsigned regno2, machine_mode mode2, ++ HOST_WIDE_INT offset2) + { +- return gen_rtx_PARALLEL +- (mode, +- gen_rtvec (2, +- gen_rtx_EXPR_LIST (VOIDmode, +- gen_rtx_REG (mode1, regno1), +- GEN_INT (offset1)), +- gen_rtx_EXPR_LIST (VOIDmode, +- gen_rtx_REG (mode2, regno2), +- GEN_INT (offset2)))); ++ return gen_rtx_PARALLEL ( ++ mode, gen_rtvec (2, ++ gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode1, regno1), ++ GEN_INT (offset1)), ++ gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode2, regno2), ++ GEN_INT (offset2)))); + } + + /* Fill INFO with information about a single argument, and return an +@@ -689,9 +480,9 @@ loongarch_pass_fpr_pair (machine_mode mode, unsigned regno1, + returning the argument, or false if passing the argument. */ + + static rtx +-loongarch_get_arg_info (struct loongarch_arg_info *info, const CUMULATIVE_ARGS *cum, +- machine_mode mode, const_tree type, bool named, +- bool return_p) ++loongarch_get_arg_info (struct loongarch_arg_info *info, ++ const CUMULATIVE_ARGS *cum, machine_mode mode, ++ const_tree type, bool named, bool return_p) + { + unsigned num_bytes, num_words; + unsigned fpr_base = return_p ? FP_RETURN : FP_ARG_FIRST; +@@ -713,21 +504,23 @@ loongarch_get_arg_info (struct loongarch_arg_info *info, const CUMULATIVE_ARGS * + unsigned gregno = gpr_base + info->gpr_offset; + + /* Pass one- or two-element floating-point aggregates in FPRs. 
*/ +- if ((info->num_fprs = loongarch_pass_aggregate_in_fpr_pair_p (type, fields, use_vecarg_p)) ++ if ((info->num_fprs ++ = loongarch_pass_aggregate_num_fpr (type, fields, use_vecarg_p)) + && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS) + switch (info->num_fprs) + { + case 1: + return loongarch_pass_fpr_single (mode, fregno, +- TYPE_MODE (fields[0].type)); ++ TYPE_MODE (fields[0].type), ++ fields[0].offset); + + case 2: + return loongarch_pass_fpr_pair (mode, fregno, +- TYPE_MODE (fields[0].type), +- fields[0].offset, +- fregno + 1, +- TYPE_MODE (fields[1].type), +- fields[1].offset); ++ TYPE_MODE (fields[0].type), ++ fields[0].offset, ++ fregno + 1, ++ TYPE_MODE (fields[1].type), ++ fields[1].offset); + + default: + gcc_unreachable (); +@@ -742,9 +535,10 @@ loongarch_get_arg_info (struct loongarch_arg_info *info, const CUMULATIVE_ARGS * + return gen_rtx_REG (mode, fregno); + + case MODE_COMPLEX_FLOAT: +- return loongarch_pass_fpr_pair (mode, fregno, GET_MODE_INNER (mode), 0, +- fregno + 1, GET_MODE_INNER (mode), +- GET_MODE_UNIT_SIZE (mode)); ++ return loongarch_pass_fpr_pair (mode, fregno, ++ GET_MODE_INNER (mode), 0, ++ fregno + 1, GET_MODE_INNER (mode), ++ GET_MODE_UNIT_SIZE (mode)); + + default: + gcc_unreachable (); +@@ -761,10 +555,11 @@ loongarch_get_arg_info (struct loongarch_arg_info *info, const CUMULATIVE_ARGS * + if (!SCALAR_FLOAT_TYPE_P (fields[0].type)) + std::swap (fregno, gregno); + +- return loongarch_pass_fpr_pair (mode, fregno, TYPE_MODE (fields[0].type), +- fields[0].offset, +- gregno, TYPE_MODE (fields[1].type), +- fields[1].offset); ++ return loongarch_pass_fpr_pair (mode, fregno, ++ TYPE_MODE (fields[0].type), ++ fields[0].offset, gregno, ++ TYPE_MODE (fields[1].type), ++ fields[1].offset); + } + } + +@@ -791,7 +586,7 @@ loongarch_get_arg_info (struct loongarch_arg_info *info, const CUMULATIVE_ARGS * + + static rtx + loongarch_function_arg (cumulative_args_t cum_v, machine_mode mode, +- const_tree type, bool named) ++ const_tree type, bool named) + { + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + struct loongarch_arg_info info; +@@ -806,7 +601,7 @@ loongarch_function_arg (cumulative_args_t cum_v, machine_mode mode, + + static void + loongarch_function_arg_advance (cumulative_args_t cum_v, machine_mode mode, +- const_tree type, bool named) ++ const_tree type, bool named) + { + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + struct loongarch_arg_info info; +@@ -825,11 +620,12 @@ loongarch_function_arg_advance (cumulative_args_t cum_v, machine_mode mode, + + static int + loongarch_arg_partial_bytes (cumulative_args_t cum, +- machine_mode mode, tree type, bool named) ++ machine_mode mode, tree type, bool named) + { + struct loongarch_arg_info arg; + +- loongarch_get_arg_info (&arg, get_cumulative_args (cum), mode, type, named, false); ++ loongarch_get_arg_info (&arg, get_cumulative_args (cum), ++ mode, type, named, false); + return arg.stack_p ? arg.num_gprs * UNITS_PER_WORD : 0; + } + +@@ -837,8 +633,9 @@ loongarch_arg_partial_bytes (cumulative_args_t cum, + VALTYPE is the return type and MODE is VOIDmode. For libcalls, + VALTYPE is null and MODE is the mode of the return value. 
*/ + +-rtx +-loongarch_function_value (const_tree type, const_tree func, machine_mode mode) ++static rtx ++loongarch_function_value_1 (const_tree type, const_tree func, ++ machine_mode mode) + { + struct loongarch_arg_info info; + CUMULATIVE_ARGS args; +@@ -854,15 +651,34 @@ loongarch_function_value (const_tree type, const_tree func, machine_mode mode) + mode = promote_function_mode (type, mode, &unsigned_p, func, 1); + } + +- memset (&args, 0, sizeof args); ++ memset (&args, 0, sizeof (args)); + return loongarch_get_arg_info (&info, &args, mode, type, true, true); + } + +-/* Implement TARGET_PASS_BY_REFERENCE. */ ++ ++/* Implement TARGET_FUNCTION_VALUE. */ ++ ++static rtx ++loongarch_function_value (const_tree valtype, const_tree fn_decl_or_type, ++ bool outgoing ATTRIBUTE_UNUSED) ++{ ++ return loongarch_function_value_1 (valtype, fn_decl_or_type, VOIDmode); ++} ++ ++/* Implement TARGET_LIBCALL_VALUE. */ ++ ++static rtx ++loongarch_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED) ++{ ++ return loongarch_function_value_1 (NULL_TREE, NULL_TREE, mode); ++} ++ ++ ++/* Implement TARGET_PASS_BY_REFERENCE. */ + + static bool + loongarch_pass_by_reference (cumulative_args_t cum_v, machine_mode mode, +- const_tree type, bool named) ++ const_tree type, bool named) + { + HOST_WIDE_INT size = type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode); + struct loongarch_arg_info info; +@@ -886,23 +702,25 @@ loongarch_pass_by_reference (cumulative_args_t cum_v, machine_mode mode, + /* Implement TARGET_RETURN_IN_MEMORY. */ + + static bool +-loongarch_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED) ++loongarch_return_in_memory (const_tree type, ++ const_tree fndecl ATTRIBUTE_UNUSED) + { + CUMULATIVE_ARGS args; + cumulative_args_t cum = pack_cumulative_args (&args); + + /* The rules for returning in memory are the same as for passing the + first named argument by reference. */ +- memset (&args, 0, sizeof args); ++ memset (&args, 0, sizeof (args)); + return loongarch_pass_by_reference (cum, TYPE_MODE (type), type, true); + } + + /* Implement TARGET_SETUP_INCOMING_VARARGS. */ + + static void +-loongarch_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode, +- tree type, int *pretend_size ATTRIBUTE_UNUSED, +- int no_rtl) ++loongarch_setup_incoming_varargs (cumulative_args_t cum, ++ machine_mode mode, tree type, ++ int *pretend_size ATTRIBUTE_UNUSED, ++ int no_rtl) + { + CUMULATIVE_ARGS local_cum; + int gp_saved; +@@ -911,7 +729,8 @@ loongarch_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode, + argument. Advance a local copy of CUM past the last "real" named + argument, to find out how many registers are left over. */ + local_cum = *get_cumulative_args (cum); +- loongarch_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, 1); ++ loongarch_function_arg_advance (pack_cumulative_args (&local_cum), ++ mode, type, 1); + + /* Found out how many registers we need to save. 
*/ + gp_saved = MAX_ARGS_IN_REGISTERS - local_cum.num_gprs; +@@ -920,12 +739,11 @@ loongarch_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode, + { + rtx ptr = plus_constant (Pmode, virtual_incoming_args_rtx, + REG_PARM_STACK_SPACE (cfun->decl) +- - gp_saved * UNITS_PER_WORD); ++ - gp_saved * UNITS_PER_WORD); + rtx mem = gen_frame_mem (BLKmode, ptr); + set_mem_alias_set (mem, get_varargs_alias_set ()); + +- move_block_from_reg (local_cum.num_gprs + GP_ARG_FIRST, +- mem, gp_saved); ++ move_block_from_reg (local_cum.num_gprs + GP_ARG_FIRST, mem, gp_saved); + } + if (REG_PARM_STACK_SPACE (cfun->decl) == 0) + cfun->machine->varargs_size = gp_saved * UNITS_PER_WORD; +@@ -941,8 +759,7 @@ loongarch_set_frame_expr (rtx frame_pattern) + + insn = get_last_insn (); + RTX_FRAME_RELATED_P (insn) = 1; +- REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR, +- frame_pattern, ++ REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR, frame_pattern, + REG_NOTES (insn)); + } + +@@ -963,8 +780,8 @@ static bool + loongarch_save_reg_p (unsigned int regno) + { + bool call_saved = !global_regs[regno] && !call_used_regs[regno]; +- bool might_clobber = crtl->saves_all_registers +- || df_regs_ever_live_p (regno); ++ bool might_clobber ++ = crtl->saves_all_registers || df_regs_ever_live_p (regno); + + if (call_saved && might_clobber) + return true; +@@ -978,15 +795,6 @@ loongarch_save_reg_p (unsigned int regno) + return false; + } + +-/* Determine whether to call GPR save/restore routines. */ +-static bool +-loongarch_use_save_libcall (const struct loongarch_frame_info *frame) +-{ +- // FIXME: if (!TARGET_SAVE_RESTORE || crtl->calls_eh_return || frame_pointer_needed) +- return false; +- +-} +- + /* Determine which GPR save/restore routine to call. */ + + static unsigned +@@ -998,43 +806,114 @@ loongarch_save_libcall_count (unsigned mask) + abort (); + } + ++/* Find an available register to be used as dynamic realign argument ++ pointer regsiter. Such a register will be written in prologue and ++ used in begin of body, so it must not be ++ 1. parameter passing register. ++ 2. GOT pointer. ++ We reuse static-chain register if it is available. Otherwise, we ++ use r15 for loongarch64(There may be a better choice. TODO). ++ ++ Return: the regno of chosen register. */ ++ ++static unsigned int ++find_drap_reg (void) ++{ ++ tree decl = cfun->decl; ++ /* Always use callee-saved register if there are no caller-saved ++ registers. */ ++ /* Use r15 for nested function or function need static chain. ++ Since function with tail call may use any caller-saved ++ registers in epilogue, DRAP must not use caller-saved ++ register in such case. */ ++ if (DECL_STATIC_CHAIN (decl) ++ || crtl->tail_call_emit) ++ return DRAP_REGNUM; ++ ++ return STATIC_CHAIN_REGNUM; ++} ++ ++ ++/* Return Dynamic Realign Argument Pointer RTX. Now there isn't any. 
*/ ++ ++static rtx ++loongarch_get_drap_rtx (void) ++{ ++ if (crtl->stack_alignment_needed <= STACK_BOUNDARY ++ || (get_frame_size () == 0 && crtl->args.size == 0)) ++ { ++ crtl->stack_realign_needed = false; ++ return NULL; ++ } ++ ++ if (loongarch_force_drap) ++ crtl->need_drap = true; ++ ++ if (stack_realign_drap) ++ { ++ /* Assign DRAP to vDRAP and returns vDRAP */ ++ unsigned int regno = find_drap_reg (); ++ rtx drap_vreg; ++ rtx arg_ptr; ++ rtx_insn *seq, *insn; ++ ++ arg_ptr = gen_rtx_REG (Pmode, regno); ++ crtl->drap_reg = arg_ptr; ++ ++ start_sequence (); ++ drap_vreg = copy_to_reg (arg_ptr); ++ seq = get_insns (); ++ end_sequence (); ++ ++ insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ())); ++ if (!optimize) ++ { ++ add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ } ++ return drap_vreg; ++ } ++ else ++ return NULL; ++} ++ + /* Populate the current function's loongarch_frame_info structure. + +- LARCH stack frames grown downward. High addresses are at the top. +- +- +-------------------------------+ +- | | +- | incoming stack arguments | +- | | +- +-------------------------------+ <-- incoming stack pointer +- | | +- | callee-allocated save area | +- | for arguments that are | +- | split between registers and | +- | the stack | +- | | +- +-------------------------------+ <-- arg_pointer_rtx +- | | +- | callee-allocated save area | +- | for register varargs | +- | | +- +-------------------------------+ <-- hard_frame_pointer_rtx; +- | | stack_pointer_rtx + gp_sp_offset +- | GPR save area | + UNITS_PER_WORD +- | | +- +-------------------------------+ <-- stack_pointer_rtx + fp_sp_offset +- | | + UNITS_PER_HWVALUE +- | FPR save area | +- | | +- +-------------------------------+ <-- frame_pointer_rtx (virtual) +- | | +- | local variables | +- | | +- P +-------------------------------+ +- | | +- | outgoing stack arguments | +- | | +- +-------------------------------+ <-- stack_pointer_rtx ++ LoongArch stack frames grown downward. High addresses are at the top. ++ ++ +-------------------------------+ ++ | | ++ | incoming stack arguments | ++ | | ++ +-------------------------------+ <-- incoming stack pointer ++ | | ++ | callee-allocated save area | ++ | for arguments that are | ++ | split between registers and | ++ | the stack | ++ | | ++ +-------------------------------+ <-- arg_pointer_rtx (virtual) ++ | | ++ | callee-allocated save area | ++ | for register varargs | ++ | | ++ +-------------------------------+ <-- hard_frame_pointer_rtx; ++ | | stack_pointer_rtx + gp_sp_offset ++ | GPR save area | + UNITS_PER_WORD ++ | | ++ +-------------------------------+ <-- stack_pointer_rtx + fp_sp_offset ++ | | + UNITS_PER_HWVALUE ++ | FPR save area | ++ | | ++ +-------------------------------+ <-- frame_pointer_rtx (virtual) ++ | | ++ | local variables | ++ | | ++ P +-------------------------------+ ++ | | ++ | outgoing stack arguments | ++ | | ++ +-------------------------------+ <-- stack_pointer_rtx + + Dynamic stack allocations such as alloca insert data at point P. + They decrease stack_pointer_rtx but leave frame_pointer_rtx and +@@ -1050,58 +929,93 @@ loongarch_compute_frame_info (void) + frame = &cfun->machine->frame; + memset (frame, 0, sizeof (*frame)); + +- /* Find out which GPRs we need to save. */ +- for (regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++) ++ /* Find out which GPRs we need to save. 
*/ ++ for (regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++) ++ if (loongarch_save_reg_p (regno)) ++ frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++; ++ ++ /* If this function calls eh_return, we must also save and restore the ++ EH data registers. */ ++ if (crtl->calls_eh_return) ++ for (i = 0; (regno = EH_RETURN_DATA_REGNO (i)) != INVALID_REGNUM; i++) ++ frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++; ++ ++ /* Find out which FPRs we need to save. This loop must iterate over ++ the same space as its companion in loongarch_for_each_saved_reg. */ ++ if (TARGET_HARD_FLOAT) ++ for (regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++) + if (loongarch_save_reg_p (regno)) +- frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++; +- +- /* If this function calls eh_return, we must also save and restore the +- EH data registers. */ +- if (crtl->calls_eh_return) +- for (i = 0; (regno = EH_RETURN_DATA_REGNO (i)) != INVALID_REGNUM; i++) +- frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++; +- +- /* Find out which FPRs we need to save. This loop must iterate over +- the same space as its companion in loongarch_for_each_saved_reg. */ +- if (TARGET_HARD_FLOAT) +- for (regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++) +- if (loongarch_save_reg_p (regno)) +- frame->fmask |= 1 << (regno - FP_REG_FIRST), num_f_saved++; +- +- /* At the bottom of the frame are any outgoing stack arguments. */ +- offset = LARCH_STACK_ALIGN (crtl->outgoing_args_size); +- /* Next are local stack variables. */ +- offset += LARCH_STACK_ALIGN (get_frame_size ()); +- /* The virtual frame pointer points above the local variables. */ ++ frame->fmask |= 1 << (regno - FP_REG_FIRST), num_f_saved++; ++ ++ /* Treat drap reg as a callee-saved reg. */ ++ if (stack_realign_drap) ++ frame->mask |= 1 << (find_drap_reg ()), num_x_saved++; ++ ++ /* At the bottom of the frame are any outgoing stack arguments. */ ++ offset = LARCH_STACK_ALIGN2 (crtl->outgoing_args_size); ++ /* Next are local stack variables. */ ++ offset += LARCH_STACK_ALIGN2 (get_frame_size ()); ++ /* The virtual frame pointer points above the local variables. */ + frame->frame_pointer_offset = offset; +- /* Next are the callee-saved FPRs. */ ++ /* Next are the callee-saved FPRs. */ + if (frame->fmask) +- offset += LARCH_STACK_ALIGN (num_f_saved * UNITS_PER_FP_REG); +- frame->fp_sp_offset = offset - UNITS_PER_FP_REG; +- /* Next are the callee-saved GPRs. */ ++ { ++ if (crtl->stack_realign_needed) ++ offset += num_f_saved * UNITS_PER_FP_REG; ++ else ++ offset += LARCH_STACK_ALIGN (num_f_saved * UNITS_PER_FP_REG); ++ frame->fp_sp_offset = offset - UNITS_PER_FP_REG; ++ } ++ else ++ frame->fp_sp_offset = offset; ++ /* Next are the callee-saved GPRs. */ + if (frame->mask) + { +- unsigned x_save_size = LARCH_STACK_ALIGN (num_x_saved * UNITS_PER_WORD); +- unsigned num_save_restore = 1 + loongarch_save_libcall_count (frame->mask); ++ unsigned x_save_size; ++ if (crtl->stack_realign_needed) ++ x_save_size = num_x_saved * UNITS_PER_WORD; ++ else ++ x_save_size = LARCH_STACK_ALIGN (num_x_saved * UNITS_PER_WORD); ++ unsigned num_save_restore ++ = 1 + loongarch_save_libcall_count (frame->mask); + + /* Only use save/restore routines if they don't alter the stack size. 
*/ + if (LARCH_STACK_ALIGN (num_save_restore * UNITS_PER_WORD) == x_save_size) + frame->save_libcall_adjustment = x_save_size; + + offset += x_save_size; ++ frame->gp_sp_offset = offset - UNITS_PER_WORD; + } +- frame->gp_sp_offset = offset - UNITS_PER_WORD; +- /* The hard frame pointer points above the callee-saved GPRs. */ +- frame->hard_frame_pointer_offset = offset; +- /* Above the hard frame pointer is the callee-allocated varags save area. */ +- offset += LARCH_STACK_ALIGN (cfun->machine->varargs_size); ++ else ++ frame->gp_sp_offset = offset; ++ ++ /* The hard frame pointer points above the callee-saved GPRs. */ ++ if (crtl->stack_realign_needed) ++ frame->hard_frame_pointer_offset = frame->gp_sp_offset; /* For dwarf. */ ++ else ++ frame->hard_frame_pointer_offset = offset; ++ ++ /* Realign here for saving space if crtl->stack_realign_needed is true. */ ++ if (stack_realign_drap) ++ offset = LARCH_STACK_ALIGN2 (offset); ++ else if (stack_realign_fp) ++ offset = LARCH_STACK_ALIGN (offset); ++ ++ /* Above the hard frame pointer is the callee-allocated varags save area. */ ++ if (stack_realign_fp) ++ offset += LARCH_STACK_ALIGN (cfun->machine->varargs_size); ++ else ++ offset += LARCH_STACK_ALIGN2 (cfun->machine->varargs_size); + /* Next is the callee-allocated area for pretend stack arguments. */ +- offset += LARCH_STACK_ALIGN (crtl->args.pretend_args_size); ++ if (stack_realign_fp) ++ offset += LARCH_STACK_ALIGN (crtl->args.pretend_args_size); ++ else ++ offset += LARCH_STACK_ALIGN2 (crtl->args.pretend_args_size); + /* Arg pointer must be below pretend args, but must be above alignment + padding. */ + frame->arg_pointer_offset = offset - crtl->args.pretend_args_size; + frame->total_size = offset; +- /* Next points the incoming stack pointer and any incoming arguments. */ ++ /* Next points the incoming stack pointer and any incoming arguments. */ + + /* Only use save/restore routines when the GPRs are atop the frame. */ + if (frame->hard_frame_pointer_offset != frame->total_size) +@@ -1117,8 +1031,6 @@ loongarch_initial_elimination_offset (int from, int to) + { + HOST_WIDE_INT src, dest; + +- loongarch_compute_frame_info (); +- + if (to == HARD_FRAME_POINTER_REGNUM) + dest = cfun->machine->frame.hard_frame_pointer_offset; + else if (to == STACK_POINTER_REGNUM) +@@ -1145,8 +1057,8 @@ typedef void (*loongarch_save_restore_fn) (rtx, rtx); + stack pointer. */ + + static void +-loongarch_save_restore_reg (machine_mode mode, int regno, +- HOST_WIDE_INT offset, loongarch_save_restore_fn fn) ++loongarch_save_restore_reg (machine_mode mode, int regno, HOST_WIDE_INT offset, ++ loongarch_save_restore_fn fn) + { + rtx mem; + +@@ -1159,12 +1071,29 @@ loongarch_save_restore_reg (machine_mode mode, int regno, + of the frame. */ + + static void +-loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset, loongarch_save_restore_fn fn) ++loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset, ++ loongarch_save_restore_fn fn) + { + HOST_WIDE_INT offset; + +- /* Save the link register and s-registers. */ ++ /* Save the link register and s-registers. */ + offset = cfun->machine->frame.gp_sp_offset - sp_offset; ++ ++ /* The drap reg and fp reg have been saved in loongarch_expand_prologue ++ * when stack_realign_drap is true. */ ++ if (stack_realign_drap) ++ offset -= UNITS_PER_WORD * cfun->machine->frame.gpr_saved_num; ++ ++ /* Save fp reg first for access incoming-args in stack easily ++ * when stack_realign_fp is true. 
*/ ++ if (stack_realign_fp) ++ { ++ loongarch_save_restore_reg (word_mode, HARD_FRAME_POINTER_REGNUM, ++ offset, fn); ++ cfun->machine->frame.mask &= (~(1LL << HARD_FRAME_POINTER_REGNUM)); ++ offset -= UNITS_PER_WORD; ++ } ++ + for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++) + if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST)) + { +@@ -1172,6 +1101,10 @@ loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset, loongarch_save_restore_fn + offset -= UNITS_PER_WORD; + } + ++ /* Undo. */ ++ if (stack_realign_fp) ++ cfun->machine->frame.mask |= (1LL << HARD_FRAME_POINTER_REGNUM); ++ + /* This loop must iterate over the same space as its companion in + loongarch_compute_frame_info. */ + offset = cfun->machine->frame.fp_sp_offset - sp_offset; +@@ -1185,6 +1118,19 @@ loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset, loongarch_save_restore_fn + } + } + ++/* Emit a move from SRC to DEST. Assume that the move expanders can ++ handle all moves if !can_create_pseudo_p (). The distinction is ++ important because, unlike emit_move_insn, the move expanders know ++ how to force Pmode objects into the constant pool even when the ++ constant pool address is not itself legitimate. */ ++ ++rtx ++loongarch_emit_move (rtx dest, rtx src) ++{ ++ return (can_create_pseudo_p () ? emit_move_insn (dest, src) ++ : emit_move_insn_1 (dest, src)); ++} ++ + /* Save register REG to MEM. Make the instruction frame-related. */ + + static void +@@ -1207,575 +1153,690 @@ loongarch_restore_reg (rtx reg, rtx mem) + RTX_FRAME_RELATED_P (insn) = 1; + } + +-/* Return the code to invoke the GPR save routine. */ +- +-const char * +-loongarch_output_gpr_save (unsigned mask) +-{ +- static char s[32]; +- unsigned n = loongarch_save_libcall_count (mask); +- +- ssize_t bytes = snprintf (s, sizeof (s), "call\tt0,__loongarch_save_%u", n); +- gcc_assert ((size_t) bytes < sizeof (s)); +- +- return s; +-} +- +-#define IMM_BITS 12 +- +-#define IMM_REACH (1LL << IMM_BITS) +- + /* For stack frames that can't be allocated with a single ADDI instruction, + compute the best value to initially allocate. It must at a minimum +- allocate enough space to spill the callee-saved registers. If TARGET_RVC, +- try to pick a value that will allow compression of the register saves +- without adding extra instructions. */ ++ allocate enough space to spill the callee-saved registers. */ + + static HOST_WIDE_INT + loongarch_first_stack_step (struct loongarch_frame_info *frame) + { +- if (SMALL_OPERAND (frame->total_size)) ++ ++ /* Only for fpr/gpr saved regs first when stack_realign_fp is true. */ ++ if (stack_realign_fp) ++ return frame->total_size - frame->frame_pointer_offset; ++ ++ HOST_WIDE_INT realign_size = crtl->stack_alignment_needed / BITS_PER_UNIT; ++ ++ if (IMM12_OPERAND (frame->total_size)) + return frame->total_size; + +- HOST_WIDE_INT min_first_step = +- LARCH_STACK_ALIGN (frame->total_size - frame->fp_sp_offset); ++ HOST_WIDE_INT min_first_step ++ = LARCH_STACK_ALIGN2 (frame->total_size - frame->fp_sp_offset); + HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8; + HOST_WIDE_INT min_second_step = frame->total_size - max_first_step; +- gcc_assert (min_first_step <= max_first_step); + + /* As an optimization, use the least-significant bits of the total frame +- size, so that the second adjustment step is just LUI + ADD. */ +- if (!SMALL_OPERAND (min_second_step) ++ size, so that the second adjustment step is just LU12I + ADD. 
*/ ++ if (!IMM12_OPERAND (min_second_step) + && frame->total_size % IMM_REACH < IMM_REACH / 2 + && frame->total_size % IMM_REACH >= min_first_step) + return frame->total_size % IMM_REACH; + +- return max_first_step; +-} +- +-static rtx +-loongarch_adjust_libcall_cfi_prologue () +-{ +- rtx dwarf = NULL_RTX; +- rtx adjust_sp_rtx, reg, mem, insn; +- int saved_size = cfun->machine->frame.save_libcall_adjustment; +- int offset; +- +- for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++) +- if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST)) +- { +- /* The save order is ra, s0 to s8. */ +- if (regno == RETURN_ADDR_REGNUM) +- offset = saved_size - UNITS_PER_WORD; +- else +- offset = saved_size - ((regno - S0_REGNUM + 2) * UNITS_PER_WORD); +- +- reg = gen_rtx_REG (SImode, regno); +- mem = gen_frame_mem (SImode, plus_constant (Pmode, +- stack_pointer_rtx, +- offset)); +- +- insn = gen_rtx_SET (mem, reg); +- dwarf = alloc_reg_note (REG_CFA_OFFSET, insn, dwarf); +- } +- +- /* Debug info for adjust sp. */ +- adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx, +- stack_pointer_rtx, GEN_INT (-saved_size)); +- dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx, +- dwarf); +- return dwarf; ++ return crtl->stack_realign_needed ? (max_first_step < realign_size ++ ? realign_size ++ : ROUND_DOWN (max_first_step, ++ realign_size)) ++ : max_first_step; + } + + static void + loongarch_emit_stack_tie (void) + { +- if (Pmode == SImode) +- emit_insn (gen_stack_tiesi (stack_pointer_rtx, hard_frame_pointer_rtx)); +- else +- emit_insn (gen_stack_tiedi (stack_pointer_rtx, hard_frame_pointer_rtx)); +-} +- +-/* Return nonzero if this function is known to have a null epilogue. +- This allows the optimizer to omit jumps to jumps if no stack +- was created. */ +- +-bool +-loongarch_can_use_return_insn (void) +-{ +- return reload_completed && cfun->machine->frame.total_size == 0; ++ emit_insn (PMODE_INSN (gen_stack_tie, ++ (stack_pointer_rtx, hard_frame_pointer_rtx))); + } + +-static rtx +-loongarch_adjust_libcall_cfi_epilogue () +-{ +- rtx dwarf = NULL_RTX; +- rtx adjust_sp_rtx, reg; +- int saved_size = cfun->machine->frame.save_libcall_adjustment; +- +- /* Debug info for adjust sp. */ +- adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx, +- stack_pointer_rtx, GEN_INT (saved_size)); +- dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx, +- dwarf); +- +- for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++) +- if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST)) +- { +- reg = gen_rtx_REG (SImode, regno); +- dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); +- } ++#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP) + +- return dwarf; +-} ++#if PROBE_INTERVAL > 16384 ++#error Cannot use indexed addressing mode for stack probing ++#endif + +-/* Expand an "epilogue" or "sibcall_epilogue" pattern; SIBCALL_P +- says which. */ ++/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE, ++ inclusive. These are offsets from the current stack pointer. */ + +-void +-loongarch_expand_epilogue (bool sibcall_p) ++static void ++loongarch_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size) + { +- /* Split the frame into two. STEP1 is the amount of stack we should +- deallocate before restoring the registers. STEP2 is the amount we +- should deallocate afterwards. ++ HOST_WIDE_INT rounded_size; ++ rtx r12 = LARCH_PROLOGUE_TEMP2 (Pmode); ++ rtx r14 = LARCH_PROLOGUE_TEMP3 (Pmode); + +- Start off by assuming that no registers need to be restored. 
*/ +- struct loongarch_frame_info *frame = &cfun->machine->frame; +- unsigned mask = frame->mask; +- HOST_WIDE_INT step1 = frame->total_size; +- HOST_WIDE_INT step2 = 0; +- bool use_restore_libcall = !sibcall_p && loongarch_use_save_libcall (frame); +- rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); +- rtx insn; ++ size = size + first; ++ /* Sanity check for the addressing mode we're going to use. */ ++ gcc_assert (first <= 16384); + +- /* We need to add memory barrier to prevent read from deallocated stack. */ +- bool need_barrier_p = (get_frame_size () +- + cfun->machine->frame.arg_pointer_offset) != 0; ++ /* Step 1: round SIZE to the previous multiple of the interval. */ + +- if (!sibcall_p && loongarch_can_use_return_insn ()) +- { +- emit_jump_insn (gen_return ()); +- return; +- } ++ rounded_size = ROUND_DOWN (size, PROBE_INTERVAL); + +- /* Move past any dynamic stack allocations. */ +- if (cfun->calls_alloca) ++ /* Step 2: compute initial and final value of the loop counter. */ ++ ++ emit_move_insn (r14, GEN_INT (PROBE_INTERVAL)); ++ /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */ ++ if (rounded_size != 0) + { +- /* Emit a barrier to prevent loads from a deallocated stack. */ +- loongarch_emit_stack_tie (); +- need_barrier_p = false; ++ emit_move_insn (r12, GEN_INT (rounded_size)); ++ emit_insn (gen_rtx_SET (r12, gen_rtx_MINUS (Pmode, ++ stack_pointer_rtx, r12))); + +- rtx adjust = GEN_INT (-frame->hard_frame_pointer_offset); +- if (!SMALL_OPERAND (INTVAL (adjust))) +- { +- loongarch_emit_move (N_LARCH_PROLOGUE_TEMP (Pmode), adjust); +- adjust = N_LARCH_PROLOGUE_TEMP (Pmode); +- } ++ /* Step 3: the loop + +- insn = emit_insn ( +- gen_add3_insn (stack_pointer_rtx, hard_frame_pointer_rtx, +- adjust)); ++ do ++ { ++ TEST_ADDR = TEST_ADDR + PROBE_INTERVAL ++ probe at TEST_ADDR ++ } ++ while (TEST_ADDR != LAST_ADDR) + +- rtx dwarf = NULL_RTX; +- rtx cfa_adjust_value = gen_rtx_PLUS ( +- Pmode, hard_frame_pointer_rtx, +- GEN_INT (-frame->hard_frame_pointer_offset)); +- rtx cfa_adjust_rtx = gen_rtx_SET (stack_pointer_rtx, cfa_adjust_value); +- dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, cfa_adjust_rtx, dwarf); +- RTX_FRAME_RELATED_P (insn) = 1; ++ probes at FIRST + N * PROBE_INTERVAL for values of N from 1 ++ until it is equal to ROUNDED_SIZE. */ + +- REG_NOTES (insn) = dwarf; ++ emit_insn (PMODE_INSN (gen_probe_stack_range, (stack_pointer_rtx, ++ stack_pointer_rtx, r12, r14))); + } + +- /* If we need to restore registers, deallocate as much stack as +- possible in the second step without going out of range. */ +- if ((frame->mask | frame->fmask) != 0) +- { +- step2 = loongarch_first_stack_step (frame); +- step1 -= step2; +- } ++ /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time ++ that SIZE is equal to ROUNDED_SIZE. */ + +- /* Set TARGET to BASE + STEP1. */ +- if (step1 > 0) ++ if (size != rounded_size) + { +- /* Emit a barrier to prevent loads from a deallocated stack. */ +- loongarch_emit_stack_tie (); +- need_barrier_p = false; +- +- /* Get an rtx for STEP1 that we can add to BASE. 
*/ +- rtx adjust = GEN_INT (step1); +- if (!SMALL_OPERAND (step1)) ++ if (size - rounded_size >= PROBE_INTERVAL/2) + { +- loongarch_emit_move (N_LARCH_PROLOGUE_TEMP (Pmode), adjust); +- adjust = N_LARCH_PROLOGUE_TEMP (Pmode); ++ emit_move_insn (r14, GEN_INT (size - rounded_size)); ++ emit_insn (gen_rtx_SET (stack_pointer_rtx, gen_rtx_MINUS (Pmode, ++ stack_pointer_rtx, ++ r14))); + } ++ else ++ emit_insn (gen_rtx_SET (stack_pointer_rtx, gen_rtx_PLUS (Pmode, ++ stack_pointer_rtx, ++ GEN_INT (rounded_size - size)))); + +- insn = emit_insn ( +- gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, adjust)); +- +- rtx dwarf = NULL_RTX; +- rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx, +- GEN_INT (step2)); +- +- dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf); +- RTX_FRAME_RELATED_P (insn) = 1; +- +- REG_NOTES (insn) = dwarf; + } + +- if (use_restore_libcall) +- frame->mask = 0; /* Temporarily fib that we need not save GPRs. */ +- +- /* Restore the registers. */ +- loongarch_for_each_saved_reg (frame->total_size - step2, loongarch_restore_reg); +- +- if (use_restore_libcall) ++ if (first) + { +- frame->mask = mask; /* Undo the above fib. */ +- gcc_assert (step2 >= frame->save_libcall_adjustment); +- step2 -= frame->save_libcall_adjustment; ++ emit_move_insn (r12, GEN_INT (first)); ++ emit_insn (gen_rtx_SET (stack_pointer_rtx, gen_rtx_PLUS (Pmode, ++ stack_pointer_rtx, r12))); + } + +- if (need_barrier_p) +- loongarch_emit_stack_tie (); ++ /* Make sure nothing is scheduled before we are done. */ ++ emit_insn (gen_blockage ()); ++} + +- /* Deallocate the final bit of the frame. */ +- if (step2 > 0) +- { +- insn = emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, +- GEN_INT (step2))); ++/* Probe a range of stack addresses from REG1 to REG2 inclusive. These are ++ absolute addresses. */ ++const char * ++loongarch_output_probe_stack_range (rtx reg1, rtx reg2, rtx reg3) ++{ ++ static int labelno = 0; ++ char loop_lab[32], tmp[64]; ++ rtx xops[3]; + +- rtx dwarf = NULL_RTX; +- rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx, +- const0_rtx); +- dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf); +- RTX_FRAME_RELATED_P (insn) = 1; ++ ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++); + +- REG_NOTES (insn) = dwarf; ++ /* Loop. */ ++ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); ++ ++ /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */ ++ xops[0] = reg1; ++ xops[1] = GEN_INT (-PROBE_INTERVAL); ++ xops[2] = reg3; ++ if (TARGET_64BIT) ++ output_asm_insn ("sub.d\t%0,%0,%2", xops); ++ else ++ output_asm_insn ("sub.w\t%0,%0,%2", xops); ++ ++ /* Probe at TEST_ADDR, test if TEST_ADDR == LAST_ADDR and branch. */ ++ xops[1] = reg2; ++ strcpy (tmp, "bne\t%0,%1,"); ++ if (TARGET_64BIT) ++ output_asm_insn ("st.d\t$r0,%0,0", xops); ++ else ++ output_asm_insn ("st.w\t$r0,%0,0", xops); ++ output_asm_insn (strcat (tmp, &loop_lab[1]), xops); ++ ++ return ""; ++} ++ ++/* Expand the "prologue" pattern. */ ++ ++void ++loongarch_expand_prologue (void) ++{ ++ struct loongarch_frame_info *frame; ++ HOST_WIDE_INT size; ++ rtx insn; ++ HOST_WIDE_INT realign_size; ++ HOST_WIDE_INT offset; ++ unsigned mask; ++ HOST_WIDE_INT saved_gpr_num = 0; ++ ++ /* Finalize crtl->stack_realign_needed and frame_pointer_needed flags. */ ++ if((crtl->stack_realign_needed || (!flag_omit_frame_pointer && optimize)) && loongarch_stack_realign) ++ { ++ unsigned int incoming_stack_boundary ++ = (crtl->parm_stack_boundary > PREFERRED_STACK_BOUNDARY ++ ? 
crtl->parm_stack_boundary : PREFERRED_STACK_BOUNDARY); ++ unsigned int stack_alignment ++ = (crtl->is_leaf ++ ? crtl->max_used_stack_slot_alignment ++ : crtl->stack_alignment_needed); ++ unsigned int stack_realign ++ = (incoming_stack_boundary < stack_alignment); ++ ++ if ((get_frame_size () + crtl->outgoing_args_size) == 0 ++ && (crtl->args.size == 0) ++ && frame_pointer_needed ++ && crtl->is_leaf ++ && crtl->sp_is_unchanging ++ && !cfun->calls_alloca ++ && !crtl->calls_eh_return ++ && !(STACK_CHECK_MOVING_SP ++ && flag_stack_check ++ && flag_exceptions ++ && cfun->can_throw_non_call_exceptions)) ++ { ++ /* If drap has been set, but it actually isn't live at the ++ start of the function, there is no reason to set it up. */ ++ if (crtl->drap_reg) ++ { ++ basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb; ++ if (! REGNO_REG_SET_P (DF_LR_IN (bb), ++ REGNO (crtl->drap_reg))) ++ { ++ crtl->drap_reg = NULL_RTX; ++ crtl->need_drap = false; ++ } ++ } ++ frame_pointer_needed = false; ++ crtl->stack_realign_needed = false; ++ crtl->max_used_stack_slot_alignment = incoming_stack_boundary; ++ crtl->stack_alignment_needed = incoming_stack_boundary; ++ crtl->stack_alignment_estimated = incoming_stack_boundary; ++ if (crtl->preferred_stack_boundary > incoming_stack_boundary) ++ crtl->preferred_stack_boundary = incoming_stack_boundary; ++ ++ df_finish_pass (true); ++ df_scan_alloc (NULL); ++ df_scan_blocks (); ++ df_compute_regs_ever_live (true); ++ df_analyze (); ++ loongarch_compute_frame_info(); ++ } ++ } ++ ++ frame = &cfun->machine->frame; ++ size = frame->total_size; ++ ++ mask = frame->mask; ++ ++ realign_size = crtl->stack_alignment_needed / BITS_PER_UNIT; ++ ++ if (flag_stack_usage_info) ++ { ++ if (stack_realign_drap) ++ { ++ current_function_dynamic_stack_size += crtl->stack_alignment_needed / BITS_PER_UNIT; ++ } ++ current_function_static_stack_size = size; + } + +- if (use_restore_libcall) ++ /* When stack_realign_drap is true, save current sp in drap-reg then realign. */ ++ if (stack_realign_drap) + { +- rtx dwarf = loongarch_adjust_libcall_cfi_epilogue (); +- insn = emit_insn (gen_gpr_restore (GEN_INT (loongarch_save_libcall_count (mask)))); ++ rtx tmp_reg = plus_constant (Pmode, stack_pointer_rtx, 0); ++ insn = emit_insn (gen_rtx_SET (crtl->drap_reg, tmp_reg)); + RTX_FRAME_RELATED_P (insn) = 1; +- REG_NOTES (insn) = dwarf; + +- emit_jump_insn (gen_gpr_restore_return (ra)); +- return; ++ int log2_realigned_bytes = exact_log2 (realign_size); ++ tmp_reg = gen_rtx_REG (Pmode, GP_REG_FIRST); ++ insn = emit_insn (gen_insvdi (stack_pointer_rtx, ++ GEN_INT (log2_realigned_bytes), ++ const0_rtx, ++ tmp_reg)); ++ insn = gen_anddi3 (stack_pointer_rtx, ++ stack_pointer_rtx, ++ GEN_INT (-realign_size)); ++ loongarch_set_frame_expr (insn); + } + +- /* Add in the __builtin_eh_return stack adjustment. */ +- if (crtl->calls_eh_return) +- emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, +- EH_RETURN_STACKADJ_RTX)); ++ /* Save the registers. */ ++ if ((frame->mask | frame->fmask) != 0) ++ { ++ HOST_WIDE_INT step1 = MIN (size, loongarch_first_stack_step (frame)); + +- if (!sibcall_p) +- emit_jump_insn (gen_simple_return_internal (ra)); +-} ++ /* Save fp first for dwarf. 
*/ ++ if (stack_realign_drap) ++ { ++ gcc_assert (step1 % realign_size == 0); ++ if (frame->mask & (1LL << HARD_FRAME_POINTER_REGNUM)) ++ { ++ emit_insn (gen_add3_insn (stack_pointer_rtx, ++ stack_pointer_rtx, ++ GEN_INT (-(frame->total_size ++ - frame->gp_sp_offset)))); ++ step1 -= (frame->total_size - frame->gp_sp_offset); ++ loongarch_save_restore_reg (word_mode, HARD_FRAME_POINTER_REGNUM, ++ 0, loongarch_save_reg); ++ cfun->machine->frame.mask ++ = frame->mask & ~(1LL << HARD_FRAME_POINTER_REGNUM); ++ saved_gpr_num ++; ++ } ++ /* Set up the frame pointer, if we're using one. */ ++ if (frame_pointer_needed) ++ { ++ insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); ++ RTX_FRAME_RELATED_P (insn) = 1; + +- +-static rtx loongarch_find_pic_call_symbol (rtx_insn *, rtx, bool); +-static int loongarch_register_move_cost (machine_mode, reg_class_t, +- reg_class_t); +- +-/* Predicates to test for presence of "near"/"short_call" and "far"/"long_call" +- attributes on the given TYPE. */ ++ loongarch_emit_stack_tie (); ++ } ++ } + +-static bool +-loongarch_near_type_p (const_tree type) +-{ +- return (lookup_attribute ("short_call", TYPE_ATTRIBUTES (type)) != NULL +- || lookup_attribute ("near", TYPE_ATTRIBUTES (type)) != NULL); +-} ++ if (!IMM12_OPERAND (-step1) && stack_realign_drap) ++ { ++ loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode), GEN_INT (-step1)); ++ emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, ++ LARCH_PROLOGUE_TEMP (Pmode))); + +-static bool +-loongarch_far_type_p (const_tree type) +-{ +- return (lookup_attribute ("long_call", TYPE_ATTRIBUTES (type)) != NULL +- || lookup_attribute ("far", TYPE_ATTRIBUTES (type)) != NULL); +-} ++ /* Describe the effect of the previous instructions. */ ++ insn = plus_constant (Pmode, stack_pointer_rtx, -step1); ++ insn = gen_rtx_SET (stack_pointer_rtx, insn); ++ loongarch_set_frame_expr (insn); ++ } ++ else ++ { ++ insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (-step1)); ++ RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; ++ } + ++ if (saved_gpr_num && stack_realign_drap) ++ size -= (step1 + frame->total_size - frame->gp_sp_offset); ++ else ++ size -= step1; + +-/* Check if the interrupt attribute is set for a function. */ ++ if (stack_realign_drap && (frame->mask & (1LL << find_drap_reg ()))) ++ { ++ offset = cfun->machine->frame.gp_sp_offset - size ++ - UNITS_PER_WORD * saved_gpr_num; ++ loongarch_save_restore_reg (word_mode, find_drap_reg (), ++ offset, loongarch_save_reg); ++ cfun->machine->frame.mask ++ = frame->mask & ~(1LL << (find_drap_reg ())); ++ saved_gpr_num ++; ++ } + +-static bool +-loongarch_interrupt_type_p (tree type) +-{ +- return lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type)) != NULL; +-} ++ cfun->machine->frame.gpr_saved_num = saved_gpr_num; ++ loongarch_for_each_saved_reg (size, loongarch_save_reg); ++ cfun->machine->frame.mask = mask; ++ } + +-/* Implement TARGET_COMP_TYPE_ATTRIBUTES. */ ++ /* Set up the frame pointer, if we're using one. */ ++ if (frame_pointer_needed && !stack_realign_drap) ++ { ++ insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (frame->hard_frame_pointer_offset - size)); ++ RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; + +-static int +-loongarch_comp_type_attributes (const_tree type1, const_tree type2) +-{ +- /* Disallow mixed near/far attributes. 
*/ +- if (loongarch_far_type_p (type1) && loongarch_near_type_p (type2)) +- return 0; +- if (loongarch_near_type_p (type1) && loongarch_far_type_p (type2)) +- return 0; +- return 1; +-} ++ loongarch_emit_stack_tie (); ++ } + +-/* Implement TARGET_INSERT_ATTRIBUTES. */ ++ /* Stack realign when stack_realign_fp is true. */ ++ if (stack_realign_fp) ++ { ++ int log2_realigned_bytes = exact_log2 (realign_size); ++ rtx tmp_reg = gen_rtx_REG (Pmode, GP_REG_FIRST); ++ insn = emit_insn (gen_insvdi (stack_pointer_rtx, ++ GEN_INT (log2_realigned_bytes), ++ const0_rtx, ++ tmp_reg)); ++ insn = gen_anddi3 (stack_pointer_rtx, ++ stack_pointer_rtx, ++ GEN_INT (-realign_size)); ++ loongarch_set_frame_expr (insn); ++ } + +-static void +-loongarch_insert_attributes (tree decl, tree *attributes) +-{ +-} ++ /* Allocate the rest of the frame. */ ++ if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK ++ || flag_stack_clash_protection) ++ && size > 0) ++ { ++ loongarch_emit_probe_stack_range (get_stack_check_protect (), size); + +-/* Implement TARGET_MERGE_DECL_ATTRIBUTES. */ ++ /* Describe the effect of the previous instructions. */ ++ insn = plus_constant (Pmode, stack_pointer_rtx, -size); ++ insn = gen_rtx_SET (stack_pointer_rtx, insn); ++ loongarch_set_frame_expr (insn); ++ } ++ else ++ { ++ if (size > 0) ++ { ++ if (stack_realign_drap) ++ gcc_assert (size % realign_size == 0); + +-static tree +-loongarch_merge_decl_attributes (tree olddecl, tree newdecl) +-{ +- return merge_attributes (DECL_ATTRIBUTES (olddecl), +- DECL_ATTRIBUTES (newdecl)); ++ if (IMM12_OPERAND (-size)) ++ { ++ insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (-size)); ++ RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; ++ } ++ else ++ { ++ loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode), GEN_INT (-size)); ++ emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, ++ LARCH_PROLOGUE_TEMP (Pmode))); ++ ++ /* Describe the effect of the previous instructions. */ ++ insn = plus_constant (Pmode, stack_pointer_rtx, -size); ++ insn = gen_rtx_SET (stack_pointer_rtx, insn); ++ loongarch_set_frame_expr (insn); ++ } ++ } ++ } + } + +-/* Implement TARGET_CAN_INLINE_P. */ ++/* Return nonzero if this function is known to have a null epilogue. ++ This allows the optimizer to omit jumps to jumps if no stack ++ was created. */ + +-static bool +-loongarch_can_inline_p (tree caller, tree callee) ++bool ++loongarch_can_use_return_insn (void) + { +- return default_target_can_inline_p (caller, callee); ++ return reload_completed && cfun->machine->frame.total_size == 0; + } + +-/* Handle an "interrupt" attribute with an optional argument. */ ++/* Expand an "epilogue" or "sibcall_epilogue" pattern; SIBCALL_P ++ says which. */ + +-static tree +-loongarch_handle_interrupt_attr (tree *node ATTRIBUTE_UNUSED, tree name, tree args, +- int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) ++void ++loongarch_expand_epilogue (bool sibcall_p) + { +- /* Check for an argument. */ +- if (is_attribute_p ("interrupt", name) && args != NULL) ++ /* Split the frame into two. STEP1 is the amount of stack we should ++ deallocate before restoring the registers. STEP2 is the amount we ++ should deallocate afterwards. ++ ++ Start off by assuming that no registers need to be restored. 
*/ ++ struct loongarch_frame_info *frame = &cfun->machine->frame; ++ unsigned mask = frame->mask; ++ HOST_WIDE_INT step1 = frame->total_size; ++ HOST_WIDE_INT step2 = 0; ++ rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); ++ rtx insn; ++ HOST_WIDE_INT offset; ++ HOST_WIDE_INT saved_gpr_num = 0; ++ ++ /* We need to add memory barrier to prevent read from deallocated stack. */ ++ bool need_barrier_p ++ = (get_frame_size () + cfun->machine->frame.arg_pointer_offset) != 0; ++ ++ if (!sibcall_p && loongarch_can_use_return_insn ()) + { +- tree cst; ++ emit_jump_insn (gen_return ()); ++ return; ++ } + +- cst = TREE_VALUE (args); +- if (TREE_CODE (cst) != STRING_CST) +- { +- warning (OPT_Wattributes, +- "%qE attribute requires a string argument", +- name); +- *no_add_attrs = true; +- } +- else if (strcmp (TREE_STRING_POINTER (cst), "eic") != 0 +- && strncmp (TREE_STRING_POINTER (cst), "vector=", 7) != 0) +- { +- warning (OPT_Wattributes, +- "argument to %qE attribute is neither eic, nor " +- "vector=", name); +- *no_add_attrs = true; +- } +- else if (strncmp (TREE_STRING_POINTER (cst), "vector=", 7) == 0) +- { +- const char *arg = TREE_STRING_POINTER (cst) + 7; ++ if (!stack_realign_fp) ++ { ++ /* Move past any dynamic stack allocations. */ ++ if (cfun->calls_alloca) ++ { ++ /* Emit a barrier to prevent loads from a deallocated stack. */ ++ loongarch_emit_stack_tie (); ++ need_barrier_p = false; + +- /* Acceptable names are: sw0,sw1,hw0,hw1,hw2,hw3,hw4,hw5. */ +- if (strlen (arg) != 3 +- || (arg[0] != 's' && arg[0] != 'h') +- || arg[1] != 'w' +- || (arg[0] == 's' && arg[2] != '0' && arg[2] != '1') +- || (arg[0] == 'h' && (arg[2] < '0' || arg[2] > '5'))) ++ rtx adjust = GEN_INT (-frame->hard_frame_pointer_offset); ++ if (!IMM12_OPERAND (INTVAL (adjust))) + { +- warning (OPT_Wattributes, +- "interrupt vector to %qE attribute is not " +- "vector=(sw0|sw1|hw0|hw1|hw2|hw3|hw4|hw5)", +- name); +- *no_add_attrs = true; ++ loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode), adjust); ++ adjust = LARCH_PROLOGUE_TEMP (Pmode); + } +- } + +- return NULL_TREE; +- } ++ insn = emit_insn (gen_add3_insn (stack_pointer_rtx, ++ hard_frame_pointer_rtx, ++ adjust)); + +- return NULL_TREE; +-} ++ if (!(stack_realign_drap)) ++ { ++ rtx dwarf = NULL_RTX; ++ rtx minus_offset = GEN_INT (-frame->hard_frame_pointer_offset); ++ rtx cfa_adjust_value = gen_rtx_PLUS (Pmode, ++ hard_frame_pointer_rtx, ++ minus_offset); + +-/* Handle a "use_shadow_register_set" attribute with an optional argument. */ ++ rtx cfa_adjust_rtx = gen_rtx_SET (stack_pointer_rtx, cfa_adjust_value); ++ dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, cfa_adjust_rtx, dwarf); ++ RTX_FRAME_RELATED_P (insn) = 1; + +-static tree +-loongarch_handle_use_shadow_register_set_attr (tree *node ATTRIBUTE_UNUSED, +- tree name, tree args, +- int flags ATTRIBUTE_UNUSED, +- bool *no_add_attrs) +-{ +- /* Check for an argument. */ +- if (is_attribute_p ("use_shadow_register_set", name) && args != NULL) +- { +- tree cst; ++ REG_NOTES (insn) = dwarf; ++ } ++ } + +- cst = TREE_VALUE (args); +- if (TREE_CODE (cst) != STRING_CST) +- { +- warning (OPT_Wattributes, +- "%qE attribute requires a string argument", +- name); +- *no_add_attrs = true; +- } +- else if (strcmp (TREE_STRING_POINTER (cst), "intstack") != 0) +- { +- warning (OPT_Wattributes, +- "argument to %qE attribute is not intstack", name); +- *no_add_attrs = true; +- } ++ /* If we need to restore registers, deallocate as much stack as ++ possible in the second step without going out of range. 
*/ ++ if ((frame->mask | frame->fmask) != 0) ++ { ++ step2 = loongarch_first_stack_step (frame); ++ step1 -= step2; ++ } + +- return NULL_TREE; +- } ++ /* Set TARGET to BASE + STEP1. */ ++ if (step1 > 0) ++ { ++ /* Emit a barrier to prevent loads from a deallocated stack. */ ++ loongarch_emit_stack_tie (); ++ need_barrier_p = false; + +- return NULL_TREE; +-} +- +-/* If X is a PLUS of a CONST_INT, return the two terms in *BASE_PTR +- and *OFFSET_PTR. Return X in *BASE_PTR and 0 in *OFFSET_PTR otherwise. */ ++ /* Get an rtx for STEP1 that we can add to BASE. */ ++ rtx adjust = GEN_INT (step1); ++ if (!IMM12_OPERAND (step1)) ++ { ++ loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode), adjust); ++ adjust = LARCH_PROLOGUE_TEMP (Pmode); ++ } + +-static void +-loongarch_split_plus (rtx x, rtx *base_ptr, HOST_WIDE_INT *offset_ptr) +-{ +- if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))) ++ insn = emit_insn (gen_add3_insn (stack_pointer_rtx, ++ stack_pointer_rtx, ++ adjust)); ++ ++ rtx dwarf = NULL_RTX; ++ rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, ++ stack_pointer_rtx, ++ GEN_INT (step2)); ++ ++ dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ ++ REG_NOTES (insn) = dwarf; ++ } ++ ++ /* Restore drap reg and fp reg first when stack_realign_drap is true. */ ++ if (stack_realign_drap) ++ { ++ if (frame->mask & (1LL << HARD_FRAME_POINTER_REGNUM)) ++ { ++ offset = cfun->machine->frame.gp_sp_offset ++ - (frame->total_size - step2) ; ++ loongarch_save_restore_reg (word_mode, HARD_FRAME_POINTER_REGNUM, ++ offset, loongarch_restore_reg); ++ cfun->machine->frame.mask ++ = frame->mask & ~(1LL << HARD_FRAME_POINTER_REGNUM); ++ saved_gpr_num ++; ++ } ++ if (frame->mask & (1LL << find_drap_reg ())) ++ { ++ offset = cfun->machine->frame.gp_sp_offset ++ - (frame->total_size - step2) - UNITS_PER_WORD * saved_gpr_num; ++ loongarch_save_restore_reg (word_mode, find_drap_reg (), ++ offset, loongarch_restore_reg); ++ cfun->machine->frame.mask ++ = frame->mask & ~(1LL << (find_drap_reg ())); ++ saved_gpr_num ++; ++ } ++ cfun->machine->frame.gpr_saved_num = saved_gpr_num; ++ } ++ } ++ else /* stack_realign_fp. */ + { +- *base_ptr = XEXP (x, 0); +- *offset_ptr = INTVAL (XEXP (x, 1)); ++ /* If we need to restore registers, deallocate as much stack as ++ possible in the second step without going out of range. */ ++ if ((frame->mask | frame->fmask) != 0) ++ { ++ step2 = loongarch_first_stack_step (frame); ++ rtx tmp_reg = plus_constant (Pmode, ++ hard_frame_pointer_rtx, ++ -(frame->hard_frame_pointer_offset ++ - frame->frame_pointer_offset)); ++ insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, tmp_reg)); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ } + } +- else ++ ++ /* Restore the registers. */ ++ loongarch_for_each_saved_reg (frame->total_size - step2, ++ loongarch_restore_reg); ++ ++ cfun->machine->frame.mask = mask; ++ ++ if (need_barrier_p) ++ loongarch_emit_stack_tie (); ++ ++ /* Deallocate the final bit of the frame. 
*/ ++ if (step2 > 0) + { +- *base_ptr = x; +- *offset_ptr = 0; ++ if (stack_realign_drap) ++ { ++ rtx tmp_reg = gen_rtx_REG (Pmode, find_drap_reg ()); ++ insn = emit_insn (gen_add3_insn (stack_pointer_rtx, ++ tmp_reg, ++ const0_rtx)); ++ } ++ else ++ insn = emit_insn (gen_add3_insn (stack_pointer_rtx, ++ stack_pointer_rtx, ++ GEN_INT (step2))); ++ ++ rtx dwarf = NULL_RTX; ++ rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx, const0_rtx); ++ dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ ++ REG_NOTES (insn) = dwarf; + } ++ ++ /* Add in the __builtin_eh_return stack adjustment. */ ++ if (crtl->calls_eh_return) ++ emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, ++ EH_RETURN_STACKADJ_RTX)); ++ ++ if (!sibcall_p) ++ emit_jump_insn (gen_simple_return_internal (ra)); + } +- +-static unsigned int loongarch_build_integer (struct loongarch_integer_op *, +- unsigned HOST_WIDE_INT); ++ ++#define LU32I_B (0xfffffULL << 32) ++#define LU52I_B (0xfffULL << 52) + + /* Fill CODES with a sequence of rtl operations to load VALUE. +- Return the number of operations needed. +- Split interger in loongarch_output_move. */ ++ Return the number of operations needed. */ + + static unsigned int + loongarch_build_integer (struct loongarch_integer_op *codes, +- unsigned HOST_WIDE_INT value) ++ HOST_WIDE_INT value) ++ + { +- uint32_t hi32, lo32; +- char all0_bit_vec, sign_bit_vec, allf_bit_vec, paritial_is_sext_of_prev; + unsigned int cost = 0; + +- lo32 = value & 0xffffffff; +- hi32 = value >> 32; +- +- all0_bit_vec = (((hi32 & 0xfff00000) == 0) << 3) +- | (((hi32 & 0x000fffff) == 0) << 2) +- | (((lo32 & 0xfffff000) == 0) << 1) +- | ((lo32 & 0x00000fff) == 0); +- sign_bit_vec = (((hi32 & 0x80000000) != 0) << 3) +- | (((hi32 & 0x00080000) != 0) << 2) +- | (((lo32 & 0x80000000) != 0) << 1) +- | ((lo32 & 0x00000800) != 0); +- allf_bit_vec = (((hi32 & 0xfff00000) == 0xfff00000) << 3) +- | (((hi32 & 0x000fffff) == 0x000fffff) << 2) +- | (((lo32 & 0xfffff000) == 0xfffff000) << 1) +- | ((lo32 & 0x00000fff) == 0x00000fff); +- paritial_is_sext_of_prev = (all0_bit_vec ^ allf_bit_vec) +- & (all0_bit_vec ^ (sign_bit_vec << 1)); +- +- do +- { +- if (paritial_is_sext_of_prev == 0x7) +- { +- codes[0].code = UNKNOWN; +- codes[0].method = METHOD_LU52I; +- codes[0].value = value & 0xfff0000000000000; +- cost++; +- break; +- } +- if ((all0_bit_vec & 0x3) == 0x2) +- { +- codes[cost].code = UNKNOWN; +- codes[cost].method = METHOD_NORMAL; +- codes[cost].value = value & 0xfff; +- cost++; +- } +- else +- { +- switch (paritial_is_sext_of_prev & 0x3) +- { +- case 0: +- codes[cost].code = UNKNOWN; +- codes[cost].method = METHOD_NORMAL; +- codes[cost].value = ((HOST_WIDE_INT)value << 32 >> 32) & 0xfffffffffffff000; +- cost++; +- codes[cost].code = IOR; +- codes[cost].method = METHOD_NORMAL; +- codes[cost].value = value & 0xfff; +- cost++; +- break; +- case 1: +- codes[cost].code = UNKNOWN; +- codes[cost].method = METHOD_NORMAL; +- codes[cost].value = ((HOST_WIDE_INT)value << 32 >> 32) & 0xfffffffffffff000; +- cost++; +- break; +- case 2: +- codes[cost].code = UNKNOWN; +- codes[cost].method = METHOD_NORMAL; +- codes[cost].value = (HOST_WIDE_INT)value << 52 >> 52; +- cost++; +- break; +- case 3: +- codes[cost].code = UNKNOWN; +- codes[cost].method = METHOD_NORMAL; +- codes[cost].value = 0; ++ /* Get the lower 32 bits of the value. 
*/ ++ HOST_WIDE_INT low_part = (int32_t)value; ++ ++ if (IMM12_OPERAND (low_part) || IMM12_OPERAND_UNSIGNED (low_part)) ++ { ++ /* The value of the lower 32 bit be loaded with one instruction. ++ lu12i.w. */ ++ codes[0].code = UNKNOWN; ++ codes[0].method = METHOD_NORMAL; ++ codes[0].value = low_part; ++ cost++; ++ } ++ else ++ { ++ /* lu12i.w + ior. */ ++ codes[0].code = UNKNOWN; ++ codes[0].method = METHOD_NORMAL; ++ codes[0].value = low_part & ~(IMM_REACH - 1); ++ cost++; ++ HOST_WIDE_INT iorv = low_part & (IMM_REACH - 1); ++ if (iorv != 0) ++ { ++ codes[1].code = IOR; ++ codes[1].method = METHOD_NORMAL; ++ codes[1].value = iorv; + cost++; +- break; +- default: +- gcc_unreachable (); + } +- } ++ } + +- if (((value & 0xfffffffffffff800) ^ 0xfff00000fffff800) == 0) ++ if (TARGET_64BIT) ++ { ++ bool lu32i[2] = {(value & LU32I_B) == 0, (value & LU32I_B) == LU32I_B}; ++ bool lu52i[2] = {(value & LU52I_B) == 0, (value & LU52I_B) == LU52I_B}; ++ ++ int sign31 = (value & (HOST_WIDE_INT_1U << 31)) >> 31; ++ int sign51 = (value & (HOST_WIDE_INT_1U << 51)) >> 51; ++ /* Determine whether the upper 32 bits are sign-extended from the lower ++ 32 bits. If it is, the instructions to load the high order can be ++ ommitted. */ ++ if (lu32i[sign31] && lu52i[sign31]) ++ return cost; ++ /* Determine whether bits 32-51 are sign-extended from the lower 32 ++ bits. If so, directly load 52-63 bits. */ ++ else if (lu32i[sign31]) + { +- codes[cost].method = METHOD_INSV; +- cost++; +- break; ++ codes[cost].method = METHOD_LU52I; ++ codes[cost].value = value & LU52I_B; ++ return cost + 1; + } + +- switch (paritial_is_sext_of_prev >> 2) +- { +- case 0: +- codes[cost].method = METHOD_LU32I; +- codes[cost].value = ((HOST_WIDE_INT)value << 12 >> 12) & 0xffffffff00000000; +- cost++; +- case 1: ++ codes[cost].method = METHOD_LU32I; ++ codes[cost].value = (value & LU32I_B) | (sign51 ? LU52I_B : 0); ++ cost++; ++ ++ /* Determine whether the 52-61 bits are sign-extended from the low order, ++ and if not, load the 52-61 bits. */ ++ if (!lu52i[(value & (HOST_WIDE_INT_1U << 51)) >> 51]) ++ { + codes[cost].method = METHOD_LU52I; +- codes[cost].value = value & 0xfff0000000000000; +- cost++; +- break; +- case 2: +- codes[cost].method = METHOD_LU32I; +- codes[cost].value = ((HOST_WIDE_INT)value << 12 >> 12) & 0xffffffff00000000; +- cost++; +- break; +- case 3: +- break; +- default: +- gcc_unreachable (); +- } ++ codes[cost].value = value & LU52I_B; ++ cost++; ++ } + } +- while (0); ++ ++ gcc_assert (cost <= LARCH_MAX_INTEGER_OPS); + + return cost; + } +- ++ + /* Fill CODES with a sequence of rtl operations to load VALUE. + Return the number of operations needed. +- Split interger in loongarch_output_move. */ ++ Split interger in loongarch_output_move. */ + + static unsigned int + loongarch_integer_cost (HOST_WIDE_INT value) + { + struct loongarch_integer_op codes[LARCH_MAX_INTEGER_OPS]; +- return loongarch_build_integer(codes, value); ++ return loongarch_build_integer (codes, value); + } + + /* Implement TARGET_LEGITIMATE_CONSTANT_P. */ +@@ -1785,14 +1846,13 @@ loongarch_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x) + { + return loongarch_const_insns (x) > 0; + } +- + + /* Return true if X is a thread-local symbol. 
*/ + + static bool + loongarch_tls_symbol_p (rtx x) + { +- return GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0; ++ return SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0; + } + + /* Return true if SYMBOL_REF X is associated with a global symbol +@@ -1809,9 +1869,6 @@ loongarch_global_symbol_p (const_rtx x) + if (!decl) + return !SYMBOL_REF_LOCAL_P (x) || SYMBOL_REF_EXTERNAL_P (x); + +- /* Weakref symbols are not TREE_PUBLIC, but their targets are global +- or weak symbols. Relocations in the object file will be against +- the target symbol, so it's that symbol's binding that matters here. */ + return DECL_P (decl) && (TREE_PUBLIC (decl) || DECL_WEAK (decl)); + } + +@@ -1826,9 +1883,6 @@ loongarch_global_symbol_noweak_p (const_rtx x) + if (!decl) + return !SYMBOL_REF_LOCAL_P (x) || SYMBOL_REF_EXTERNAL_P (x); + +- /* Weakref symbols are not TREE_PUBLIC, but their targets are global +- or weak symbols. Relocations in the object file will be against +- the target symbol, so it's that symbol's binding that matters here. */ + return DECL_P (decl) && TREE_PUBLIC (decl); + } + +@@ -1841,7 +1895,6 @@ loongarch_weak_symbol_p (const_rtx x) + return DECL_P (decl) && DECL_WEAK (decl); + } + +- + /* Return true if SYMBOL_REF X binds locally. */ + + bool +@@ -1850,9 +1903,8 @@ loongarch_symbol_binds_local_p (const_rtx x) + if (GET_CODE (x) == LABEL_REF) + return false; + +- return (SYMBOL_REF_DECL (x) +- ? targetm.binds_local_p (SYMBOL_REF_DECL (x)) +- : SYMBOL_REF_LOCAL_P (x)); ++ return (SYMBOL_REF_DECL (x) ? targetm.binds_local_p (SYMBOL_REF_DECL (x)) ++ : SYMBOL_REF_LOCAL_P (x)); + } + + /* Return true if OP is a constant vector with the number of units in MODE, +@@ -1995,38 +2047,34 @@ loongarch_const_vector_shuffle_set_p (rtx op, machine_mode mode) + static bool + loongarch_rtx_constant_in_small_data_p (machine_mode mode) + { +- return (GET_MODE_SIZE (mode) <= loongarch_small_data_threshold); ++ return (GET_MODE_SIZE (mode) <= g_switch_value); + } + + /* Return the method that should be used to access SYMBOL_REF or +- LABEL_REF X in context CONTEXT. */ ++ LABEL_REF X. */ + + static enum loongarch_symbol_type +-loongarch_classify_symbol (const_rtx x, enum loongarch_symbol_context context) ++loongarch_classify_symbol (const_rtx x) + { +- if (TARGET_RTP_PIC) +- return SYMBOL_GOT_DISP; +- + if (GET_CODE (x) == LABEL_REF) +- { +- return SYMBOL_GOT_DISP; +- } ++ return SYMBOL_GOT_DISP; + +- gcc_assert (GET_CODE (x) == SYMBOL_REF); ++ gcc_assert (SYMBOL_REF_P (x)); + + if (SYMBOL_REF_TLS_MODEL (x)) + return SYMBOL_TLS; + +- if (GET_CODE (x) == SYMBOL_REF) ++ if (SYMBOL_REF_P (x)) + return SYMBOL_GOT_DISP; ++ ++ return SYMBOL_GOT_DISP; + } + +-/* Return true if X is a symbolic constant that can be used in context +- CONTEXT. If it is, store the type of the symbol in *SYMBOL_TYPE. */ ++/* Return true if X is a symbolic constant. If it is, ++ store the type of the symbol in *SYMBOL_TYPE. 
*/ + + bool +-loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_context context, +- enum loongarch_symbol_type *symbol_type) ++loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_type *symbol_type) + { + rtx offset; + +@@ -2036,9 +2084,9 @@ loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_context context, + *symbol_type = UNSPEC_ADDRESS_TYPE (x); + x = UNSPEC_ADDRESS (x); + } +- else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF) ++ else if (SYMBOL_REF_P (x) || GET_CODE (x) == LABEL_REF) + { +- *symbol_type = loongarch_classify_symbol (x, context); ++ *symbol_type = loongarch_classify_symbol (x); + if (*symbol_type == SYMBOL_TLS) + return true; + } +@@ -2052,8 +2100,6 @@ loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_context context, + relocations. */ + switch (*symbol_type) + { +- /* Fall through. */ +- + case SYMBOL_GOT_DISP: + case SYMBOL_TLSGD: + case SYMBOL_TLSLDM: +@@ -2062,17 +2108,25 @@ loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_context context, + } + gcc_unreachable (); + } +- +-/* Like loongarch_symbol_insns We rely on the fact that, in the worst case. */ ++ ++/* If MODE is MAX_MACHINE_MODE, return the number of instructions needed ++ to load symbols of type TYPE into a register. Return 0 if the given ++ type of symbol cannot be used as an immediate operand. ++ ++ Otherwise, return the number of instructions needed to load or store ++ values of mode MODE to or from addresses of type TYPE. Return 0 if ++ the given type of symbol is not valid in addresses. */ + + static int +-loongarch_symbol_insns_1 (enum loongarch_symbol_type type, machine_mode mode) ++loongarch_symbol_insns (enum loongarch_symbol_type type, machine_mode mode) + { +- if (loongarch_use_pcrel_pool_p[(int) type]) +- { +- /* The constant must be loaded and then dereferenced. */ +- return 0; +- } ++ /* LSX LD.* and ST.* cannot support loading symbols via an immediate ++ operand. */ ++ if (LSX_SUPPORTED_MODE_P (mode)) ++ return 0; ++ ++ if (LASX_SUPPORTED_MODE_P (mode)) ++ return 0; + + switch (type) + { +@@ -2082,8 +2136,6 @@ loongarch_symbol_insns_1 (enum loongarch_symbol_type type, machine_mode mode) + if (mode != MAX_MACHINE_MODE) + return 0; + +- /* Fall through. */ +- + return 3; + + case SYMBOL_TLSGD: +@@ -2097,30 +2149,6 @@ loongarch_symbol_insns_1 (enum loongarch_symbol_type type, machine_mode mode) + gcc_unreachable (); + } + +-/* If MODE is MAX_MACHINE_MODE, return the number of instructions needed +- to load symbols of type TYPE into a register. Return 0 if the given +- type of symbol cannot be used as an immediate operand. +- +- Otherwise, return the number of instructions needed to load or store +- values of mode MODE to or from addresses of type TYPE. Return 0 if +- the given type of symbol is not valid in addresses. +- +- In both cases, instruction counts are based off BASE_INSN_LENGTH. */ +- +-static int +-loongarch_symbol_insns (enum loongarch_symbol_type type, machine_mode mode) +-{ +- /* LSX LD.* and ST.* cannot support loading symbols via an immediate +- operand. */ +- if (LSX_SUPPORTED_MODE_P (mode)) +- return 0; +- +- if (LASX_SUPPORTED_MODE_P (mode)) +- return 0; +- +- return loongarch_symbol_insns_1 (type, mode) * (1); +-} +- + /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ + + static bool +@@ -2129,11 +2157,6 @@ loongarch_cannot_force_const_mem (machine_mode mode, rtx x) + enum loongarch_symbol_type type; + rtx base, offset; + +- /* There is no assembler syntax for expressing an address-sized +- high part. 
*/ +- if (GET_CODE (x) == HIGH) +- return true; +- + /* As an optimization, reject constants that loongarch_legitimize_move + can expand inline. + +@@ -2147,16 +2170,12 @@ loongarch_cannot_force_const_mem (machine_mode mode, rtx x) + return true; + + split_const (x, &base, &offset); +- if (loongarch_symbolic_constant_p (base, SYMBOL_CONTEXT_LEA, &type)) ++ if (loongarch_symbolic_constant_p (base, &type)) + { +- /* See whether we explicitly want these symbols in the pool. */ +- if (loongarch_use_pcrel_pool_p[(int) type]) +- return false; +- + /* The same optimization as for CONST_INT. */ +- if (SMALL_INT (offset) && loongarch_symbol_insns (type, MAX_MACHINE_MODE) > 0) ++ if (IMM12_INT (offset) ++ && loongarch_symbol_insns (type, MAX_MACHINE_MODE) > 0) + return true; +- + } + + /* TLS symbols must be computed by loongarch_legitimize_move. */ +@@ -2166,22 +2185,13 @@ loongarch_cannot_force_const_mem (machine_mode mode, rtx x) + return false; + } + +-/* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. We can't use blocks for +- constants when we're using a per-function constant pool. */ +- +-static bool +-loongarch_use_blocks_for_constant_p (machine_mode mode ATTRIBUTE_UNUSED, +- const_rtx x ATTRIBUTE_UNUSED) +-{ +- return 1; +-} +- + /* Return true if register REGNO is a valid base register for mode MODE. + STRICT_P is true if REG_OK_STRICT is in effect. */ + + int +-loongarch_regno_mode_ok_for_base_p (int regno, machine_mode mode, +- bool strict_p) ++loongarch_regno_mode_ok_for_base_p (int regno, ++ machine_mode mode ATTRIBUTE_UNUSED, ++ bool strict_p) + { + if (!HARD_REGISTER_NUM_P (regno)) + { +@@ -2196,7 +2206,6 @@ loongarch_regno_mode_ok_for_base_p (int regno, machine_mode mode, + if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM) + return true; + +- + return GP_REG_P (regno); + } + +@@ -2206,7 +2215,7 @@ loongarch_regno_mode_ok_for_base_p (int regno, machine_mode mode, + static bool + loongarch_valid_base_register_p (rtx x, machine_mode mode, bool strict_p) + { +- if (!strict_p && GET_CODE (x) == SUBREG) ++ if (!strict_p && SUBREG_P (x)) + x = SUBREG_REG (x); + + return (REG_P (x) +@@ -2220,8 +2229,8 @@ static bool + loongarch_valid_offset_p (rtx x, machine_mode mode) + { + /* Check that X is a signed 12-bit number, +- * or check that X is a signed 16-bit number +- * and offset 4 byte aligned */ ++ or check that X is a signed 16-bit number ++ and offset 4 byte aligned. */ + if (!(const_arith_operand (x, Pmode) + || ((mode == E_SImode || mode == E_DImode) + && const_imm16_operand (x, Pmode) +@@ -2231,7 +2240,7 @@ loongarch_valid_offset_p (rtx x, machine_mode mode) + /* We may need to split multiword moves, so make sure that every word + is accessible. */ + if (GET_MODE_SIZE (mode) > UNITS_PER_WORD +- && !SMALL_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode) - UNITS_PER_WORD)) ++ && !IMM12_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode) - UNITS_PER_WORD)) + return false; + + /* LSX LD.* and ST.* supports 10-bit signed offsets. 
*/ +@@ -2248,13 +2257,42 @@ loongarch_valid_offset_p (rtx x, machine_mode mode) + return true; + } + ++static bool ++loongarch_valid_index_p (struct loongarch_address_info *info, rtx x, ++ machine_mode mode, bool strict_p) ++{ ++ rtx index; ++ ++ if ((REG_P (x) || SUBREG_P (x)) ++ && GET_MODE (x) == Pmode) ++ { ++ index = x; ++ } ++ else ++ return false; ++ ++ if (!strict_p ++ && SUBREG_P (index) ++ && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (index))]) ++ index = SUBREG_REG (index); ++ ++ if (loongarch_valid_base_register_p (index, mode, strict_p)) ++ { ++ info->type = ADDRESS_REG_REG; ++ info->offset = index; ++ return true; ++ } ++ ++ return false; ++} ++ + /* Return true if X is a valid address for machine mode MODE. If it is, + fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in + effect. */ + + static bool + loongarch_classify_address (struct loongarch_address_info *info, rtx x, +- machine_mode mode, bool strict_p) ++ machine_mode mode, bool strict_p) + { + switch (GET_CODE (x)) + { +@@ -2266,21 +2304,26 @@ loongarch_classify_address (struct loongarch_address_info *info, rtx x, + return loongarch_valid_base_register_p (info->reg, mode, strict_p); + + case PLUS: ++/* ++ if (loongarch_valid_base_register_p (XEXP (x, 0), mode, strict_p) ++ && loongarch_valid_index_p (info, XEXP (x, 1), mode, strict_p)) ++ { ++ info->reg = XEXP (x, 0); ++ return true; ++ } ++ ++ if (loongarch_valid_base_register_p (XEXP (x, 1), mode, strict_p) ++ && loongarch_valid_index_p (info, XEXP (x, 0), mode, strict_p)) ++ { ++ info->reg = XEXP (x, 1); ++ return true; ++ } ++*/ + info->type = ADDRESS_REG; + info->reg = XEXP (x, 0); + info->offset = XEXP (x, 1); + return (loongarch_valid_base_register_p (info->reg, mode, strict_p) + && loongarch_valid_offset_p (info->offset, mode)); +- #if 0 +- case LABEL_REF: +- case SYMBOL_REF: +- info->type = ADDRESS_SYMBOLIC; +- return (loongarch_symbolic_constant_p (x, SYMBOL_CONTEXT_MEM, +- &info->symbol_type) +- && loongarch_symbol_insns (info->symbol_type, mode) > 0 +- && !loongarch_split_p[info->symbol_type]); +- +- #endif + default: + return false; + } +@@ -2296,39 +2339,21 @@ loongarch_legitimate_address_p (machine_mode mode, rtx x, bool strict_p) + return loongarch_classify_address (&addr, x, mode, strict_p); + } + +-/* Return true if X is a legitimate $sp-based address for mode MODE. */ +- +-bool +-loongarch_stack_address_p (rtx x, machine_mode mode) +-{ +- struct loongarch_address_info addr; +- +- return (loongarch_classify_address (&addr, x, mode, false) +- && addr.type == ADDRESS_REG +- && addr.reg == stack_pointer_rtx); +-} +- +-/* Return true if ADDR matches the pattern for the L{B,H,W,D}{,U}X load +- indexed address instruction. Note that such addresses are +- not considered legitimate in the TARGET_LEGITIMATE_ADDRESS_P +- sense, because their use is so restricted. */ ++/* Return true if ADDR matches the pattern for the indexed address ++ instruction. */ + + static bool +-loongarch_lx_address_p (rtx addr, machine_mode mode) ++loongarch_index_address_p (rtx addr, machine_mode mode ATTRIBUTE_UNUSED) + { + if (GET_CODE (addr) != PLUS + || !REG_P (XEXP (addr, 0)) + || !REG_P (XEXP (addr, 1))) + return false; +- if (LSX_SUPPORTED_MODE_P (mode)) +- return true; +- return false; ++ return true; + } +- + + /* Return the number of instructions needed to load or store a value +- of mode MODE at address X, assuming that BASE_INSN_LENGTH is the +- length of one instruction. Return 0 if X isn't valid for MODE. ++ of mode MODE at address X. 
Return 0 if X isn't valid for MODE. + Assume that multiword moves may need to be split into word moves + if MIGHT_SPLIT_P, otherwise assume that a single load or store is + enough. */ +@@ -2338,7 +2363,8 @@ loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p) + { + struct loongarch_address_info addr; + int factor; +- bool lsx_p = (!might_split_p && (LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode))); ++ bool lsx_p = (!might_split_p && ++ (LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode))); + + if (!loongarch_classify_address (&addr, x, mode, false)) + return 0; +@@ -2367,6 +2393,9 @@ loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p) + } + return factor; + ++ case ADDRESS_REG_REG: ++ return lsx_p ? 0 : factor; ++ + case ADDRESS_CONST_INT: + return lsx_p ? 0 : factor; + +@@ -2380,7 +2409,8 @@ loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p) + shifted left SHIFT bits before being used. */ + + bool +-loongarch_unsigned_immediate_p (unsigned HOST_WIDE_INT x, int bits, int shift = 0) ++loongarch_unsigned_immediate_p (unsigned HOST_WIDE_INT x, int bits, ++ int shift = 0) + { + return (x & ((1 << shift) - 1)) == 0 && x < ((unsigned) 1 << (shift + bits)); + } +@@ -2389,7 +2419,8 @@ loongarch_unsigned_immediate_p (unsigned HOST_WIDE_INT x, int bits, int shift = + shifted left SHIFT bits before being used. */ + + bool +-loongarch_signed_immediate_p (unsigned HOST_WIDE_INT x, int bits, int shift = 0) ++loongarch_signed_immediate_p (unsigned HOST_WIDE_INT x, int bits, ++ int shift = 0) + { + x += 1 << (bits + shift - 1); + return loongarch_unsigned_immediate_p (x, bits, shift); +@@ -2408,20 +2439,6 @@ loongarch_ldst_scaled_shift (machine_mode mode) + return shift; + } + +-/* Return true if X is a legitimate address that conforms to the requirements +- for a microLARCH LWSP or SWSP insn. */ +- +-bool +-lwsp_swsp_address_p (rtx x, machine_mode mode) +-{ +- struct loongarch_address_info addr; +- +- return (loongarch_classify_address (&addr, x, mode, false) +- && addr.type == ADDRESS_REG +- && REGNO (addr.reg) == STACK_POINTER_REGNUM +- && uw5_operand (addr.offset, mode)); +-} +- + /* Return true if X is a legitimate address with a 12-bit offset. + MODE is the mode of the value being accessed. */ + +@@ -2433,54 +2450,47 @@ loongarch_12bit_offset_address_p (rtx x, machine_mode mode) + return (loongarch_classify_address (&addr, x, mode, false) + && addr.type == ADDRESS_REG + && CONST_INT_P (addr.offset) +- && ULARCH_12BIT_OFFSET_P (INTVAL (addr.offset))); ++ && LARCH_U12BIT_OFFSET_P (INTVAL (addr.offset))); + } + +-/* Return true if X is a legitimate address with a 9-bit offset. ++/* Return true if X is a legitimate address with a 14-bit offset shifted 2. + MODE is the mode of the value being accessed. */ + + bool +-loongarch_9bit_offset_address_p (rtx x, machine_mode mode) ++loongarch_14bit_shifted_offset_address_p (rtx x, machine_mode mode) + { + struct loongarch_address_info addr; + + return (loongarch_classify_address (&addr, x, mode, false) + && addr.type == ADDRESS_REG + && CONST_INT_P (addr.offset) +- && LARCH_9BIT_OFFSET_P (INTVAL (addr.offset))); ++ && LARCH_16BIT_OFFSET_P (INTVAL (addr.offset)) ++ && LARCH_SHIFT_2_OFFSET_P (INTVAL (addr.offset))); + } + +-/* Return true if X is a legitimate address with a 14-bit offset shifted 2. +- MODE is the mode of the value being accessed. 
*/ +- + bool +-loongarch_14bit_shifted_offset_address_p (rtx x, machine_mode mode) ++loongarch_base_index_address_p (rtx x, machine_mode mode) + { + struct loongarch_address_info addr; + + return (loongarch_classify_address (&addr, x, mode, false) +- && addr.type == ADDRESS_REG +- && CONST_INT_P (addr.offset) +- && LISA_16BIT_OFFSET_P (INTVAL (addr.offset)) +- && LISA_SHIFT_2_OFFSET_P (INTVAL (addr.offset))); ++ && addr.type == ADDRESS_REG_REG ++ && REG_P (addr.offset)); + } + +- + /* Return the number of instructions needed to load constant X, +- assuming that BASE_INSN_LENGTH is the length of one instruction. + Return 0 if X isn't a valid constant. */ + + int + loongarch_const_insns (rtx x) + { +- struct loongarch_integer_op codes[LARCH_MAX_INTEGER_OPS]; + enum loongarch_symbol_type symbol_type; + rtx offset; + + switch (GET_CODE (x)) + { + case CONST_INT: +- return loongarch_build_integer (codes, INTVAL (x)); ++ return loongarch_integer_cost (INTVAL (x)); + + case CONST_VECTOR: + if ((ISA_HAS_LSX || ISA_HAS_LASX) +@@ -2488,19 +2498,18 @@ loongarch_const_insns (rtx x) + return 1; + /* Fall through. */ + case CONST_DOUBLE: +- /* Allow zeros for normal mode, where we can use $0. */ + return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0; + + case CONST: + /* See if we can refer to X directly. */ +- if (loongarch_symbolic_constant_p (x, SYMBOL_CONTEXT_LEA, &symbol_type)) ++ if (loongarch_symbolic_constant_p (x, &symbol_type)) + return loongarch_symbol_insns (symbol_type, MAX_MACHINE_MODE); + + /* Otherwise try splitting the constant into a base and offset. +- If the offset is a 16-bit value, we can load the base address +- into a register and then use (D)ADDIU to add in the offset. ++ If the offset is a 12-bit value, we can load the base address ++ into a register and then use ADDI.{W/D} to add in the offset. + If the offset is larger, we can load the base and offset +- into separate registers and add them together with (D)ADDU. ++ into separate registers and add them together with ADD.{W/D}. + However, the latter is only possible before reload; during + and after reload, we must have the option of forcing the + constant into the pool instead. */ +@@ -2510,18 +2519,18 @@ loongarch_const_insns (rtx x) + int n = loongarch_const_insns (x); + if (n != 0) + { +- if (SMALL_INT (offset)) ++ if (IMM12_INT (offset)) + return n + 1; + else if (!targetm.cannot_force_const_mem (GET_MODE (x), x)) +- return n + 1 + loongarch_build_integer (codes, INTVAL (offset)); ++ return n + 1 + loongarch_integer_cost (INTVAL (offset)); + } + } + return 0; + + case SYMBOL_REF: + case LABEL_REF: +- return loongarch_symbol_insns (loongarch_classify_symbol (x, SYMBOL_CONTEXT_LEA), +- MAX_MACHINE_MODE); ++ return loongarch_symbol_insns ( ++ loongarch_classify_symbol (x), MAX_MACHINE_MODE); + + default: + return 0; +@@ -2530,8 +2539,7 @@ loongarch_const_insns (rtx x) + + /* X is a doubleword constant that can be handled by splitting it into + two words and loading each word separately. Return the number of +- instructions required to do this, assuming that BASE_INSN_LENGTH +- is the length of one instruction. */ ++ instructions required to do this. */ + + int + loongarch_split_const_insns (rtx x) +@@ -2565,8 +2573,7 @@ loongarch_subword_at_byte (rtx op, unsigned int byte) + } + + /* Return the number of instructions needed to implement INSN, +- given that it loads from or stores to MEM. Assume that +- BASE_INSN_LENGTH is the length of one instruction. */ ++ given that it loads from or stores to MEM. 
*/ + + int + loongarch_load_store_insns (rtx mem, rtx_insn *insn) +@@ -2583,18 +2590,18 @@ loongarch_load_store_insns (rtx mem, rtx_insn *insn) + if (might_split_p) + { + set = single_set (insn); +- if (set && !loongarch_split_move_insn_p (SET_DEST (set), SET_SRC (set), insn)) ++ if (set ++ && !loongarch_split_move_insn_p (SET_DEST (set), SET_SRC (set))) + might_split_p = false; + } + + return loongarch_address_insns (XEXP (mem, 0), mode, might_split_p); + } + +-/* Return the number of instructions needed for an integer division, +- assuming that BASE_INSN_LENGTH is the length of one instruction. */ ++/* Return the number of instructions needed for an integer division. */ + + int +-loongarch_idiv_insns (machine_mode mode) ++loongarch_idiv_insns (machine_mode mode ATTRIBUTE_UNUSED) + { + int count; + +@@ -2605,7 +2612,6 @@ loongarch_idiv_insns (machine_mode mode) + return count; + } + +- + /* Emit an instruction of the form (set TARGET (CODE OP0 OP1)). */ + + void +@@ -2619,7 +2625,8 @@ loongarch_emit_binary (enum rtx_code code, rtx target, rtx op0, rtx op1) + of mode MODE. Return that new register. */ + + static rtx +-loongarch_force_binary (machine_mode mode, enum rtx_code code, rtx op0, rtx op1) ++loongarch_force_binary (machine_mode mode, enum rtx_code code, rtx op0, ++ rtx op1) + { + rtx reg; + +@@ -2643,13 +2650,12 @@ loongarch_force_temporary (rtx dest, rtx value) + } + } + +- + /* Wrap symbol or label BASE in an UNSPEC address of type SYMBOL_TYPE, + then add CONST_INT OFFSET to the result. */ + + static rtx + loongarch_unspec_address_offset (rtx base, rtx offset, +- enum loongarch_symbol_type symbol_type) ++ enum loongarch_symbol_type symbol_type) + { + base = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, base), + UNSPEC_ADDRESS_FIRST + symbol_type); +@@ -2684,42 +2690,20 @@ loongarch_strip_unspec_address (rtx op) + return op; + } + +- +-/* Return a base register that holds pic_offset_table_rtx. +- TEMP, if nonnull, is a scratch Pmode base register. */ +- +-rtx +-loongarch_pic_base_register (rtx temp) +-{ +- return pic_offset_table_rtx; +- +-} +- +-/* If SRC is the RHS of a load_call insn, return the underlying symbol +- reference. Return NULL_RTX otherwise. */ +- +-static rtx +-loongarch_strip_unspec_call (rtx src) +-{ +- if (GET_CODE (src) == UNSPEC && XINT (src, 1) == UNSPEC_LOAD_CALL) +- return loongarch_strip_unspec_address (XVECEXP (src, 0, 1)); +- return NULL_RTX; +-} +- + /* Return a legitimate address for REG + OFFSET. TEMP is as for + loongarch_force_temporary; it is only needed when OFFSET is not a +- SMALL_OPERAND. */ ++ IMM12_OPERAND. */ + + static rtx + loongarch_add_offset (rtx temp, rtx reg, HOST_WIDE_INT offset) + { +- if (!SMALL_OPERAND (offset)) ++ if (!IMM12_OPERAND (offset)) + { + rtx high; + +- /* Leave OFFSET as a 16-bit offset and put the excess in HIGH. +- The addition inside the macro CONST_HIGH_PART may cause an +- overflow, so we need to force a sign-extension check. */ ++ /* Leave OFFSET as a 12-bit offset and put the excess in HIGH. ++ The addition inside the macro CONST_HIGH_PART may cause an ++ overflow, so we need to force a sign-extension check. */ + high = gen_int_mode (CONST_HIGH_PART (offset), Pmode); + offset = CONST_LOW_PART (offset); + high = loongarch_force_temporary (temp, high); +@@ -2727,49 +2711,40 @@ loongarch_add_offset (rtx temp, rtx reg, HOST_WIDE_INT offset) + } + return plus_constant (Pmode, reg, offset); + } +- ++ + /* The __tls_get_attr symbol. 
*/ +-static GTY(()) rtx loongarch_tls_symbol; ++static GTY (()) rtx loongarch_tls_symbol; + + /* Load an entry from the GOT for a TLS GD access. */ + +-static rtx loongarch_got_load_tls_gd (rtx dest, rtx sym) ++static rtx ++loongarch_got_load_tls_gd (rtx dest, rtx sym) + { +- if (Pmode == DImode) +- return gen_got_load_tls_gddi (dest, sym); +- else +- return gen_got_load_tls_gdsi (dest, sym); ++ return PMODE_INSN (gen_got_load_tls_gd, (dest, sym)); + } + + /* Load an entry from the GOT for a TLS LD access. */ + +-static rtx loongarch_got_load_tls_ld (rtx dest, rtx sym) ++static rtx ++loongarch_got_load_tls_ld (rtx dest, rtx sym) + { +- if (Pmode == DImode) +- return gen_got_load_tls_lddi (dest, sym); +- else +- return gen_got_load_tls_ldsi (dest, sym); ++ return PMODE_INSN (gen_got_load_tls_ld, (dest, sym)); + } + +- + /* Load an entry from the GOT for a TLS IE access. */ + +-static rtx loongarch_got_load_tls_ie (rtx dest, rtx sym) ++static rtx ++loongarch_got_load_tls_ie (rtx dest, rtx sym) + { +- if (Pmode == DImode) +- return gen_got_load_tls_iedi (dest, sym); +- else +- return gen_got_load_tls_iesi (dest, sym); ++ return PMODE_INSN (gen_got_load_tls_ie, (dest, sym)); + } + + /* Add in the thread pointer for a TLS LE access. */ + +-static rtx loongarch_got_load_tls_le (rtx dest, rtx sym) ++static rtx ++loongarch_got_load_tls_le (rtx dest, rtx sym) + { +- if (Pmode == DImode) +- return gen_got_load_tls_ledi (dest, sym); +- else +- return gen_got_load_tls_lesi (dest, sym); ++ return PMODE_INSN (gen_got_load_tls_le, (dest, sym)); + } + + /* Return an instruction sequence that calls __tls_get_addr. SYM is +@@ -2799,7 +2774,8 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + else + gcc_unreachable (); + +- insn = emit_call_insn (gen_call_value_internal (v0, loongarch_tls_symbol, const0_rtx)); ++ insn = emit_call_insn (gen_call_value_internal (v0, loongarch_tls_symbol, ++ const0_rtx)); + RTL_CONST_CALL_P (insn) = 1; + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), a0); + insn = get_insns (); +@@ -2820,12 +2796,6 @@ loongarch_legitimize_tls_address (rtx loc) + enum tls_model model = SYMBOL_REF_TLS_MODEL (loc); + rtx_insn *insn; + +- /* Since we support TLS copy relocs, non-PIC TLS accesses may all use LE. */ +- #if 0 +- if (!flag_pic) +- model = TLS_MODEL_LOCAL_EXEC; +- #endif +- + switch (model) + { + case TLS_MODEL_LOCAL_DYNAMIC: +@@ -2843,7 +2813,7 @@ loongarch_legitimize_tls_address (rtx loc) + break; + + case TLS_MODEL_INITIAL_EXEC: +- /* la.tls.ie; tp-relative add */ ++ /* la.tls.ie; tp-relative add */ + tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); + tmp = gen_reg_rtx (Pmode); + emit_insn (loongarch_got_load_tls_ie (tmp, loc)); +@@ -2852,7 +2822,7 @@ loongarch_legitimize_tls_address (rtx loc) + break; + + case TLS_MODEL_LOCAL_EXEC: +- /* la.tls.le; tp-relative add */ ++ /* la.tls.le; tp-relative add */ + tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); + tmp = gen_reg_rtx (Pmode); + emit_insn (loongarch_got_load_tls_le (tmp, loc)); +@@ -2865,7 +2835,7 @@ loongarch_legitimize_tls_address (rtx loc) + } + return dest; + } +- ++ + rtx + loongarch_legitimize_call_address (rtx addr) + { +@@ -2877,7 +2847,25 @@ loongarch_legitimize_call_address (rtx addr) + } + return addr; + } +- ++ ++/* If X is a PLUS of a CONST_INT, return the two terms in *BASE_PTR ++ and *OFFSET_PTR. Return X in *BASE_PTR and 0 in *OFFSET_PTR otherwise. 
*/ ++ ++static void ++loongarch_split_plus (rtx x, rtx *base_ptr, HOST_WIDE_INT *offset_ptr) ++{ ++ if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))) ++ { ++ *base_ptr = XEXP (x, 0); ++ *offset_ptr = INTVAL (XEXP (x, 1)); ++ } ++ else ++ { ++ *base_ptr = x; ++ *offset_ptr = 0; ++ } ++} ++ + /* If X is not a valid address for mode MODE, force it into a register. */ + + static rtx +@@ -2895,7 +2883,7 @@ loongarch_force_address (rtx x, machine_mode mode) + + static rtx + loongarch_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, +- machine_mode mode) ++ machine_mode mode) + { + rtx base, addr; + HOST_WIDE_INT offset; +@@ -2941,28 +2929,30 @@ loongarch_move_integer (rtx temp, rtx dest, unsigned HOST_WIDE_INT value) + } + else + x = force_reg (mode, x); ++ + switch (codes[i].method) + { + case METHOD_NORMAL: +- x = gen_rtx_fmt_ee (codes[i].code, mode, x, GEN_INT (codes[i].value)); ++ x = gen_rtx_fmt_ee (codes[i].code, mode, x, ++ GEN_INT (codes[i].value)); + break; + case METHOD_LU32I: +- emit_insn (gen_rtx_SET (x, gen_rtx_IOR (DImode, +- gen_rtx_ZERO_EXTEND (DImode, +- gen_rtx_SUBREG (SImode, x, 0)), +- GEN_INT (codes[i].value)))); ++ emit_insn ( ++ gen_rtx_SET (x, ++ gen_rtx_IOR (DImode, ++ gen_rtx_ZERO_EXTEND ( ++ DImode, gen_rtx_SUBREG (SImode, x, 0)), ++ GEN_INT (codes[i].value)))); + break; + case METHOD_LU52I: +- emit_insn (gen_lu52i_d (x, x, +- GEN_INT (0xfffffffffffff), +- GEN_INT (codes[i].value))); ++ emit_insn (gen_lu52i_d (x, x, GEN_INT (0xfffffffffffff), ++ GEN_INT (codes[i].value))); + break; + case METHOD_INSV: +- emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, +- x, +- GEN_INT (20), +- GEN_INT (32)), +- gen_rtx_REG (DImode, 0))); ++ emit_insn ( ++ gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, x, GEN_INT (20), ++ GEN_INT (32)), ++ gen_rtx_REG (DImode, 0))); + break; + default: + gcc_unreachable (); +@@ -2997,7 +2987,7 @@ loongarch_legitimize_const_move (machine_mode mode, rtx dest, rtx src) + + /* If we have (const (plus symbol offset)), and that expression cannot + be forced into memory, load the symbol first and add in the offset. +- prefer to do this even if the constant _can_ be forced into memory, ++ prefer to do this even if the constant _can_ be forced into memory, + as it usually produces better code. */ + split_const (src, &base, &offset); + if (offset != const0_rtx +@@ -3005,7 +2995,8 @@ loongarch_legitimize_const_move (machine_mode mode, rtx dest, rtx src) + || (can_create_pseudo_p ()))) + { + base = loongarch_force_temporary (dest, base); +- loongarch_emit_move (dest, loongarch_add_offset (NULL, base, INTVAL (offset))); ++ loongarch_emit_move (dest, ++ loongarch_add_offset (NULL, base, INTVAL (offset))); + return; + } + +@@ -3020,7 +3011,6 @@ loongarch_legitimize_const_move (machine_mode mode, rtx dest, rtx src) + bool + loongarch_legitimize_move (machine_mode mode, rtx dest, rtx src) + { +- + if (!register_operand (dest, mode) && !reg_or_0_operand (src, mode)) + { + loongarch_emit_move (dest, force_reg (mode, src)); +@@ -3029,10 +3019,9 @@ loongarch_legitimize_move (machine_mode mode, rtx dest, rtx src) + + /* Both src and dest are non-registers; one special case is supported where + the source is (const_int 0) and the store can source the zero register. +- LSX and lasx are never able to source the zero register directly in ++ LSX and LASX are never able to source the zero register directly in + memory operations. 
*/ +- if (!register_operand (dest, mode) +- && !register_operand (src, mode) ++ if (!register_operand (dest, mode) && !register_operand (src, mode) + && (!const_0_operand (src, mode) + || LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode))) + { +@@ -3049,40 +3038,26 @@ loongarch_legitimize_move (machine_mode mode, rtx dest, rtx src) + return true; + } + +- if ((GET_CODE (src) == SYMBOL_REF || GET_CODE (src) == LABEL_REF) +- && symbolic_operand (src, VOIDmode) +- && (loongarch_cmodel_var == LARCH_CMODEL_EXTREME)) +- { +- rtx temp = gen_reg_rtx (GET_MODE (dest)); +- rtx x = gen_rtx_UNSPEC_VOLATILE (GET_MODE (dest), gen_rtvec (1, src), UNSPECV_MOVE_EXTREME); +- temp = gen_rtx_USE(VOIDmode, temp); +- temp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec(2, gen_rtx_SET (dest, x), temp)); +- emit_insn (temp); +- return true; +- } +- + return false; + } + +-/* Return true if OP refers to small data symbols directly, not through +- a LO_SUM. CONTEXT is the context in which X appears. */ ++/* Return true if OP refers to small data symbols directly. */ + + static int +-loongarch_small_data_pattern_1 (rtx x, enum loongarch_symbol_context context) ++loongarch_small_data_pattern_1 (rtx x) + { + subrtx_var_iterator::array_type array; + FOR_EACH_SUBRTX_VAR (iter, array, x, ALL) + { + rtx x = *iter; + +- /* Ignore things like "g" constraints in asms. We make no particular +- guarantee about which symbolic constants are acceptable as asm operands +- versus which must be forced into a GPR. */ ++ /* We make no particular guarantee about which symbolic constants are ++ acceptable as asm operands versus which must be forced into a GPR. */ + if (GET_CODE (x) == ASM_OPERANDS) + iter.skip_subrtxes (); + else if (MEM_P (x)) + { +- if (loongarch_small_data_pattern_1 (XEXP (x, 0), SYMBOL_CONTEXT_MEM)) ++ if (loongarch_small_data_pattern_1 (XEXP (x, 0))) + return true; + iter.skip_subrtxes (); + } +@@ -3090,20 +3065,19 @@ loongarch_small_data_pattern_1 (rtx x, enum loongarch_symbol_context context) + return false; + } + +-/* Return true if OP refers to small data symbols directly, not through +- a LO_SUM. */ ++/* Return true if OP refers to small data symbols directly. */ + + bool + loongarch_small_data_pattern_p (rtx op) + { +- return loongarch_small_data_pattern_1 (op, SYMBOL_CONTEXT_LEA); ++ return loongarch_small_data_pattern_1 (op); + } + + /* Rewrite *LOC so that it refers to small data using explicit +- relocations. CONTEXT is the context in which *LOC appears. */ ++ relocation. */ + + static void +-loongarch_rewrite_small_data_1 (rtx *loc, enum loongarch_symbol_context context) ++loongarch_rewrite_small_data_1 (rtx *loc) + { + subrtx_ptr_iterator::array_type array; + FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL) +@@ -3111,7 +3085,7 @@ loongarch_rewrite_small_data_1 (rtx *loc, enum loongarch_symbol_context context) + rtx *loc = *iter; + if (MEM_P (*loc)) + { +- loongarch_rewrite_small_data_1 (&XEXP (*loc, 0), SYMBOL_CONTEXT_MEM); ++ loongarch_rewrite_small_data_1 (&XEXP (*loc, 0)); + iter.skip_subrtxes (); + } + } +@@ -3124,15 +3098,15 @@ rtx + loongarch_rewrite_small_data (rtx pattern) + { + pattern = copy_insn (pattern); +- loongarch_rewrite_small_data_1 (&pattern, SYMBOL_CONTEXT_LEA); ++ loongarch_rewrite_small_data_1 (&pattern); + return pattern; + } +- ++ + /* The cost of loading values from the constant pool. It should be + larger than the cost of any constant we want to synthesize inline. 
*/ + #define CONSTANT_POOL_COST COSTS_N_INSNS (8) + +-/* Return true if there is a instruction that implements CODE ++/* Return true if there is a instruction that implements CODE + and if that instruction accepts X as an immediate operand. */ + + static int +@@ -3148,20 +3122,19 @@ loongarch_immediate_operand_p (int code, HOST_WIDE_INT x) + + case ROTATE: + case ROTATERT: +- /* Likewise rotates, if the target supports rotates at all. */ + return true; + + case AND: + case IOR: + case XOR: + /* These instructions take 12-bit unsigned immediates. */ +- return SMALL_OPERAND_UNSIGNED (x); ++ return IMM12_OPERAND_UNSIGNED (x); + + case PLUS: + case LT: + case LTU: + /* These instructions take 12-bit signed immediates. */ +- return SMALL_OPERAND (x); ++ return IMM12_OPERAND (x); + + case EQ: + case NE: +@@ -3178,11 +3151,11 @@ loongarch_immediate_operand_p (int code, HOST_WIDE_INT x) + + case LE: + /* We add 1 to the immediate and use SLT. */ +- return SMALL_OPERAND (x + 1); ++ return IMM12_OPERAND (x + 1); + + case LEU: + /* Likewise SLTU, but reject the always-true case. */ +- return SMALL_OPERAND (x + 1) && x + 1 != 0; ++ return IMM12_OPERAND (x + 1) && x + 1 != 0; + + case SIGN_EXTRACT: + case ZERO_EXTRACT: +@@ -3219,7 +3192,8 @@ loongarch_binary_cost (rtx x, int single_cost, int double_cost, bool speed) + static int + loongarch_fp_mult_cost (machine_mode mode) + { +- return mode == DFmode ? loongarch_cost->fp_mult_df : loongarch_cost->fp_mult_sf; ++ return mode == DFmode ? loongarch_cost->fp_mult_df ++ : loongarch_cost->fp_mult_sf; + } + + /* Return the cost of floating-point divisions of mode MODE. */ +@@ -3227,23 +3201,20 @@ loongarch_fp_mult_cost (machine_mode mode) + static int + loongarch_fp_div_cost (machine_mode mode) + { +- return mode == DFmode ? loongarch_cost->fp_div_df : loongarch_cost->fp_div_sf; ++ return mode == DFmode ? loongarch_cost->fp_div_df ++ : loongarch_cost->fp_div_sf; + } + + /* Return the cost of sign-extending OP to mode MODE, not including the + cost of OP itself. */ + + static int +-loongarch_sign_extend_cost (machine_mode mode, rtx op) ++loongarch_sign_extend_cost (rtx op) + { + if (MEM_P (op)) + /* Extended loads are as cheap as unextended ones. */ + return 0; + +- if (TARGET_64BIT && mode == DImode && GET_MODE (op) == SImode) +- /* A sign extension from SImode to DImode in 64-bit mode is free. */ +- return 0; +- + return COSTS_N_INSNS (1); + } + +@@ -3251,16 +3222,12 @@ loongarch_sign_extend_cost (machine_mode mode, rtx op) + cost of OP itself. */ + + static int +-loongarch_zero_extend_cost (machine_mode mode, rtx op) ++loongarch_zero_extend_cost (rtx op) + { + if (MEM_P (op)) + /* Extended loads are as cheap as unextended ones. */ + return 0; + +- if (TARGET_64BIT && mode == DImode && GET_MODE (op) == SImode) +- /* We need a shift left by 32 bits and a shift right by 32 bits. */ +- return COSTS_N_INSNS (2); +- + /* We can use ANDI. */ + return COSTS_N_INSNS (1); + } +@@ -3281,15 +3248,15 @@ loongarch_set_reg_reg_cost (machine_mode mode) + { + switch (GET_MODE_CLASS (mode)) + { +- case MODE_FCC: +- return loongarch_set_reg_reg_piece_cost (mode, GET_MODE_SIZE (FCCmode)); ++ case MODE_CC: ++ return loongarch_set_reg_reg_piece_cost (mode, GET_MODE_SIZE (CCmode)); + + case MODE_FLOAT: + case MODE_COMPLEX_FLOAT: + case MODE_VECTOR_FLOAT: + if (TARGET_HARD_FLOAT) + return loongarch_set_reg_reg_piece_cost (mode, UNITS_PER_HWFPVALUE); +- /* Fall through */ ++ /* Fall through. 
*/ + + default: + return loongarch_set_reg_reg_piece_cost (mode, UNITS_PER_WORD); +@@ -3300,20 +3267,13 @@ loongarch_set_reg_reg_cost (machine_mode mode) + + static bool + loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, +- int opno ATTRIBUTE_UNUSED, int *total, bool speed) ++ int opno ATTRIBUTE_UNUSED, int *total, bool speed) + { + int code = GET_CODE (x); + bool float_mode_p = FLOAT_MODE_P (mode); + int cost; + rtx addr; + +- /* The cost of a COMPARE is hard to define for LARCH. COMPAREs don't +- appear in the instruction stream, and the cost of a comparison is +- really the cost of the branch or scc condition. At the time of +- writing, GCC only uses an explicit outer COMPARE code when optabs +- is testing whether a constant is expensive enough to force into a +- register. We want optabs to pass such constants through the LARCH +- expanders instead, so make all constants very cheap here. */ + if (outer_code == COMPARE) + { + gcc_assert (CONSTANT_P (x)); +@@ -3324,68 +3284,34 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + switch (code) + { + case CONST_INT: +- /* Treat *clear_upper32-style ANDs as having zero cost in the +- second operand. The cost is entirely in the first operand. +- +- ??? This is needed because we would otherwise try to CSE +- the constant operand. Although that's the right thing for +- instructions that continue to be a register operation throughout +- compilation, it is disastrous for instructions that could +- later be converted into a memory operation. */ +- if (TARGET_64BIT +- && outer_code == AND +- && UINTVAL (x) == 0xffffffff) ++ if (TARGET_64BIT && outer_code == AND && UINTVAL (x) == 0xffffffff) + { + *total = 0; + return true; + } + +- /* When not optimizing for size, we care more about the cost +- of hot code, and hot code is often in a loop. If a constant +- operand needs to be forced into a register, we will often be +- able to hoist the constant load out of the loop, so the load +- should not contribute to the cost. */ +- if (speed || loongarch_immediate_operand_p (outer_code, INTVAL (x))) +- { +- *total = 0; +- return true; +- } ++ /* When not optimizing for size, we care more about the cost ++ of hot code, and hot code is often in a loop. If a constant ++ operand needs to be forced into a register, we will often be ++ able to hoist the constant load out of the loop, so the load ++ should not contribute to the cost. */ ++ if (speed || loongarch_immediate_operand_p (outer_code, INTVAL (x))) ++ { ++ *total = 0; ++ return true; ++ } + /* Fall through. */ + + case CONST: + case SYMBOL_REF: + case LABEL_REF: + case CONST_DOUBLE: +- if (force_to_mem_operand (x, VOIDmode)) +- { +- *total = COSTS_N_INSNS (1); +- return true; +- } + cost = loongarch_const_insns (x); + if (cost > 0) + { +- /* If the constant is likely to be stored in a GPR, SETs of +- single-insn constants are as cheap as register sets; we +- never want to CSE them. +- +- Don't reduce the cost of storing a floating-point zero in +- FPRs. If we have a zero in an FPR for other reasons, we +- can get better cfg-cleanup and delayed-branch results by +- using it consistently, rather than using $0 sometimes and +- an FPR at other times. Also, moves between floating-point +- registers are sometimes cheaper than MOVGR2FR.W/MOVGR2FR.D $0. */ +- if (cost == 1 +- && outer_code == SET ++ if (cost == 1 && outer_code == SET + && !(float_mode_p && TARGET_HARD_FLOAT)) + cost = 0; +- /* When code loads a constant N>1 times, we rarely +- want to CSE the constant itself. 
It is usually better to +- have N copies of the last operation in the sequence and one +- shared copy of the other operations. +- +- Also, if we have a CONST_INT, we don't know whether it is +- for a word or doubleword operation, so we cannot rely on +- the result of loongarch_build_integer. */ + else if ((outer_code == SET || GET_MODE (x) == VOIDmode)) + cost = 1; + *total = COSTS_N_INSNS (cost); +@@ -3399,16 +3325,16 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + /* If the address is legitimate, return the number of + instructions it needs. */ + addr = XEXP (x, 0); +- cost = loongarch_address_insns (addr, mode, true); +- if (cost > 0) ++ /* Check for a scaled indexed address. */ ++ if (loongarch_index_address_p (addr, mode)) + { +- *total = COSTS_N_INSNS (cost + 1); ++ *total = COSTS_N_INSNS (2); + return true; + } +- /* Check for a scaled indexed address. */ +- if (loongarch_lx_address_p (addr, mode)) ++ cost = loongarch_address_insns (addr, mode, true); ++ if (cost > 0) + { +- *total = COSTS_N_INSNS (2); ++ *total = COSTS_N_INSNS (cost + 1); + return true; + } + /* Otherwise use the default handling. */ +@@ -3425,34 +3351,31 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + case AND: + /* Check for a *clear_upper32 pattern and treat it like a zero + extension. See the pattern's comment for details. */ +- if (TARGET_64BIT +- && mode == DImode +- && CONST_INT_P (XEXP (x, 1)) ++ if (TARGET_64BIT && mode == DImode && CONST_INT_P (XEXP (x, 1)) + && UINTVAL (XEXP (x, 1)) == 0xffffffff) + { +- *total = (loongarch_zero_extend_cost (mode, XEXP (x, 0)) ++ *total = (loongarch_zero_extend_cost (XEXP (x, 0)) + + set_src_cost (XEXP (x, 0), mode, speed)); + return true; + } + /* (AND (NOT op0) (NOT op1) is a nor operation that can be done in + a single instruction. */ +- if (GET_CODE (XEXP (x, 0)) == NOT +- && GET_CODE (XEXP (x, 1)) == NOT) ++ if (GET_CODE (XEXP (x, 0)) == NOT && GET_CODE (XEXP (x, 1)) == NOT) + { + cost = GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 2 : 1; +- *total = (COSTS_N_INSNS (cost) ++ *total = (COSTS_N_INSNS (cost) + + set_src_cost (XEXP (XEXP (x, 0), 0), mode, speed) + + set_src_cost (XEXP (XEXP (x, 1), 0), mode, speed)); + return true; + } +- ++ + /* Fall through. */ + + case IOR: + case XOR: + /* Double-word operations use two single-word operations. 
*/ + *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (2), +- speed); ++ speed); + return true; + + case ASHIFT: +@@ -3461,18 +3384,18 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + case ROTATE: + case ROTATERT: + if (CONSTANT_P (XEXP (x, 1))) +- *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (4), +- speed); ++ *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), ++ COSTS_N_INSNS (4), speed); + else +- *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (12), +- speed); ++ *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), ++ COSTS_N_INSNS (12), speed); + return true; + + case ABS: + if (float_mode_p) +- *total = loongarch_cost->fp_add; ++ *total = loongarch_cost->fp_add; + else +- *total = COSTS_N_INSNS (4); ++ *total = COSTS_N_INSNS (4); + return false; + + case LT: +@@ -3500,7 +3423,7 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + return false; + } + *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (4), +- speed); ++ speed); + return true; + + case MINUS: +@@ -3512,13 +3435,12 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + } + + /* If it's an add + mult (which is equivalent to shift left) and +- it's immediate operand satisfies const_immlsa_operand predicate. */ +- if (((ISA_HAS_LSA && mode == SImode) +- || (ISA_HAS_DLSA && mode == DImode)) ++ it's immediate operand satisfies const_immalsl_operand predicate. */ ++ if ((mode == SImode || (TARGET_64BIT && mode == DImode)) + && GET_CODE (XEXP (x, 0)) == MULT) + { + rtx op2 = XEXP (XEXP (x, 0), 1); +- if (const_immlsa_operand (op2, mode)) ++ if (const_immalsl_operand (op2, mode)) + { + *total = (COSTS_N_INSNS (1) + + set_src_cost (XEXP (XEXP (x, 0), 0), mode, speed) +@@ -3529,9 +3451,8 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + + /* Double-word operations require three single-word operations and + an SLTU. */ +- *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), +- COSTS_N_INSNS (4), +- speed); ++ *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (4), ++ speed); + return true; + + case NEG: +@@ -3549,9 +3470,6 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + if (float_mode_p) + *total = loongarch_fp_mult_cost (mode); + else if (mode == DImode && !TARGET_64BIT) +- /* Synthesized from 2 mulsi3s, 1 mulsidi3 and two additions, +- where the mulsidi3 always includes an MFHI and an MFLO. */ +- // FIXED ME??? + *total = (speed + ? loongarch_cost->int_mult_si * 3 + 6 + : COSTS_N_INSNS (7)); +@@ -3566,7 +3484,6 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + case DIV: + /* Check for a reciprocal. */ + if (float_mode_p +- && ISA_HAS_FP_RECIP_RSQRT (mode) + && flag_unsafe_math_optimizations + && XEXP (x, 0) == CONST1_RTX (mode)) + { +@@ -3597,17 +3514,17 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + *total = COSTS_N_INSNS (loongarch_idiv_insns (mode)); + } + else if (mode == DImode) +- *total = loongarch_cost->int_div_di; ++ *total = loongarch_cost->int_div_di; + else + *total = loongarch_cost->int_div_si; + return false; + + case SIGN_EXTEND: +- *total = loongarch_sign_extend_cost (mode, XEXP (x, 0)); ++ *total = loongarch_sign_extend_cost (XEXP (x, 0)); + return false; + + case ZERO_EXTEND: +- *total = loongarch_zero_extend_cost (mode, XEXP (x, 0)); ++ *total = loongarch_zero_extend_cost (XEXP (x, 0)); + return false; + case TRUNCATE: + /* Costings for highpart multiplies. 
Matching patterns of the form: +@@ -3617,11 +3534,11 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + (const_int 32) + */ + if ((GET_CODE (XEXP (x, 0)) == ASHIFTRT +- || GET_CODE (XEXP (x, 0)) == LSHIFTRT) ++ || GET_CODE (XEXP (x, 0)) == LSHIFTRT) + && CONST_INT_P (XEXP (XEXP (x, 0), 1)) + && ((INTVAL (XEXP (XEXP (x, 0), 1)) == 32 + && GET_MODE (XEXP (x, 0)) == DImode) +- || (ISA_HAS_DMUL ++ || (TARGET_64BIT + && INTVAL (XEXP (XEXP (x, 0), 1)) == 64 + && GET_MODE (XEXP (x, 0)) == TImode)) + && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT +@@ -3643,13 +3560,13 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + for (int i = 0; i < 2; ++i) + { + rtx op = XEXP (XEXP (XEXP (x, 0), 0), i); +- if (ISA_HAS_DMUL ++ if (TARGET_64BIT + && GET_CODE (op) == ZERO_EXTEND + && GET_MODE (op) == DImode) + *total += rtx_cost (op, DImode, MULT, i, speed); + else +- *total += rtx_cost (XEXP (op, 0), VOIDmode, GET_CODE (op), +- 0, speed); ++ *total += rtx_cost (XEXP (op, 0), VOIDmode, GET_CODE (op), 0, ++ speed); + } + + return true; +@@ -3684,58 +3601,168 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + + static int + loongarch_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, +- tree vectype, +- int misalign ATTRIBUTE_UNUSED) ++ tree vectype, ++ int misalign ATTRIBUTE_UNUSED) + { +- unsigned elements; +- ++ int elements; + switch (type_of_cost) + { +- case scalar_stmt: +- case scalar_load: +- case vector_stmt: +- case vector_load: +- case vec_to_scalar: +- case scalar_to_vec: +- case cond_branch_not_taken: +- case vec_perm: +- case vec_promote_demote: +- case scalar_store: +- case vector_store: +- return 1; +- +- case unaligned_load: +- case vector_gather_load: +- return 2; ++ case scalar_stmt: ++ case vector_stmt: ++ case vec_to_scalar: ++ case scalar_to_vec: ++ case vec_perm: ++ case vec_promote_demote: ++ return 1; + +- case unaligned_store: +- case vector_scatter_store: +- return 10; ++ case scalar_store: ++ case scalar_load: ++ return 3; + +- case cond_branch_taken: +- return 3; ++ case vector_store: ++ case vector_load: ++ return loongarch_vector_access_cost; + +- case vec_construct: +- elements = TYPE_VECTOR_SUBPARTS (vectype); +- return elements / 2 + 1; ++ case unaligned_load: ++ case unaligned_store: ++ case vector_gather_load: ++ case vector_scatter_store: ++ return 5; + +- default: +- gcc_unreachable (); +- } +-} ++ case cond_branch_taken: ++ return 4; ++ ++ case cond_branch_not_taken: ++ return 2; ++ ++ case vec_construct: ++ { ++ elements = TYPE_VECTOR_SUBPARTS (vectype); ++ if (ISA_HAS_LASX) ++ return elements + 1; ++ else ++ return elements; ++ } ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ ++/* Implement targetm.vectorize.add_stmt_cost. */ ++static unsigned ++loongarch_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, ++ struct _stmt_vec_info *stmt_info, int misalign, ++ enum vect_cost_model_location where) ++{ ++ unsigned *cost = (unsigned *) data; ++ unsigned retval = 0; ++ ++ tree vectype = stmt_info ? 
stmt_vectype (stmt_info) : NULL_TREE; ++ int stmt_cost = - 1; ++ ++ if ((kind == vector_stmt || kind == scalar_stmt) ++ && stmt_info ++ && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN) ++ { ++ tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt); ++ bool fp = false; ++ machine_mode mode = TImode; ++ ++ if (vectype != NULL) ++ { ++ fp = FLOAT_TYPE_P (vectype); ++ mode = TYPE_MODE (vectype); ++ } ++ ++ switch (subcode) ++ { ++ case PLUS_EXPR: ++ case POINTER_PLUS_EXPR: ++ case MINUS_EXPR: ++ case MULT_EXPR: ++ case WIDEN_MULT_EXPR: ++ case MULT_HIGHPART_EXPR: ++ stmt_cost = fp ? 2 : 1; ++ break; ++ ++ case TRUNC_DIV_EXPR: ++ case CEIL_DIV_EXPR: ++ case FLOOR_DIV_EXPR: ++ case ROUND_DIV_EXPR: ++ case TRUNC_MOD_EXPR: ++ case CEIL_MOD_EXPR: ++ case FLOOR_MOD_EXPR: ++ case RDIV_EXPR: ++ case ROUND_MOD_EXPR: ++ case EXACT_DIV_EXPR: ++ stmt_cost = fp ? 4 : 1; ++ break; ++ ++ case NOP_EXPR: ++ /* Only sign-conversions are free. */ ++ if (tree_nop_conversion_p ++ (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)), ++ TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)))) ++ stmt_cost = 0; ++ break; ++ ++ default: ++ break; ++ } ++ } ++ if (kind == vec_construct ++ && stmt_info ++ && (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type ++ || STMT_VINFO_TYPE (stmt_info) == store_vec_info_type) ++ && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE ++ && TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info))) != INTEGER_CST) ++ { ++ stmt_cost = loongarch_builtin_vectorization_cost (kind, vectype, misalign); ++ stmt_cost *= TYPE_VECTOR_SUBPARTS (vectype); ++ } ++ if (stmt_cost == -1) ++ stmt_cost = loongarch_builtin_vectorization_cost (kind, vectype, misalign); ++ ++ /* Statements in an inner loop relative to the loop being ++ vectorized are weighted more heavily. The value here is ++ arbitrary and could potentially be improved with analysis. */ ++ if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info)) ++ count *= 50; /* FIXME. */ ++ ++ retval = (unsigned) (count * stmt_cost); ++ ++ cost[where] += retval; ++ ++ return retval; ++} ++ ++static bool ++loongarch_builtin_support_vector_misalignment(machine_mode mode, const_tree type, ++ int misalignment, bool is_packed) ++{ ++ if ((ISA_HAS_LSX || ISA_HAS_LASX) && STRICT_ALIGNMENT) ++ { ++ if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing) ++ return false; ++ if (misalignment == -1) ++ return false; ++ } ++ return default_builtin_support_vector_misalignment (mode, type, misalignment, ++ is_packed); ++} + + + /* Implement TARGET_ADDRESS_COST. */ + + static int + loongarch_address_cost (rtx addr, machine_mode mode, +- addr_space_t as ATTRIBUTE_UNUSED, +- bool speed ATTRIBUTE_UNUSED) ++ addr_space_t as ATTRIBUTE_UNUSED, ++ bool speed ATTRIBUTE_UNUSED) + { + return loongarch_address_insns (addr, mode, false); + } + +- + /* Return one word of double-word value OP, taking into account the fixed + endianness of certain registers. HIGH_P is true to select the high part, + false to select the low part. */ +@@ -3743,24 +3770,16 @@ loongarch_address_cost (rtx addr, machine_mode mode, + rtx + loongarch_subword (rtx op, bool high_p) + { +- unsigned int byte, offset; ++ unsigned int byte; + machine_mode mode; + ++ byte = high_p ? UNITS_PER_WORD : 0; + mode = GET_MODE (op); + if (mode == VOIDmode) + mode = TARGET_64BIT ? TImode : DImode; + +- if (high_p) +- byte = UNITS_PER_WORD; +- else +- byte = 0; +- + if (FP_REG_RTX_P (op)) +- { +- /* Paired FPRs are always ordered little-endian. 
*/ +- offset = (UNITS_PER_WORD < UNITS_PER_HWFPVALUE ? high_p : byte != 0); +- return gen_rtx_REG (word_mode, REGNO (op) + offset); +- } ++ return gen_rtx_REG (word_mode, REGNO (op) + high_p); + + if (MEM_P (op)) + return loongarch_rewrite_small_data (adjust_address (op, word_mode, byte)); +@@ -3768,11 +3787,10 @@ loongarch_subword (rtx op, bool high_p) + return simplify_gen_subreg (word_mode, op, mode, byte); + } + +-/* Return true if a move from SRC to DEST should be split into two. +- SPLIT_TYPE describes the split condition. */ ++/* Return true if a move from SRC to DEST should be split into two. */ + + bool +-loongarch_split_move_p (rtx dest, rtx src, enum loongarch_split_type split_type) ++loongarch_split_move_p (rtx dest, rtx src) + { + /* FPR-to-FPR moves can be done in a single instruction, if they're + allowed at all. */ +@@ -3801,19 +3819,18 @@ loongarch_split_move_p (rtx dest, rtx src, enum loongarch_split_type split_type) + return size > UNITS_PER_WORD; + } + +-/* Split a move from SRC to DEST, given that loongarch_split_move_p holds. +- SPLIT_TYPE describes the split condition. */ ++/* Split a move from SRC to DEST, given that loongarch_split_move_p holds. */ + + void +-loongarch_split_move (rtx dest, rtx src, enum loongarch_split_type split_type, rtx insn_) ++loongarch_split_move (rtx dest, rtx src, rtx insn_) + { + rtx low_dest; + +- gcc_checking_assert (loongarch_split_move_p (dest, src, split_type)); ++ gcc_checking_assert (loongarch_split_move_p (dest, src)); + if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))) + loongarch_split_128bit_move (dest, src); + else if (LASX_SUPPORTED_MODE_P (GET_MODE (dest))) +- loongarch_split_256bit_move (dest, src); ++ loongarch_split_256bit_move (dest, src); + else if (FP_REG_RTX_P (dest) || FP_REG_RTX_P (src)) + { + if (!TARGET_64BIT && GET_MODE (dest) == DImode) +@@ -3830,23 +3847,24 @@ loongarch_split_move (rtx dest, rtx src, enum loongarch_split_type split_type, r + /* The operation can be split into two normal moves. Decide in + which order to do them. */ + low_dest = loongarch_subword (dest, false); +- if (REG_P (low_dest) +- && reg_overlap_mentioned_p (low_dest, src)) ++ if (REG_P (low_dest) && reg_overlap_mentioned_p (low_dest, src)) + { +- loongarch_emit_move (loongarch_subword (dest, true), loongarch_subword (src, true)); ++ loongarch_emit_move (loongarch_subword (dest, true), ++ loongarch_subword (src, true)); + loongarch_emit_move (low_dest, loongarch_subword (src, false)); + } + else + { + loongarch_emit_move (low_dest, loongarch_subword (src, false)); +- loongarch_emit_move (loongarch_subword (dest, true), loongarch_subword (src, true)); ++ loongarch_emit_move (loongarch_subword (dest, true), ++ loongarch_subword (src, true)); + } + } + + /* This is a hack. See if the next insn uses DEST and if so, see if we + can forward SRC for DEST. This is most useful if the next insn is a +- simple store. */ +- rtx_insn *insn = (rtx_insn *)insn_; ++ simple store. 
*/ ++ rtx_insn *insn = (rtx_insn *) insn_; + struct loongarch_address_info addr = {}; + if (insn) + { +@@ -3859,7 +3877,8 @@ loongarch_split_move (rtx dest, rtx src, enum loongarch_split_type split_type, r + if (MEM_P (src)) + { + rtx tmp = XEXP (src, 0); +- loongarch_classify_address (&addr, tmp, GET_MODE (tmp), true); ++ loongarch_classify_address (&addr, tmp, GET_MODE (tmp), ++ true); + if (addr.reg && !reg_overlap_mentioned_p (dest, addr.reg)) + validate_change (next, &SET_SRC (set), src, false); + } +@@ -3870,24 +3889,6 @@ loongarch_split_move (rtx dest, rtx src, enum loongarch_split_type split_type, r + } + } + +-/* Return the split type for instruction INSN. */ +- +-static enum loongarch_split_type +-loongarch_insn_split_type (rtx insn) +-{ +- basic_block bb = BLOCK_FOR_INSN (insn); +- if (bb) +- { +- if (optimize_bb_for_speed_p (bb)) +- return SPLIT_FOR_SPEED; +- else +- return SPLIT_FOR_SIZE; +- } +- /* Once CFG information has been removed, we should trust the optimization +- decisions made by previous passes and only split where necessary. */ +- return SPLIT_IF_NECESSARY; +-} +- + /* Return true if a 128-bit move from SRC to DEST should be split. */ + + bool +@@ -3974,10 +3975,10 @@ loongarch_split_128bit_move (rtx dest, rtx src) + s = loongarch_subword_at_byte (src, byte); + if (!TARGET_64BIT) + emit_insn (gen_lsx_vinsgr2vr_w (new_dest, s, new_dest, +- GEN_INT (1 << index))); ++ GEN_INT (1 << index))); + else + emit_insn (gen_lsx_vinsgr2vr_d (new_dest, s, new_dest, +- GEN_INT (1 << index))); ++ GEN_INT (1 << index))); + } + } + else if (FP_REG_RTX_P (src)) +@@ -4200,28 +4201,93 @@ loongarch_split_lsx_fill_d (rtx dest, rtx src) + emit_insn (gen_lsx_vinsgr2vr_w (new_dest, high, new_dest, GEN_INT (1 << 1))); + emit_insn (gen_lsx_vinsgr2vr_w (new_dest, high, new_dest, GEN_INT (1 << 3))); + } +- ++ + /* Return true if a move from SRC to DEST in INSN should be split. */ + + bool +-loongarch_split_move_insn_p (rtx dest, rtx src, rtx insn) ++loongarch_split_move_insn_p (rtx dest, rtx src) + { +- return loongarch_split_move_p (dest, src, loongarch_insn_split_type (insn)); ++ return loongarch_split_move_p (dest, src); + } + +-/* Split a move from SRC to DEST in INSN, given that loongarch_split_move_insn_p +- holds. */ ++/* Split a move from SRC to DEST in INSN, given that ++ loongarch_split_move_insn_p holds. */ + + void + loongarch_split_move_insn (rtx dest, rtx src, rtx insn) + { +- loongarch_split_move (dest, src, loongarch_insn_split_type (insn), insn); ++ loongarch_split_move (dest, src, insn); + } +- + +-/* Forward declaration. Used below */ ++/* Implement TARGET_CONSTANT_ALIGNMENT. 
*/ ++ + static HOST_WIDE_INT +-loongarch_constant_alignment (const_tree exp, HOST_WIDE_INT align); ++loongarch_constant_alignment (const_tree exp, HOST_WIDE_INT align) ++{ ++ if (TREE_CODE (exp) == STRING_CST || TREE_CODE (exp) == CONSTRUCTOR) ++ return MAX (align, BITS_PER_WORD); ++ return align; ++} ++ ++const char * ++loongarch_output_move_index (rtx x, machine_mode mode, bool ldr) ++{ ++ int index = exact_log2 (GET_MODE_SIZE (mode)); ++ if (!IN_RANGE (index, 0, 3)) ++ return NULL; ++ ++ struct loongarch_address_info info; ++ if ((loongarch_classify_address (&info, x, mode, false) ++ && !(info.type == ADDRESS_REG_REG)) ++ || !loongarch_legitimate_address_p (mode, x, false)) ++ return NULL; ++ ++ const char *const insn[][4] = ++ { ++ { ++ "stx.b\t%z1,%0", ++ "stx.h\t%z1,%0", ++ "stx.w\t%z1,%0", ++ "stx.d\t%z1,%0", ++ }, ++ { ++ "ldx.bu\t%0,%1", ++ "ldx.hu\t%0,%1", ++ "ldx.w\t%0,%1", ++ "ldx.d\t%0,%1", ++ } ++ }; ++ ++ return insn[ldr][index]; ++} ++ ++const char * ++loongarch_output_move_index_float (rtx x, machine_mode mode, bool ldr) ++{ ++ int index = exact_log2 (GET_MODE_SIZE (mode)); ++ if (!IN_RANGE (index, 2, 3)) ++ return NULL; ++ ++ struct loongarch_address_info info; ++ if ((loongarch_classify_address (&info, x, mode, false) ++ && !(info.type == ADDRESS_REG_REG)) ++ || !loongarch_legitimate_address_p (mode, x, false)) ++ return NULL; ++ ++ const char *const insn[][2] = ++ { ++ { ++ "fstx.s\t%1,%0", ++ "fstx.d\t%1,%0" ++ }, ++ { ++ "fldx.s\t%0,%1", ++ "fldx.d\t%0,%1" ++ } ++ }; ++ ++ return insn[ldr][index-2]; ++} + + /* Return the appropriate instructions to move SRC into DEST. Assume + that SRC is operand 1 and DEST is operand 0. */ +@@ -4235,9 +4301,8 @@ loongarch_output_move (rtx dest, rtx src) + bool dbl_p = (GET_MODE_SIZE (mode) == 8); + bool lsx_p = LSX_SUPPORTED_MODE_P (mode); + bool lasx_p = LASX_SUPPORTED_MODE_P (mode); +- enum loongarch_symbol_type symbol_type; + +- if (loongarch_split_move_p (dest, src, SPLIT_IF_NECESSARY)) ++ if (loongarch_split_move_p (dest, src)) + return "#"; + + if ((lsx_p || lasx_p) +@@ -4246,7 +4311,7 @@ loongarch_output_move (rtx dest, rtx src) + && CONST_INT_P (CONST_VECTOR_ELT (src, 0))) + { + gcc_assert (loongarch_const_vector_same_int_p (src, mode, -512, 511)); +- if(lsx_p || lasx_p) ++ if (lsx_p || lasx_p) + { + switch (GET_MODE_SIZE (mode)) + { +@@ -4254,7 +4319,8 @@ loongarch_output_move (rtx dest, rtx src) + return "vrepli.%v0\t%w0,%E1"; + case 32: + return "xvrepli.%v0\t%u0,%E1"; +- default: gcc_unreachable (); ++ default: ++ gcc_unreachable (); + } + } + } +@@ -4278,77 +4344,98 @@ loongarch_output_move (rtx dest, rtx src) + return "vrepli.b\t%w0,0"; + case 32: + return "xvrepli.b\t%u0,0"; +- default: gcc_unreachable (); ++ default: ++ gcc_unreachable (); + } + } + + return dbl_p ? 
"movgr2fr.d\t%0,%z1" : "movgr2fr.w\t%0,%z1"; + } + } +- if (dest_code == MEM) ++ if (dest_code == MEM) + { ++ const char *insn = NULL; ++ insn = loongarch_output_move_index (XEXP (dest, 0), GET_MODE (dest), ++ false); ++ if (insn) ++ return insn; ++ + rtx offset = XEXP (dest, 0); +- if (GET_CODE(offset) == PLUS) +- offset = XEXP(offset, 1); ++ if (GET_CODE (offset) == PLUS) ++ offset = XEXP (offset, 1); ++ else ++ offset = const0_rtx; + switch (GET_MODE_SIZE (mode)) + { +- case 1: return "st.b\t%z1,%0"; +- case 2: return "st.h\t%z1,%0"; +- case 4: +- if (const_arith_operand (offset, Pmode)) +- return "st.w\t%z1,%0"; +- else +- return "stptr.w\t%z1,%0"; +- case 8: +- if (const_arith_operand (offset, Pmode)) +- return "st.d\t%z1,%0"; +- else +- return "stptr.d\t%z1,%0"; +- default: gcc_unreachable (); +- } ++ case 1: ++ return "st.b\t%z1,%0"; ++ case 2: ++ return "st.h\t%z1,%0"; ++ case 4: ++ if (const_arith_operand (offset, Pmode) || (offset == const0_rtx)) ++ return "st.w\t%z1,%0"; ++ else ++ return "stptr.w\t%z1,%0"; ++ case 8: ++ if (const_arith_operand (offset, Pmode) || (offset == const0_rtx)) ++ return "st.d\t%z1,%0"; ++ else ++ return "stptr.d\t%z1,%0"; ++ default: ++ gcc_unreachable (); ++ } + } + } + if (dest_code == REG && GP_REG_P (REGNO (dest))) + { + if (src_code == REG) +- { +- if (FP_REG_P (REGNO (src))) +- { +- gcc_assert (!lsx_p); +- return dbl_p ? "movfr2gr.d\t%0,%1" : "movfr2gr.s\t%0,%1"; +- } +- } ++ if (FP_REG_P (REGNO (src))) ++ { ++ gcc_assert (!lsx_p && !lasx_p); ++ return dbl_p ? "movfr2gr.d\t%0,%1" : "movfr2gr.s\t%0,%1"; ++ } + + if (src_code == MEM) + { ++ const char *insn = NULL; ++ insn = loongarch_output_move_index (XEXP (src, 0), GET_MODE (src), ++ true); ++ if (insn) ++ return insn; ++ + rtx offset = XEXP (src, 0); +- if (GET_CODE(offset) == PLUS) +- offset = XEXP(offset, 1); ++ if (GET_CODE (offset) == PLUS) ++ offset = XEXP (offset, 1); ++ else ++ offset = const0_rtx; + switch (GET_MODE_SIZE (mode)) + { +- case 1: return "ld.bu\t%0,%1"; +- case 2: return "ld.hu\t%0,%1"; +- case 4: +- if (const_arith_operand (offset, Pmode)) +- return "ld.w\t%0,%1"; +- else +- return "ldptr.w\t%0,%1"; +- case 8: +- if (const_arith_operand (offset, Pmode)) +- return "ld.d\t%0,%1"; +- else +- return "ldptr.d\t%0,%1"; +- default: gcc_unreachable (); ++ case 1: ++ return "ld.bu\t%0,%1"; ++ case 2: ++ return "ld.hu\t%0,%1"; ++ case 4: ++ if (const_arith_operand (offset, Pmode) || (offset == const0_rtx)) ++ return "ld.w\t%0,%1"; ++ else ++ return "ldptr.w\t%0,%1"; ++ case 8: ++ if (const_arith_operand (offset, Pmode) || (offset == const0_rtx)) ++ return "ld.d\t%0,%1"; ++ else ++ return "ldptr.d\t%0,%1"; ++ default: ++ gcc_unreachable (); + } + } +- ++ + if (src_code == CONST_INT) + { +- if (LUI_INT (src)) ++ if (LU12I_INT (src)) + return "lu12i.w\t%0,%1>>12\t\t\t# %X1"; +- else if (SMALL_INT (src)) ++ else if (IMM12_INT (src)) + return "addi.w\t%0,$r0,%1\t\t\t# %X1"; +- else if (SMALL_INT_UNSIGNED (src)) ++ else if (IMM12_INT_UNSIGNED (src)) + return "ori\t%0,$r0,%1\t\t\t# %X1"; + else if (LU52I_INT (src)) + return "lu52i.d\t%0,$r0,%X1>>52\t\t\t# %1"; +@@ -4358,56 +4445,51 @@ loongarch_output_move (rtx dest, rtx src) + + if (symbolic_operand (src, VOIDmode)) + { +- +- switch (loongarch_cmodel_var) ++ if ((TARGET_CMODEL_TINY && (!loongarch_global_symbol_p (src) ++ || loongarch_symbol_binds_local_p (src))) ++ || (TARGET_CMODEL_TINY_STATIC && !loongarch_weak_symbol_p (src))) + { +- case LARCH_CMODEL_TINY: +- do ++ /* The symbol must be aligned to 4 byte. 
*/ ++ unsigned int align; ++ ++ if (GET_CODE (src) == LABEL_REF) ++ align = 32 /* Whatever. */; ++ else if (CONSTANT_POOL_ADDRESS_P (src)) ++ align = GET_MODE_ALIGNMENT (get_pool_mode (src)); ++ else if (TREE_CONSTANT_POOL_ADDRESS_P (src)) + { +- if (loongarch_global_symbol_p (src) +- && !loongarch_symbol_binds_local_p (src)) +- break; +- case LARCH_CMODEL_TINY_STATIC: +- if (loongarch_weak_symbol_p (src)) +- break; +- +- /* The symbol must be aligned to 4 byte. */ +- unsigned int align; +- +- if (GET_CODE (src) == LABEL_REF) +- align = 128 /* whatever */; +- /* copy from aarch64 */ +- else if (CONSTANT_POOL_ADDRESS_P (src)) +- align = GET_MODE_ALIGNMENT (get_pool_mode (src)); +- else if (TREE_CONSTANT_POOL_ADDRESS_P (src)) +- { +- tree exp = SYMBOL_REF_DECL (src); +- align = TYPE_ALIGN (TREE_TYPE (exp)); +- align = loongarch_constant_alignment (exp, align); +- } +- else if (SYMBOL_REF_DECL (src)) +- align = DECL_ALIGN (SYMBOL_REF_DECL (src)); +- else if (SYMBOL_REF_HAS_BLOCK_INFO_P (src) +- && SYMBOL_REF_BLOCK (src) != NULL) +- align = SYMBOL_REF_BLOCK (src)->alignment; +- else +- align = BITS_PER_UNIT; +- +- if (align % (4 * 8) == 0) +- return "pcaddi\t%0,%%pcrel(%1)>>2"; ++ tree exp = SYMBOL_REF_DECL (src); ++ align = TYPE_ALIGN (TREE_TYPE (exp)); ++ align = loongarch_constant_alignment (exp, align); + } +- while (0); +- case LARCH_CMODEL_NORMAL: +- case LARCH_CMODEL_LARGE: ++ else if (SYMBOL_REF_DECL (src)) ++ align = DECL_ALIGN (SYMBOL_REF_DECL (src)); ++ else if (SYMBOL_REF_HAS_BLOCK_INFO_P (src) ++ && SYMBOL_REF_BLOCK (src) != NULL) ++ align = SYMBOL_REF_BLOCK (src)->alignment; ++ else ++ align = BITS_PER_UNIT; ++ ++ if (align % (4 * 8) == 0) ++ return "pcaddi\t%0,%%pcrel(%1)>>2"; ++ } ++ if (TARGET_CMODEL_TINY ++ || TARGET_CMODEL_TINY_STATIC ++ || TARGET_CMODEL_NORMAL ++ || TARGET_CMODEL_LARGE) ++ { + if (!loongarch_global_symbol_p (src) + || loongarch_symbol_binds_local_p (src)) + return "la.local\t%0,%1"; + else + return "la.global\t%0,%1"; +- case LARCH_CMODEL_EXTREME: +- default: ++ } ++ if (TARGET_CMODEL_EXTREME) ++ { ++ sorry ("Normal symbol loading not implemented in extreme mode."); + gcc_unreachable (); + } ++ + } + } + if (src_code == REG && FP_REG_P (REGNO (src))) +@@ -4416,14 +4498,14 @@ loongarch_output_move (rtx dest, rtx src) + { + if (lsx_p || lasx_p) + { +- + switch (GET_MODE_SIZE (mode)) + { + case 16: + return "vori.b\t%w0,%w1,0"; + case 32: + return "xvori.b\t%u0,%u1,0"; +- default: gcc_unreachable (); ++ default: ++ gcc_unreachable (); + } + } + else +@@ -4434,16 +4516,22 @@ loongarch_output_move (rtx dest, rtx src) + { + if (lsx_p || lasx_p) + { +- + switch (GET_MODE_SIZE (mode)) + { + case 16: + return "vst\t%w1,%0"; + case 32: + return "xvst\t%u1,%0"; +- default: gcc_unreachable (); ++ default: ++ gcc_unreachable (); + } + } ++ const char *insn = NULL; ++ insn = loongarch_output_move_index_float (XEXP (dest, 0), ++ GET_MODE (dest), ++ false); ++ if (insn) ++ return insn; + + return dbl_p ? "fst.d\t%1,%0" : "fst.s\t%1,%0"; + } +@@ -4460,17 +4548,25 @@ loongarch_output_move (rtx dest, rtx src) + return "vld\t%w0,%1"; + case 32: + return "xvld\t%u0,%1"; +- default: gcc_unreachable (); ++ default: ++ gcc_unreachable (); + } + } ++ const char *insn = NULL; ++ insn = loongarch_output_move_index_float (XEXP (src, 0), ++ GET_MODE (src), ++ true); ++ if (insn) ++ return insn; ++ + return dbl_p ? "fld.d\t%0,%1" : "fld.s\t%0,%1"; + } + } + gcc_unreachable (); + } +- ++ + /* Return true if CMP1 is a suitable second operand for integer ordering +- test CODE. 
See also the *sCC patterns in loongarch.md. */ ++ test CODE. */ + + static bool + loongarch_int_order_operand_ok_p (enum rtx_code code, rtx cmp1) +@@ -4508,7 +4604,7 @@ loongarch_int_order_operand_ok_p (enum rtx_code code, rtx cmp1) + + static bool + loongarch_canonicalize_int_order_test (enum rtx_code *code, rtx *cmp1, +- machine_mode mode) ++ machine_mode mode) + { + HOST_WIDE_INT plus_one; + +@@ -4551,11 +4647,11 @@ loongarch_canonicalize_int_order_test (enum rtx_code *code, rtx *cmp1, + + static void + loongarch_emit_int_order_test (enum rtx_code code, bool *invert_ptr, +- rtx target, rtx cmp0, rtx cmp1) ++ rtx target, rtx cmp0, rtx cmp1) + { + machine_mode mode; + +- /* First see if there is a LARCH instruction that can do this operation. ++ /* First see if there is a LoongArch instruction that can do this operation. + If not, try doing the same for the inverse operation. If that also + fails, force CMP1 into a register and try again. */ + mode = GET_MODE (cmp0); +@@ -4574,7 +4670,7 @@ loongarch_emit_int_order_test (enum rtx_code code, bool *invert_ptr, + rtx inv_target; + + inv_target = loongarch_force_binary (GET_MODE (target), +- inv_code, cmp0, cmp1); ++ inv_code, cmp0, cmp1); + loongarch_emit_binary (XOR, target, inv_target, const1_rtx); + } + else +@@ -4595,43 +4691,14 @@ loongarch_zero_if_equal (rtx cmp0, rtx cmp1) + return cmp0; + + if (uns_arith_operand (cmp1, VOIDmode)) +- return expand_binop (GET_MODE (cmp0), xor_optab, +- cmp0, cmp1, 0, 0, OPTAB_DIRECT); ++ return expand_binop (GET_MODE (cmp0), xor_optab, cmp0, cmp1, 0, 0, ++ OPTAB_DIRECT); + +- return expand_binop (GET_MODE (cmp0), sub_optab, +- cmp0, cmp1, 0, 0, OPTAB_DIRECT); ++ return expand_binop (GET_MODE (cmp0), sub_optab, cmp0, cmp1, 0, 0, ++ OPTAB_DIRECT); + } + +-/* Allocate a floating-point condition-code register of mode MODE. +- +- These condition code registers are used for certain kinds +- of compound operation, such as compare and branches, vconds, +- and built-in functions. At expand time, their use is entirely +- controlled by LARCH-specific code and is entirely internal +- to these compound operations. +- +- We could (and did in the past) expose condition-code values +- as pseudo registers and leave the register allocator to pick +- appropriate registers. The problem is that it is not practically +- possible for the rtl optimizers to guarantee that no spills will +- be needed, even when AVOID_CCMODE_COPIES is defined. We would +- therefore need spill and reload sequences to handle the worst case. +- +- Although such sequences do exist, they are very expensive and are +- not something we'd want to use. +- +- The main benefit of having more than one condition-code register +- is to allow the pipelining of operations, especially those involving +- comparisons and conditional moves. We don't really expect the +- registers to be live for long periods, and certainly never want +- them to be live across calls. +- +- Also, there should be no penalty attached to using all the available +- registers. They are simply bits in the same underlying FPU control +- register. +- +- We therefore expose the hardware registers from the outset and use +- a simple round-robin allocation scheme. */ ++/* Allocate a floating-point condition-code register of mode MODE. 
*/ + + static rtx + loongarch_allocate_fcc (machine_mode mode) +@@ -4646,15 +4713,14 @@ loongarch_allocate_fcc (machine_mode mode) + gcc_unreachable (); + + cfun->machine->next_fcc += -cfun->machine->next_fcc & (count - 1); +- if (cfun->machine->next_fcc > ST_REG_LAST - ST_REG_FIRST) ++ if (cfun->machine->next_fcc > FCC_REG_LAST - FCC_REG_FIRST) + cfun->machine->next_fcc = 0; + +- regno = ST_REG_FIRST + cfun->machine->next_fcc; ++ regno = FCC_REG_FIRST + cfun->machine->next_fcc; + cfun->machine->next_fcc += count; + return gen_rtx_REG (mode, regno); + } + +- + /* Sign- or zero-extend OP0 and OP1 for integer comparisons. */ + + static void +@@ -4681,6 +4747,7 @@ loongarch_extend_comparands (rtx_code code, rtx *op0, rtx *op1) + } + } + ++ + /* Convert a comparison into something that can be used in a branch. On + entry, *OP0 and *OP1 are the values being compared and *CODE is the code + used to compare them. Update them to describe the final comparison. */ +@@ -4688,6 +4755,9 @@ loongarch_extend_comparands (rtx_code code, rtx *op0, rtx *op1) + static void + loongarch_emit_int_compare (enum rtx_code *code, rtx *op0, rtx *op1) + { ++ static const enum rtx_code ++ mag_comparisons[][2] = {{LEU, LTU}, {GTU, GEU}, {LE, LT}, {GT, GE}}; ++ + if (splittable_const_int_operand (*op1, VOIDmode)) + { + HOST_WIDE_INT rhs = INTVAL (*op1); +@@ -4695,7 +4765,7 @@ loongarch_emit_int_compare (enum rtx_code *code, rtx *op0, rtx *op1) + if (*code == EQ || *code == NE) + { + /* Convert e.g. OP0 == 2048 into OP0 - 2048 == 0. */ +- if (SMALL_OPERAND (-rhs)) ++ if (IMM12_OPERAND (-rhs)) + { + *op0 = loongarch_force_binary (GET_MODE (*op0), PLUS, *op0, + GEN_INT (-rhs)); +@@ -4704,10 +4774,6 @@ loongarch_emit_int_compare (enum rtx_code *code, rtx *op0, rtx *op1) + } + else + { +- static const enum rtx_code mag_comparisons[][2] = { +- {LEU, LTU}, {GTU, GEU}, {LE, LT}, {GT, GE} +- }; +- + /* Convert e.g. (OP0 <= 0xFFF) into (OP0 < 0x1000). */ + for (size_t i = 0; i < ARRAY_SIZE (mag_comparisons); i++) + { +@@ -4730,13 +4796,14 @@ loongarch_emit_int_compare (enum rtx_code *code, rtx *op0, rtx *op1) + } + } + ++ loongarch_extend_comparands (*code, op0, op1); + +- *op0 = force_reg (GET_MODE (*op0), *op0); +- if (*op1 != const0_rtx) +- *op1 = force_reg (GET_MODE (*op0), *op1); ++ *op0 = force_reg (word_mode, *op0); ++ if (*op1 != const0_rtx) ++ *op1 = force_reg (word_mode, *op1); + } + +-/* Like riscv_emit_int_compare, but for floating-point comparisons. */ ++/* Like loongarch_emit_int_compare, but for floating-point comparisons. */ + + static void + loongarch_emit_float_compare (enum rtx_code *code, rtx *op0, rtx *op1) +@@ -4749,7 +4816,7 @@ loongarch_emit_float_compare (enum rtx_code *code, rtx *op0, rtx *op1) + then compare that register against zero. + + Set CMP_CODE to the code of the comparison instruction and +- *CODE to the code that the branch or move should use. */ ++ *CODE to the code that the branch or move should use. */ + enum rtx_code cmp_code = *code; + /* Three FP conditions cannot be implemented by reversing the + operands for FCMP.cond.fmt, instead a reversed condition code is +@@ -4760,7 +4827,7 @@ loongarch_emit_float_compare (enum rtx_code *code, rtx *op0, rtx *op1) + *op1 = const0_rtx; + loongarch_emit_binary (cmp_code, *op0, cmp_op0, cmp_op1); + } +- ++ + /* Try performing the comparison in OPERANDS[1], whose arms are OPERANDS[2] + and OPERAND[3]. Store the result in OPERANDS[0]. 
+ +@@ -4775,14 +4842,15 @@ loongarch_expand_scc (rtx operands[]) + rtx op0 = operands[2]; + rtx op1 = operands[3]; + ++ loongarch_extend_comparands (code, &op0, &op1); ++ op0 = force_reg (word_mode, op0); ++ + gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT); + + if (code == EQ || code == NE) + { +- { +- rtx zie = loongarch_zero_if_equal (op0, op1); +- loongarch_emit_binary (code, target, zie, const0_rtx); +- } ++ rtx zie = loongarch_zero_if_equal (op0, op1); ++ loongarch_emit_binary (code, target, zie, const0_rtx); + } + else + loongarch_emit_int_order_test (code, 0, target, op0, op1); +@@ -4804,49 +4872,65 @@ loongarch_expand_conditional_branch (rtx *operands) + else + loongarch_emit_int_compare (&code, &op0, &op1); + +- condition = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1); ++ condition = gen_rtx_fmt_ee (code, VOIDmode, op0, op1); + emit_jump_insn (gen_condjump (condition, operands[3])); + } + + /* Perform the comparison in OPERANDS[1]. Move OPERANDS[2] into OPERANDS[0] + if the condition holds, otherwise move OPERANDS[3] into OPERANDS[0]. */ +- +-void +-loongarch_expand_conditional_move (rtx *operands) ++bool ++loongarch_expand_conditional_move_la464 (rtx *operands) + { + enum rtx_code code = GET_CODE (operands[1]); + rtx op0 = XEXP (operands[1], 0); + rtx op1 = XEXP (operands[1], 1); ++ machine_mode cmp_mode = GET_MODE(op0); ++ machine_mode sel_mode = GET_MODE(operands[2]); + ++ /*ffii means Selecting a fixed point based on floating point comparison results */ + if (FLOAT_MODE_P (GET_MODE (op1))) + loongarch_emit_float_compare (&code, &op0, &op1); + else + { +- if (code == EQ || code == NE) /*see test-mask-1.c && test-mask-5.c*/ ++ loongarch_extend_comparands (code, &op0, &op1); ++ ++ op0 = force_reg (word_mode, op0); ++ ++ if (code == EQ || code == NE) ++ { ++ op0 = loongarch_zero_if_equal (op0, op1); ++ op1 = const0_rtx; ++ /*Be careful iiff*/ ++ if(FLOAT_MODE_P(sel_mode)){ ++ rtx target = gen_reg_rtx (GET_MODE (op0)); ++ bool invert = false; ++ loongarch_emit_int_order_test (LTU, NULL, op0, ++ force_reg (GET_MODE (op0), const0_rtx), ++ op0); ++ op1 = const0_rtx; ++ } ++ } ++ else + { +- op0 = loongarch_zero_if_equal(op0, op1); ++ /* The comparison needs a separate scc instruction. Store the ++ result of the scc in *OP0 and compare it against zero. */ ++ bool invert = false; ++ rtx target = gen_reg_rtx (GET_MODE (op0)); ++ loongarch_emit_int_order_test (code, &invert, target, op0, op1); ++ code = invert ? EQ : NE; ++ op0 = target; + op1 = const0_rtx; + } +- else /*see test-mask-2.c*/ +- { +- /* The comparison needs a separate scc instruction. Store the +- result of the scc in *OP0 and compare it against zero. */ +- bool invert = false; +- rtx target = gen_reg_rtx (GET_MODE (op0)); +- loongarch_emit_int_order_test (code, &invert, target, op0, op1); +- code = invert ? EQ: NE; +- op0 = target; +- op1 = const0_rtx; +- } + } + + rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1); + /* There is no direct support for general conditional GP move involving +- two registers using SEL. see test-mask-3.c */ +- if (INTEGRAL_MODE_P (GET_MODE (operands[2])) ++ two registers using SEL. 
*/ ++ if (INTEGRAL_MODE_P (cmp_mode) ++ &&(INTEGRAL_MODE_P (sel_mode)) + && register_operand (operands[2], VOIDmode) +- && register_operand (operands[3], VOIDmode)) +- { ++ && register_operand (operands[3], VOIDmode)) { ++ + machine_mode mode = GET_MODE (operands[0]); + rtx temp = gen_reg_rtx (mode); + rtx temp2 = gen_reg_rtx (mode); +@@ -4864,26 +4948,72 @@ loongarch_expand_conditional_move (rtx *operands) + + /* Merge the two results, at least one is guaranteed to be zero. */ + emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, temp2))); +- } +- else +- emit_insn (gen_rtx_SET (operands[0], +- gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]), cond, +- operands[2], operands[3]))); +-} +- +- +-/* Initialize *CUM for a call to a function of type FNTYPE. */ + +-void +-loongarch_init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype) +-{ +- memset (cum, 0, sizeof (*cum)); +- cum->prototype = (fntype && prototype_p (fntype)); +- cum->gp_reg_found = (cum->prototype && stdarg_p (fntype)); ++ return true; ++ /*For ffii, iiff due to movgr2fr, movfr2gr overhead is relatively large, ++ * so we use some compromise*/ ++ } else if (INTEGRAL_MODE_P (cmp_mode) ++ &&(FLOAT_MODE_P (sel_mode)) ++ && register_operand (operands[2], VOIDmode) ++ && register_operand (operands[3], VOIDmode)) { ++ rtx temp = gen_reg_rtx(sel_mode); ++ rtx fcc_reg =loongarch_allocate_fcc (FCCmode); ++ rtx diop0 = convert_to_mode(E_DImode, op0, true); ++ /*stl t0 i i-> movgr2fr f0 t0 -> movfr2cf fcc0 f0 -> fsel f f*/ ++ if(sel_mode == E_DFmode){ ++ emit_insn(gen_movdgr2frdf(temp, diop0)); ++ emit_insn(gen_movfr2fccdf(fcc_reg, temp)); ++ }else if(sel_mode == E_SFmode){ ++ emit_insn(gen_movdgr2frsf(temp, diop0)); ++ emit_insn(gen_movfr2fccsf(fcc_reg, temp)); ++ } ++ cond = gen_rtx_fmt_ee (code, GET_MODE(fcc_reg), fcc_reg, const0_rtx); ++ ++ emit_insn (gen_rtx_SET (operands[0], ++ gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]), cond, ++ operands[2], operands[3]))); ++ return true; ++ } else if (FLOAT_MODE_P (cmp_mode) ++ &&(INTEGRAL_MODE_P (sel_mode))) { ++ /*movgr2fr f0 i -> movgr2fr f1 i -> fcmp fcc0 f f ++ * -> fsel f3 f0 f1 -> movfr2gr t0 f3*/ ++ machine_mode dst_mode = GET_MODE (operands[0]); ++ rtx temp = gen_reg_rtx (E_DFmode); ++ rtx temp2 = gen_reg_rtx (E_DFmode); ++ rtx temp3 = gen_reg_rtx (E_DFmode); ++ ++ if(CONST_INT_P(operands[2])){ ++ operands[2] = copy_to_mode_reg(dst_mode, operands[2]); ++ } ++ if(CONST_INT_P(operands[3])){ ++ operands[3] = copy_to_mode_reg(dst_mode, operands[3]); ++ } ++ if(GET_MODE(operands[2]) != E_DImode) ++ operands[2] = convert_to_mode(E_DImode, operands[2], false); ++ if(GET_MODE(operands[3]) != E_DImode) ++ operands[3] = convert_to_mode(E_DImode, operands[3], false); ++ ++ emit_insn(gen_movdgr2frdf(temp2, operands[2])); ++ emit_insn(gen_movdgr2frdf(temp3, operands[3])); ++ ++ emit_insn (gen_rtx_SET (temp, ++ gen_rtx_IF_THEN_ELSE (E_DFmode, cond, ++ temp2, temp3))); ++ if(GET_MODE(operands[0]) == E_DImode) ++ emit_insn(gen_movdfr2grdi(operands[0], temp)); ++ else if(GET_MODE(operands[0]) == E_SImode) ++ emit_insn(gen_movdfr2grsi(operands[0], temp)); ++ return true; ++ } else if(FLOAT_MODE_P (cmp_mode) ++ &&FLOAT_MODE_P (sel_mode)){ ++ emit_insn (gen_rtx_SET (operands[0], ++ gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]), cond, ++ operands[2], operands[3]))); ++ return true; ++ } ++ ++ return false; + } +- +- +- + /* Implement TARGET_EXPAND_BUILTIN_VA_START. 
*/ + + static void +@@ -4893,100 +5023,15 @@ loongarch_va_start (tree valist, rtx nextarg) + std_expand_builtin_va_start (valist, nextarg); + } + +- +-/* Start a definition of function NAME. */ +- +-static void +-loongarch_start_function_definition (const char *name) +-{ +- ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, name, "function"); +- +- /* Start the definition proper. */ +- assemble_name (asm_out_file, name); +- fputs (":\n", asm_out_file); +-} +- +-/* End a function definition started by loongarch_start_function_definition. */ +- +-static void +-loongarch_end_function_definition (const char *name) +-{ +-} +- + /* Implement TARGET_FUNCTION_OK_FOR_SIBCALL. */ + + static bool +-loongarch_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) ++loongarch_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, ++ tree exp ATTRIBUTE_UNUSED) + { +- if (!TARGET_SIBCALLS) +- return false; +- +- /* Interrupt handlers need special epilogue code and therefore can't +- use sibcalls. */ +- if (loongarch_interrupt_type_p (TREE_TYPE (current_function_decl))) +- return false; +- +- /* Otherwise OK. */ ++ /* Always OK. */ + return true; + } +- +-/* Implement a handler for STORE_BY_PIECES operations +- for TARGET_USE_MOVE_BY_PIECES_INFRASTRUCTURE_P. */ +- +-bool +-loongarch_store_by_pieces_p (unsigned HOST_WIDE_INT size, unsigned int align) +-{ +- /* Storing by pieces involves moving constants into registers +- of size MIN (ALIGN, BITS_PER_WORD), then storing them. +- We need to decide whether it is cheaper to load the address of +- constant data into a register and use a block move instead. */ +- +- /* If the data is only byte aligned, then: +- +- (a1) A block move of less than 4 bytes would involve three 3 LD.Bs and +- 3 ST.Bs. We might as well use 3 single-instruction LIs and 3 SD.Bs +- instead. +- +- (a2) A block move of 4 bytes from aligned source data can use an +- LD.W/ST.W sequence. This is often better than the 4 LIs and +- 4 SD.Bs that we would generate when storing by pieces. */ +- if (align <= BITS_PER_UNIT) +- return size < 4; +- +- /* If the data is 2-byte aligned, then: +- +- (b1) A block move of less than 4 bytes would use a combination of LD.Bs, +- LD.Hs, SD.Bs and SD.Hs. We get better code by using single-instruction +- LIs, SD.Bs and SD.Hs instead. +- +- (b2) A block move of 4 bytes from aligned source data would again use +- an LD.W/ST.W sequence. In most cases, loading the address of +- the source data would require at least one extra instruction. +- It is often more efficient to use 2 single-instruction LIs and +- 2 SHs instead. +- +- (b3) A block move of up to 3 additional bytes would be like (b1). +- +- (b4) A block move of 8 bytes from aligned source data can use two +- LD.W/ST.W sequences. Both sequences are better than the 4 LIs +- and 4 ST.Hs that we'd generate when storing by pieces. +- +- The reasoning for higher alignments is similar: +- +- (c1) A block move of less than 4 bytes would be the same as (b1). +- +- (c2) A block move of 4 bytes would use an LD.W/ST.W sequence. Again, +- loading the address of the source data would typically require +- at least one extra instruction. It is generally better to use +- LUI/ORI/SW instead. +- +- (c3) A block move of up to 3 additional bytes would be like (b1). +- +- (c4) A block move of 8 bytes can use two LD.W/ST.W sequences or a single +- LD.D/ST.D sequence, and in these cases we've traditionally preferred +- the memory copy over the more bulky constant moves. 
*/ +- return size < 8; +-} + + /* Emit straight-line code to move LENGTH bytes from SRC to DEST. + Assume that the areas do not overlap. */ +@@ -4999,20 +5044,13 @@ loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length) + int i; + machine_mode mode; + rtx *regs; ++ ++ if (STRICT_ALIGNMENT) ++ bits = MIN (BITS_PER_WORD, MIN (MEM_ALIGN (src), MEM_ALIGN (dest))); ++ else ++ bits = BITS_PER_WORD; + +- /* Work out how many bits to move at a time. If both operands have +- half-word alignment, it is usually better to move in half words. +- For instance, lh/lh/sh/sh is usually better than lwl/lwr/swl/swr +- and lw/lw/sw/sw is usually better than ldl/ldr/sdl/sdr. +- Otherwise move word-sized chunks. +- +- For ISA_HAS_LWL_LWR we rely on the lwl/lwr & swl/swr load. Otherwise +- picking the minimum of alignment or BITS_PER_WORD gets us the +- desired size for bits. */ +- +- bits = MIN (BITS_PER_WORD, MIN (MEM_ALIGN (src), MEM_ALIGN (dest))); +- +- if (TARGET_LASX) ++ if (ISA_HAS_LASX && !STRICT_ALIGNMENT) + { + bits = BITS_PER_WORD * 4; + mode = V4DImode; +@@ -5029,7 +5067,7 @@ loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length) + + /* Load as many BITS-sized chunks as possible. Use a normal load if + the source has enough alignment, otherwise use left/right pairs. */ +- if (TARGET_LASX) ++ if (ISA_HAS_LASX && !STRICT_ALIGNMENT) + { + for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++) + { +@@ -5047,7 +5085,7 @@ loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length) + } + + /* Copy the chunks to the destination. */ +- if (TARGET_LASX) ++ if (ISA_HAS_LASX && !STRICT_ALIGNMENT) + { + + for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++) +@@ -5065,9 +5103,9 @@ loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length) + /* Mop up any left-over bytes. 
*/ + if (offset < length) + { +- if (TARGET_LASX) ++ if (ISA_HAS_LASX && !STRICT_ALIGNMENT) + { +- if(length - offset >= 16) ++ if (length - offset >= 16) + { + rtx *regs_tmp = XALLOCAVEC (rtx, 1); + regs_tmp[0] = gen_reg_rtx (V2DImode); +@@ -5075,7 +5113,7 @@ loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length) + loongarch_emit_move (adjust_address (dest, V2DImode, offset), regs_tmp[0]); + offset += 16; + } +- if(length - offset >= 8) ++ if (length - offset >= 8) + { + rtx *regs_tmp = XALLOCAVEC (rtx, 1); + regs_tmp[0] = gen_reg_rtx (DImode); +@@ -5083,7 +5121,7 @@ loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length) + loongarch_emit_move (adjust_address (dest, DImode, offset), regs_tmp[0]); + offset += 8; + } +- if(length - offset >= 4) ++ if (length - offset >= 4) + { + rtx *regs_tmp = XALLOCAVEC (rtx, 1); + regs_tmp[0] = gen_reg_rtx (SImode); +@@ -5091,7 +5129,7 @@ loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length) + loongarch_emit_move (adjust_address (dest, SImode, offset), regs_tmp[0]); + offset += 4; + } +- if(length - offset >= 2) ++ if (length - offset >= 2) + { + rtx *regs_tmp = XALLOCAVEC (rtx, 1); + regs_tmp[0] = gen_reg_rtx (HImode); +@@ -5099,7 +5137,7 @@ loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length) + loongarch_emit_move (adjust_address (dest, HImode, offset), regs_tmp[0]); + offset += 2; + } +- if(length - offset >= 1) ++ if (length - offset >= 1) + { + rtx *regs_tmp = XALLOCAVEC (rtx, 1); + regs_tmp[0] = gen_reg_rtx (QImode); +@@ -5108,7 +5146,7 @@ loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length) + offset += 1; + } + +- if(length - offset != 0) ++ if (length - offset != 0) + gcc_unreachable (); + } + else +@@ -5131,8 +5169,8 @@ loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length) + register. Store them in *LOOP_REG and *LOOP_MEM respectively. */ + + static void +-loongarch_adjust_block_mem (rtx mem, HOST_WIDE_INT length, +- rtx *loop_reg, rtx *loop_mem) ++loongarch_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg, ++ rtx *loop_mem) + { + *loop_reg = copy_addr_to_reg (XEXP (mem, 0)); + +@@ -5148,7 +5186,7 @@ loongarch_adjust_block_mem (rtx mem, HOST_WIDE_INT length, + + static void + loongarch_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length, +- HOST_WIDE_INT bytes_per_iter) ++ HOST_WIDE_INT bytes_per_iter) + { + rtx_code_label *label; + rtx src_reg, dest_reg, final_src, test; +@@ -5163,8 +5201,8 @@ loongarch_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length, + + /* Calculate the value that SRC_REG should have after the last iteration + of the loop. */ +- final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length), +- 0, 0, OPTAB_WIDEN); ++ final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length), 0, ++ 0, OPTAB_WIDEN); + + /* Emit the start of the loop. */ + label = gen_label_rtx (); +@@ -5174,8 +5212,10 @@ loongarch_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length, + loongarch_block_move_straight (dest, src, bytes_per_iter); + + /* Move on to the next block. */ +- loongarch_emit_move (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter)); +- loongarch_emit_move (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter)); ++ loongarch_emit_move (src_reg, ++ plus_constant (Pmode, src_reg, bytes_per_iter)); ++ loongarch_emit_move (dest_reg, ++ plus_constant (Pmode, dest_reg, bytes_per_iter)); + + /* Emit the loop condition. 
*/ + test = gen_rtx_NE (VOIDmode, src_reg, final_src); +@@ -5198,12 +5238,12 @@ loongarch_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length, + bool + loongarch_expand_block_move (rtx dest, rtx src, rtx length) + { +- +- int max_move_bytes = (TARGET_LASX ? \ ++ int max_move_bytes = (ISA_HAS_LASX ? \ + LARCH_MAX_MOVE_BYTES_STRAIGHT * 8 \ + : LARCH_MAX_MOVE_BYTES_STRAIGHT); + +- if (CONST_INT_P (length) && INTVAL (length) <= loongarch_max_inline_memcpy_size) ++ if (CONST_INT_P (length) ++ && INTVAL (length) <= loongarch_max_inline_memcpy_size) + { + if (INTVAL (length) <= max_move_bytes) + { +@@ -5213,13 +5253,12 @@ loongarch_expand_block_move (rtx dest, rtx src, rtx length) + else if (optimize) + { + loongarch_block_move_loop (dest, src, INTVAL (length), +- LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER); ++ LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER); + return true; + } + } + return false; + } +- + + /* Expand a QI or HI mode atomic memory operation. + +@@ -5239,13 +5278,12 @@ loongarch_expand_block_move (rtx dest, rtx src, rtx length) + + void + loongarch_expand_atomic_qihi (union loongarch_gen_fn_ptrs generator, +- rtx result, rtx mem, rtx oldval, +- rtx newval, rtx model) ++ rtx result, rtx mem, rtx oldval, rtx newval, ++ rtx model) + { + rtx orig_addr, memsi_addr, memsi, shift, shiftsi, unshifted_mask; + rtx unshifted_mask_reg, mask, inverted_mask, si_op; + rtx res = NULL; +- rtx tmp = NULL; + machine_mode mode; + + mode = GET_MODE (mem); +@@ -5253,7 +5291,7 @@ loongarch_expand_atomic_qihi (union loongarch_gen_fn_ptrs generator, + /* Compute the address of the containing SImode value. */ + orig_addr = force_reg (Pmode, XEXP (mem, 0)); + memsi_addr = loongarch_force_binary (Pmode, AND, orig_addr, +- force_reg (Pmode, GEN_INT (-4))); ++ force_reg (Pmode, GEN_INT (-4))); + + /* Create a memory reference for it. */ + memsi = gen_rtx_MEM (SImode, memsi_addr); +@@ -5263,7 +5301,6 @@ loongarch_expand_atomic_qihi (union loongarch_gen_fn_ptrs generator, + /* Work out the byte offset of the QImode or HImode value, + counting from the least significant byte. */ + shift = loongarch_force_binary (Pmode, AND, orig_addr, GEN_INT (3)); +- + /* Multiply by eight to convert the shift value from bytes to bits. */ + loongarch_emit_binary (ASHIFT, shift, shift, GEN_INT (3)); + +@@ -5301,14 +5338,13 @@ loongarch_expand_atomic_qihi (union loongarch_gen_fn_ptrs generator, + res = gen_reg_rtx (SImode); + + if (newval) +- si_op = generator.fn_7 (res, memsi, mask, inverted_mask, oldval, newval, model); ++ si_op = generator.fn_7 (res, memsi, mask, inverted_mask, oldval, newval, ++ model); + else if (result) + si_op = generator.fn_6 (res, memsi, mask, inverted_mask, oldval, model); + else + si_op = generator.fn_5 (memsi, mask, inverted_mask, oldval, model); + +- //si_op = generator.fn_7 (res, memsi, mask, inverted_mask, oldval, newval, model); +- + emit_insn (si_op); + + if (result) +@@ -5320,24 +5356,14 @@ loongarch_expand_atomic_qihi (union loongarch_gen_fn_ptrs generator, + } + } + +-/* Return true if X is a MEM with the same size as MODE. */ +- +-bool +-loongarch_mem_fits_mode_p (machine_mode mode, rtx x) +-{ +- return (MEM_P (x) +- && MEM_SIZE_KNOWN_P (x) +- && MEM_SIZE (x) == GET_MODE_SIZE (mode)); +-} +- + /* Return true if (zero_extract OP WIDTH BITPOS) can be used as the + source of an "ext" instruction or the destination of an "ins" + instruction. 
OP must be a register operand and the following + conditions must hold: + +- 0 <= BITPOS < GET_MODE_BITSIZE (GET_MODE (op)) +- 0 < WIDTH <= GET_MODE_BITSIZE (GET_MODE (op)) +- 0 < BITPOS + WIDTH <= GET_MODE_BITSIZE (GET_MODE (op)) ++ 0 <= BITPOS < GET_MODE_BITSIZE (GET_MODE (op)) ++ 0 < WIDTH <= GET_MODE_BITSIZE (GET_MODE (op)) ++ 0 < BITPOS + WIDTH <= GET_MODE_BITSIZE (GET_MODE (op)) + + Also reject lengths equal to a word as they are better handled + by the move patterns. */ +@@ -5358,31 +5384,11 @@ loongarch_use_ins_ext_p (rtx op, HOST_WIDE_INT width, HOST_WIDE_INT bitpos) + return true; + } + +- +-/* Return true iff OP1 and OP2 are valid operands together for the +- *and3 patterns. For the cases to consider, +- see the table in the comment before the pattern. */ +- +-bool +-and_operands_ok (machine_mode mode, rtx op1, rtx op2) +-{ +- +- if (memory_operand (op1, mode)) +- { +- return and_load_operand (op2, mode); +- } +- else +- return and_reg_operand (op2, mode); +-} +- + /* Print the text for PRINT_OPERAND punctation character CH to FILE. + The punctuation characters are: + + '.' Print the name of the register with a hard-wired zero (zero or $r0). + '$' Print the name of the stack pointer register (sp or $r3). +- ':' Print "c" to use the compact version if the delay slot is a nop. +- '!' Print "s" to use the short version if the delay slot contains a +- 16-bit instruction. + + See also loongarch_init_print_operand_punct. */ + +@@ -5399,14 +5405,6 @@ loongarch_print_operand_punctuation (FILE *file, int ch) + fputs (reg_names[STACK_POINTER_REGNUM], file); + break; + +- case ':': +- /* When final_sequence is 0, the delay slot will be a nop. We can +- use the compact version where available. The %: formatter will +- only be present if a compact form of the branch is available. */ +- if (final_sequence == 0) +- putc ('c', file); +- break; +- + default: + gcc_unreachable (); + break; +@@ -5420,7 +5418,7 @@ loongarch_init_print_operand_punct (void) + { + const char *p; + +- for (p = ".$:"; *p; p++) ++ for (p = ".$"; *p; p++) + loongarch_print_operand_punct[(unsigned char) *p] = true; + } + +@@ -5429,7 +5427,8 @@ loongarch_init_print_operand_punct (void) + opcode to FILE. */ + + static void +-loongarch_print_int_branch_condition (FILE *file, enum rtx_code code, int letter) ++loongarch_print_int_branch_condition (FILE *file, enum rtx_code code, ++ int letter) + { + switch (code) + { +@@ -5443,7 +5442,7 @@ loongarch_print_int_branch_condition (FILE *file, enum rtx_code code, int letter + case GEU: + case LTU: + case LEU: +- /* Conveniently, the LARCH names for these conditions are the same ++ /* Conveniently, the LoongArch names for these conditions are the same + as their RTL equivalents. */ + fputs (GET_RTX_NAME (code), file); + break; +@@ -5457,7 +5456,8 @@ loongarch_print_int_branch_condition (FILE *file, enum rtx_code code, int letter + /* Likewise floating-point branches. */ + + static void +-loongarch_print_float_branch_condition (FILE *file, enum rtx_code code, int letter) ++loongarch_print_float_branch_condition (FILE *file, enum rtx_code code, ++ int letter) + { + switch (code) + { +@@ -5487,20 +5487,22 @@ loongarch_print_operand_punct_valid_p (unsigned char code) + implement the release portion of memory model MODEL. 
*/ + + static bool +-loongarch_memmodel_needs_rel_and_acq_fence (enum memmodel model) ++loongarch_memmodel_needs_rel_acq_fence (enum memmodel model) + { + switch (model) + { + case MEMMODEL_ACQ_REL: + case MEMMODEL_SEQ_CST: + case MEMMODEL_SYNC_SEQ_CST: +- case MEMMODEL_RELEASE: + case MEMMODEL_SYNC_RELEASE: +- case MEMMODEL_ACQUIRE: +- case MEMMODEL_CONSUME: + case MEMMODEL_SYNC_ACQUIRE: + return true; + ++ case MEMMODEL_RELEASE: ++ case MEMMODEL_ACQUIRE: ++ case MEMMODEL_CONSUME: ++ if (!TARGET_uARCH_LA664) ++ return true; + case MEMMODEL_RELAXED: + return false; + +@@ -5517,25 +5519,25 @@ loongarch_memmodel_needs_release_fence (enum memmodel model) + { + switch (model) + { +- case MEMMODEL_ACQ_REL: +- case MEMMODEL_SEQ_CST: +- case MEMMODEL_SYNC_SEQ_CST: +- case MEMMODEL_RELEASE: +- case MEMMODEL_SYNC_RELEASE: +- return true; ++ case MEMMODEL_ACQ_REL: ++ case MEMMODEL_SEQ_CST: ++ case MEMMODEL_SYNC_SEQ_CST: ++ case MEMMODEL_RELEASE: ++ case MEMMODEL_SYNC_RELEASE: ++ return true; + +- case MEMMODEL_ACQUIRE: +- case MEMMODEL_CONSUME: +- case MEMMODEL_SYNC_ACQUIRE: +- case MEMMODEL_RELAXED: +- return false; ++ case MEMMODEL_ACQUIRE: ++ case MEMMODEL_CONSUME: ++ case MEMMODEL_SYNC_ACQUIRE: ++ case MEMMODEL_RELAXED: ++ return false; + +- default: +- gcc_unreachable (); ++ default: ++ gcc_unreachable (); + } + } + +-/* Implement TARGET_PRINT_OPERAND. The LARCH-specific operand codes are: ++/* Implement TARGET_PRINT_OPERAND. The LoongArch-specific operand codes are: + + 'E' Print CONST_INT OP element 0 of a replicated CONST_VECTOR in decimal. + 'X' Print CONST_INT OP in hexadecimal format. +@@ -5674,7 +5676,8 @@ loongarch_print_operand (FILE *file, rtx op, int letter) + break; + + case 'N': +- loongarch_print_int_branch_condition (file, reverse_condition (code), letter); ++ loongarch_print_int_branch_condition (file, reverse_condition (code), ++ letter); + break; + + case 'F': +@@ -5683,19 +5686,20 @@ loongarch_print_operand (FILE *file, rtx op, int letter) + + case 'W': + loongarch_print_float_branch_condition (file, reverse_condition (code), +- letter); ++ letter); + break; + + case 'T': + case 't': + { + int truth = (code == NE) == (letter == 'T'); +- fputc ("zfnt"[truth * 2 + ST_REG_P (REGNO (XEXP (op, 0)))], file); ++ fputc ("zfnt"[truth * 2 + FCC_REG_P (REGNO (XEXP (op, 0)))], file); + } + break; + + case 'Y': +- if (code == CONST_INT && UINTVAL (op) < ARRAY_SIZE (loongarch_fp_conditions)) ++ if (code == CONST_INT ++ && UINTVAL (op) < ARRAY_SIZE (loongarch_fp_conditions)) + fputs (loongarch_fp_conditions[UINTVAL (op)], file); + else + output_operand_lossage ("'%%%c' is not a valid operand prefix", +@@ -5750,18 +5754,36 @@ loongarch_print_operand (FILE *file, rtx op, int letter) + break; + + case 'A': +- if (loongarch_memmodel_needs_rel_and_acq_fence ((enum memmodel) INTVAL (op))) ++ if (loongarch_memmodel_needs_rel_acq_fence ((enum memmodel) INTVAL (op))) + fputs ("_db", file); + break; + + case 'G': + if (loongarch_memmodel_needs_release_fence ((enum memmodel) INTVAL (op))) +- fputs ("dbar\t0", file); ++ fputs ("dbar\t0x11", file); ++ break; ++ ++ case 'J': ++ if (TARGET_uARCH_LA664) ++ { ++ enum memmodel model = memmodel_from_int (INTVAL (op)); ++ if (is_mm_release (model)) ++ fputs ("dbar\t0x12", file); ++ } ++ break; ++ ++ case 'K': ++ if (TARGET_uARCH_LA664) ++ { ++ enum memmodel model = memmodel_from_int (INTVAL (op)); ++ if (is_mm_acquire (model)) ++ fputs ("dbar\t0x18", file); ++ } + break; + + case 'i': + if (code != REG) +- fputs ("i", file); ++ fputs ("i", file); + break; + + 
default: +@@ -5770,10 +5792,7 @@ loongarch_print_operand (FILE *file, rtx op, int letter) + case REG: + { + unsigned int regno = REGNO (op); +- if ((letter == 'M') +- || letter == 'D') +- regno++; +- else if (letter && letter != 'z' && letter != 'M' && letter != 'L') ++ if (letter && letter != 'z') + output_operand_lossage ("invalid use of '%%%c'", letter); + fprintf (file, "%s", reg_names[regno]); + } +@@ -5781,8 +5800,8 @@ loongarch_print_operand (FILE *file, rtx op, int letter) + + case MEM: + if (letter == 'D') +- output_address (GET_MODE (op), plus_constant (Pmode, +- XEXP (op, 0), 4)); ++ output_address (GET_MODE (op), ++ plus_constant (Pmode, XEXP (op, 0), 4)); + else if (letter == 'b') + { + gcc_assert (REG_P (XEXP (op, 0))); +@@ -5809,7 +5828,7 @@ loongarch_print_operand (FILE *file, rtx op, int letter) + /* Implement TARGET_PRINT_OPERAND_ADDRESS. */ + + static void +-loongarch_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x) ++loongarch_print_operand_address (FILE *file, machine_mode /* mode */, rtx x) + { + struct loongarch_address_info addr; + +@@ -5821,6 +5840,11 @@ loongarch_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x) + loongarch_print_operand (file, addr.offset, 0); + return; + ++ case ADDRESS_REG_REG: ++ fprintf (file, "%s,%s", reg_names[REGNO (addr.reg)], ++ reg_names[REGNO (addr.offset)]); ++ return; ++ + case ADDRESS_CONST_INT: + fprintf (file, "%s,", reg_names[GP_REG_FIRST]); + output_addr_const (file, x); +@@ -5830,37 +5854,17 @@ loongarch_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x) + output_addr_const (file, loongarch_strip_unspec_address (x)); + return; + } +- if (GET_CODE (x) == CONST_INT) ++ if (CONST_INT_P (x)) + output_addr_const (file, x); + else + gcc_unreachable (); + } + +- +-/* Implement TARGET_ENCODE_SECTION_INFO. */ +- +-static void +-loongarch_encode_section_info (tree decl, rtx rtl, int first) +-{ +- default_encode_section_info (decl, rtl, first); +- +- if (TREE_CODE (decl) == FUNCTION_DECL) +- { +- rtx symbol = XEXP (rtl, 0); +- tree type = TREE_TYPE (decl); +- +- /* Encode whether the symbol is short or long. */ +- if ((TARGET_LONG_CALLS && !loongarch_near_type_p (type)) +- || loongarch_far_type_p (type)) +- SYMBOL_REF_FLAGS (symbol) |= SYMBOL_FLAG_LONG_CALL; +- } +-} +- +-/* Implement TARGET_SELECT_RTX_SECTION. */ ++/* Implement TARGET_ASM_SELECT_RTX_SECTION. */ + + static section * + loongarch_select_rtx_section (machine_mode mode, rtx x, +- unsigned HOST_WIDE_INT align) ++ unsigned HOST_WIDE_INT align) + { + /* ??? Consider using mergeable small data sections. */ + if (loongarch_rtx_constant_in_small_data_p (mode)) +@@ -5871,12 +5875,10 @@ loongarch_select_rtx_section (machine_mode mode, rtx x, + + /* Implement TARGET_ASM_FUNCTION_RODATA_SECTION. + +- The complication here is that, with the combination +- !TARGET_ABSOLUTE_ABICALLS , jump tables will use +- absolute addresses, and should therefore not be included in the +- read-only part of a DSO. Handle such cases by selecting a normal +- data section instead of a read-only one. The logic apes that in +- default_function_rodata_section. */ ++ The complication here is that jump atbles will use absolute addresses, ++ and should therefore not be included in the read-only part of a DSO. ++ Handle such cases by selecting a normal data section instead of a ++ read-only one. The logic apes that in default_function_rodata_section. 
*/ + + static section * + loongarch_function_rodata_section (tree decl) +@@ -5889,17 +5891,11 @@ loongarch_function_rodata_section (tree decl) + static bool + loongarch_in_small_data_p (const_tree decl) + { +- unsigned HOST_WIDE_INT size; ++ int size; + + if (TREE_CODE (decl) == STRING_CST || TREE_CODE (decl) == FUNCTION_DECL) + return false; + +- /* We don't yet generate small-data references for +- VxWorks RTP code. See the related -G handling in +- loongarch_option_override. */ +- if (TARGET_VXWORKS_RTP) +- return false; +- + if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl) != 0) + { + const char *name; +@@ -5918,23 +5914,12 @@ loongarch_in_small_data_p (const_tree decl) + /* We have traditionally not treated zero-sized objects as small data, + so this is now effectively part of the ABI. */ + size = int_size_in_bytes (TREE_TYPE (decl)); +- return size > 0 && size <= loongarch_small_data_threshold; ++ return size > 0 && size <= g_switch_value; + } + +-/* Implement TARGET_USE_ANCHORS_FOR_SYMBOL_P. We don't want to use +- anchors for small data: the GP register acts as an anchor in that +- case. We also don't want to use them for PC-relative accesses, +- where the PC acts as an anchor. */ +- +-static bool +-loongarch_use_anchors_for_symbol_p (const_rtx symbol) +-{ +- return default_use_anchors_for_symbol_p (symbol); +-} +- +-/* The LARCH debug format wants all automatic variables and arguments ++/* The LoongArch debug format wants all automatic variables and arguments + to be in terms of the virtual frame pointer (stack pointer before +- any adjustment in the function), while the LARCH 3.0 linker wants ++ any adjustment in the function), while the LoongArch linker wants + the frame pointer to be the stack pointer after the initial + adjustment. So, we do the adjustment here. The arg pointer (which + is eliminated) points to the virtual frame pointer, while the frame +@@ -5961,7 +5946,7 @@ loongarch_debugger_offset (rtx addr, HOST_WIDE_INT offset) + + return offset; + } +- ++ + /* Implement ASM_OUTPUT_EXTERNAL. */ + + void +@@ -5971,7 +5956,7 @@ loongarch_output_external (FILE *file, tree decl, const char *name) + + /* We output the name if and only if TREE_SYMBOL_REFERENCED is + set in order to avoid putting out names that are never really +- used. */ ++ used. */ + if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl))) + { + if (loongarch_in_small_data_p (decl)) +@@ -6014,33 +5999,6 @@ loongarch_output_dwarf_dtprel (FILE *file, int size, rtx x) + fputs ("+0x8000", file); + } + +-/* Implement TARGET_DWARF_REGISTER_SPAN. */ +- +-static rtx +-loongarch_dwarf_register_span (rtx reg) +-{ +- rtx high, low; +- machine_mode mode; +- +- mode = GET_MODE (reg); +- +- return NULL_RTX; +-} +- +-/* Implement TARGET_DWARF_FRAME_REG_MODE. */ +- +-static machine_mode +-loongarch_dwarf_frame_reg_mode (int regno) +-{ +- machine_mode mode = default_dwarf_frame_reg_mode (regno); +- +- if (FP_REG_P (regno) && loongarch_abi == ABILP32 && TARGET_FLOAT64) +- mode = SImode; +- +- return mode; +-} +- +- + /* Implement ASM_OUTPUT_ASCII. */ + + void +@@ -6072,7 +6030,7 @@ loongarch_output_ascii (FILE *stream, const char *string, size_t len) + cur_pos += 4; + } + +- if (cur_pos > 72 && i+1 < len) ++ if (cur_pos > 72 && i + 1 < len) + { + cur_pos = 17; + fprintf (stream, "\"\n\t.ascii\t\""); +@@ -6081,194 +6039,6 @@ loongarch_output_ascii (FILE *stream, const char *string, size_t len) + fprintf (stream, "\"\n"); + } + +-/* Emit either a label, .comm, or .lcomm directive. 
When using assembler +- macros, mark the symbol as written so that loongarch_asm_output_external +- won't emit an .extern for it. STREAM is the output file, NAME is the +- name of the symbol, INIT_STRING is the string that should be written +- before the symbol and FINAL_STRING is the string that should be +- written after it. FINAL_STRING is a printf format that consumes the +- remaining arguments. */ +- +-void +-loongarch_declare_object (FILE *stream, const char *name, const char *init_string, +- const char *final_string, ...) +-{ +- va_list ap; +- +- fputs (init_string, stream); +- assemble_name (stream, name); +- va_start (ap, final_string); +- vfprintf (stream, final_string, ap); +- va_end (ap); +- +- tree name_tree = get_identifier (name); +- TREE_ASM_WRITTEN (name_tree) = 1; +-} +- +-/* Declare a common object of SIZE bytes using asm directive INIT_STRING. +- NAME is the name of the object and ALIGN is the required alignment +- in bytes. TAKES_ALIGNMENT_P is true if the directive takes a third +- alignment argument. */ +- +-void +-loongarch_declare_common_object (FILE *stream, const char *name, +- const char *init_string, +- unsigned HOST_WIDE_INT size, +- unsigned int align, bool takes_alignment_p) +-{ +- if (!takes_alignment_p) +- { +- size += (align / BITS_PER_UNIT) - 1; +- size -= size % (align / BITS_PER_UNIT); +- loongarch_declare_object (stream, name, init_string, +- "," HOST_WIDE_INT_PRINT_UNSIGNED "\n", size); +- } +- else +- loongarch_declare_object (stream, name, init_string, +- "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n", +- size, align / BITS_PER_UNIT); +-} +- +-/* Implement ASM_OUTPUT_ALIGNED_DECL_COMMON. This is usually the same as the +- elfos.h version, but we also need to handle -muninit-const-in-rodata. */ +- +-void +-loongarch_output_aligned_decl_common (FILE *stream, tree decl, const char *name, +- unsigned HOST_WIDE_INT size, +- unsigned int align) +-{ +- loongarch_declare_common_object (stream, name, "\n\t.comm\t", +- size, align, true); +-} +- +-#ifdef ASM_OUTPUT_SIZE_DIRECTIVE +-extern int size_directive_output; +- +-/* Implement ASM_DECLARE_OBJECT_NAME. This is like most of the standard ELF +- definitions except that it uses loongarch_declare_object to emit the label. */ +- +-void +-loongarch_declare_object_name (FILE *stream, const char *name, +- tree decl ATTRIBUTE_UNUSED) +-{ +-#ifdef ASM_OUTPUT_TYPE_DIRECTIVE +-#ifdef USE_GNU_UNIQUE_OBJECT +- /* As in elfos.h. */ +- if (USE_GNU_UNIQUE_OBJECT && DECL_ONE_ONLY (decl) +- && (!DECL_ARTIFICIAL (decl) || !TREE_READONLY (decl))) +- ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "gnu_unique_object"); +- else +-#endif +- ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object"); +-#endif +- +- size_directive_output = 0; +- if (!flag_inhibit_size_directive && DECL_SIZE (decl)) +- { +- HOST_WIDE_INT size; +- +- size_directive_output = 1; +- size = int_size_in_bytes (TREE_TYPE (decl)); +- ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size); +- } +- +- loongarch_declare_object (stream, name, "", ":\n"); +-} +- +-/* Implement ASM_FINISH_DECLARE_OBJECT. This is generic ELF stuff. 
*/ +- +-void +-loongarch_finish_declare_object (FILE *stream, tree decl, int top_level, int at_end) +-{ +- const char *name; +- +- name = XSTR (XEXP (DECL_RTL (decl), 0), 0); +- if (!flag_inhibit_size_directive +- && DECL_SIZE (decl) != 0 +- && !at_end +- && top_level +- && DECL_INITIAL (decl) == error_mark_node +- && !size_directive_output) +- { +- HOST_WIDE_INT size; +- +- size_directive_output = 1; +- size = int_size_in_bytes (TREE_TYPE (decl)); +- ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size); +- } +-} +-#endif +- +-/* Mark text contents as code or data, mainly for the purpose of correct +- disassembly. Emit a local symbol and set its type appropriately for +- that purpose. Also emit `.insn' if marking contents as code so that +- the ISA mode is recorded and any padding that follows is disassembled +- as correct instructions. */ +- +-void +-loongarch_set_text_contents_type (FILE *file ATTRIBUTE_UNUSED, +- const char *prefix ATTRIBUTE_UNUSED, +- unsigned long num ATTRIBUTE_UNUSED, +- bool function_p ATTRIBUTE_UNUSED) +-{ +-#ifdef ASM_OUTPUT_TYPE_DIRECTIVE +- char buf[(sizeof (num) * 10) / 4 + 2]; +- const char *fnname; +- char *sname; +- rtx symbol; +- +- sprintf (buf, "%lu", num); +- symbol = XEXP (DECL_RTL (current_function_decl), 0); +- fnname = targetm.strip_name_encoding (XSTR (symbol, 0)); +- sname = ACONCAT ((prefix, fnname, "_", buf, NULL)); +- +- ASM_OUTPUT_TYPE_DIRECTIVE (file, sname, function_p ? "function" : "object"); +- assemble_name (file, sname); +- fputs (":\n", file); +-// if (function_p) +-// fputs ("\t.insn\n", file); +-#endif +-} +- +- +-/* Implement TARGET_ASM_FILE_START. */ +- +-static void +-loongarch_file_start (void) +-{ +- default_file_start (); +- +- /* Generate a special section to describe the ABI switches used to +- produce the resultant binary. */ +-} +- +- +-/* Return true if REGNO is a register that is ordinarily call-clobbered +- but must nevertheless be preserved by an interrupt handler. */ +- +-static bool +-loongarch_interrupt_extra_call_saved_reg_p (unsigned int regno) +-{ +- if (GP_REG_P (regno) +- && cfun->machine->use_shadow_register_set == SHADOW_SET_NO) +- { +- /* $0 is hard-wired. */ +- if (regno == GP_REG_FIRST) +- return false; +- +- /* The function will return the stack pointer to its original value +- anyway. */ +- if (regno == STACK_POINTER_REGNUM) +- return false; +- +- /* Otherwise, return true for registers that aren't ordinarily +- call-clobbered. */ +- return call_used_regs[regno]; +- } +- +- return false; +-} +- + /* Implement TARGET_FRAME_POINTER_REQUIRED. */ + + static bool +@@ -6282,17 +6052,20 @@ loongarch_frame_pointer_required (void) + return false; + } + +-/* Make sure that we're not trying to eliminate to the wrong hard frame +- pointer. */ ++/* Implement TARGET_CAN_ELIMINATE. Make sure that we're not trying ++ to eliminate to the wrong hard frame pointer. */ + + static bool + loongarch_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) + { ++ if (stack_realign_fp) ++ return ((from == ARG_POINTER_REGNUM ++ && to == HARD_FRAME_POINTER_REGNUM) ++ || (from == FRAME_POINTER_REGNUM ++ && to == STACK_POINTER_REGNUM)); + return (to == HARD_FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM); + } + +- +- + /* Implement RETURN_ADDR_RTX. We do not support moving back to a + previous frame. 
*/ + +@@ -6315,73 +6088,21 @@ loongarch_set_return_address (rtx address, rtx scratch) + rtx slot_address; + + gcc_assert (BITSET_P (cfun->machine->frame.mask, RETURN_ADDR_REGNUM)); ++ + if (frame_pointer_needed) + slot_address = loongarch_add_offset (scratch, hard_frame_pointer_rtx, +- -UNITS_PER_WORD); ++ -UNITS_PER_WORD); + else + slot_address = loongarch_add_offset (scratch, stack_pointer_rtx, +- cfun->machine->frame.gp_sp_offset); +- loongarch_emit_move (gen_frame_mem (GET_MODE (address), slot_address), address); +-} +- +- +-/* Fill *BASE and *OFFSET such that *BASE + *OFFSET refers to the +- cprestore slot. LOAD_P is true if the caller wants to load from +- the cprestore slot; it is false if the caller wants to store to +- the slot. */ +- +-static void +-loongarch_get_cprestore_base_and_offset (rtx *base, HOST_WIDE_INT *offset, +- bool load_p) +-{ +- const struct loongarch_frame_info *frame; +- +- frame = &cfun->machine->frame; +- /* .cprestore always uses the stack pointer instead of the frame pointer. +- We have a free choice for direct stores, +- Using the stack pointer would sometimes give more +- (early) scheduling freedom, but using the frame pointer would +- sometimes give more (late) scheduling freedom. It's hard to +- predict which applies to a given function, so let's keep things +- simple. +- +- Loads must always use the frame pointer in functions that call +- alloca, and there's little benefit to using the stack pointer +- otherwise. */ +- if (frame_pointer_needed) +- { +- *base = hard_frame_pointer_rtx; +- *offset = frame->args_size - frame->hard_frame_pointer_offset; +- } +- else +- { +- *base = stack_pointer_rtx; +- *offset = frame->args_size; +- } +-} ++ cfun->machine->frame.gp_sp_offset); + +-/* Return true if X is the load or store address of the cprestore slot; +- LOAD_P says which. */ +- +-bool +-loongarch_cprestore_address_p (rtx x, bool load_p) +-{ +- rtx given_base, required_base; +- HOST_WIDE_INT given_offset, required_offset; +- +- loongarch_split_plus (x, &given_base, &given_offset); +- loongarch_get_cprestore_base_and_offset (&required_base, &required_offset, load_p); +- return given_base == required_base && given_offset == required_offset; ++ loongarch_emit_move (gen_frame_mem (GET_MODE (address), slot_address), ++ address); + } + +- +-/* A function to save or store a register. The first argument is the +- register and the second is the stack slot. 
*/ +-typedef void (*loongarch_save_restore_fn) (rtx, rtx); +- + /* LOONGSON LA464 Emit insn pattern for gssq and gslq*/ + void +-loongarch_la464_emit_128bit_load(rtx operands[]) ++loongarch_la464_emit_128bit_load (rtx operands[]) + { + rtx op0; + rtx op1; +@@ -6389,9 +6110,9 @@ loongarch_la464_emit_128bit_load(rtx operands[]) + rtx op3; + + #if 0 /*for debug*/ +- printf("464po: emit 128 PO LOAD!\n"); +- printf("reg num of op0 is: %d\n",REGNO(operands[0])); +- printf("reg num of op2 is: %d\n",REGNO(operands[2])); ++ printf ("464po: emit 128 PO LOAD!\n"); ++ printf ("reg num of op0 is: %d\n",REGNO (operands[0])); ++ printf ("reg num of op2 is: %d\n",REGNO (operands[2])); + #endif + op0 = gen_rtx_REG (GET_MODE (operands[0]), REGNO (operands[0])); + op1 = operands[1]; +@@ -6403,8 +6124,8 @@ loongarch_la464_emit_128bit_load(rtx operands[]) + gen_rtx_SET (op2,op3)))); + } + +-void +-loongarch_la464_emit_128bit_store(rtx operands[]) ++void ++loongarch_la464_emit_128bit_store (rtx operands[]) + { + rtx op0; + rtx op1; +@@ -6412,10 +6133,10 @@ loongarch_la464_emit_128bit_store(rtx operands[]) + rtx op3; + + #if 0 /*for debug*/ +- printf("464po: emit 128 PO STORE!\n"); +- printf("reg num of op1 is: %d\n",REGNO(operands[1])); +- printf("reg num of op3 is: %d\n",REGNO(operands[3])); +-#endif ++ printf ("464po: emit 128 PO STORE!\n"); ++ printf ("reg num of op1 is: %d\n",REGNO (operands[1])); ++ printf ("reg num of op3 is: %d\n",REGNO (operands[3])); ++#endif + op0 = operands[0]; + op1 = gen_rtx_REG (GET_MODE (operands[1]), REGNO (operands[1])); + op2 = operands[2]; +@@ -6427,405 +6148,109 @@ loongarch_la464_emit_128bit_store(rtx operands[]) + + } + ++/* Return true if register REGNO can store a value of mode MODE. ++ The result of this function is cached in loongarch_hard_regno_mode_ok. */ + +- ++static bool ++loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode) ++{ ++ unsigned int size; ++ enum mode_class mclass; + +-/* Implement ASM_DECLARE_FUNCTION_NAME. */ ++ if (mode == FCCmode) ++ return FCC_REG_P (regno); + +-void loongarch_declare_function_name(FILE *stream ATTRIBUTE_UNUSED, +- const char *name, tree fndecl ATTRIBUTE_UNUSED) +-{ +- loongarch_start_function_definition (name); +-} ++ size = GET_MODE_SIZE (mode); ++ mclass = GET_MODE_CLASS (mode); + +-/* Implement TARGET_OUTPUT_FUNCTION_PROLOGUE. */ ++ if (GP_REG_P (regno) && !LSX_SUPPORTED_MODE_P (mode) && !LASX_SUPPORTED_MODE_P (mode)) ++ return ((regno - GP_REG_FIRST) & 1) == 0 || size <= UNITS_PER_WORD; + +-static void +-loongarch_output_function_prologue (FILE *file) +-{ ++ /* For LSX, allow TImode and 128-bit vector modes in all FPR. */ ++ if (FP_REG_P (regno) && LSX_SUPPORTED_MODE_P (mode)) ++ return true; ++ ++ /* For LASX, allow TImode and 256-bit vector modes in all FPR. FIXME: */ ++ if (FP_REG_P (regno) && LASX_SUPPORTED_MODE_P (mode)) ++ return true; ++ ++ if (FP_REG_P (regno)) ++ { ++ if (mclass == MODE_FLOAT ++ || mclass == MODE_COMPLEX_FLOAT ++ || mclass == MODE_VECTOR_FLOAT) ++ return size <= UNITS_PER_FPVALUE; ++ ++ /* Allow integer modes that fit into a single register. We need ++ to put integers into FPRs when using instructions like CVT ++ and TRUNC. There's no point allowing sizes smaller than a word, ++ because the FPU has no appropriate load/store instructions. */ ++ if (mclass == MODE_INT) ++ return size >= MIN_UNITS_PER_WORD && size <= UNITS_PER_FPREG; ++ } ++ ++ return false; + } + +-/* Implement TARGET_OUTPUT_FUNCTION_EPILOGUE. */ ++/* Implement TARGET_HARD_REGNO_MODE_OK. 
*/ + +-static void +-loongarch_output_function_epilogue (FILE *) ++static bool ++loongarch_hard_regno_mode_ok (unsigned int regno, machine_mode mode) + { +- const char *fnname; +- +- /* Get the function name the same way that toplev.c does before calling +- assemble_start_function. This is needed so that the name used here +- exactly matches the name used in ASM_DECLARE_FUNCTION_NAME. */ +- fnname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0); +- loongarch_end_function_definition (fnname); ++ return loongarch_hard_regno_mode_ok_p[mode][regno]; + } +- + +-#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP) ++static bool ++loongarch_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode) ++{ ++ if (ISA_HAS_LSX && FP_REG_P (regno) && GET_MODE_SIZE (mode) > 8) ++ return true; + +-#if PROBE_INTERVAL > 16384 +-#error Cannot use indexed addressing mode for stack probing +-#endif ++ return false; ++} + +-/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE, +- inclusive. These are offsets from the current stack pointer. */ ++/* Implement TARGET_HARD_REGNO_NREGS. */ + +-static void +-loongarch_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size) ++static unsigned int ++loongarch_hard_regno_nregs (unsigned int regno, machine_mode mode) + { ++ if (FCC_REG_P (regno)) ++ /* The size of FP status registers is always 4, because they only hold ++ FCCmode values, and FCCmode is always considered to be 4 bytes wide. */ ++ return (GET_MODE_SIZE (mode) + 3) / 4; + +- /* See if we have a constant small number of probes to generate. If so, +- that's the easy case. */ +- if ((TARGET_64BIT && (first + size <= 8 * PROBE_INTERVAL)) +- || (!TARGET_64BIT && (first + size <= 2048))) ++ if (FP_REG_P (regno)) + { +- HOST_WIDE_INT i; ++ if (LSX_SUPPORTED_MODE_P (mode)) ++ return 1; + +- /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until +- it exceeds SIZE. If only one probe is needed, this will not +- generate any code. Then probe at FIRST + SIZE. */ +- for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL) +- emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, +- -(first + i))); ++ if (LASX_SUPPORTED_MODE_P (mode)) ++ return 1; + +- emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, +- -(first + size))); ++ return (GET_MODE_SIZE (mode) + UNITS_PER_FPREG - 1) / UNITS_PER_FPREG; + } + +- /* Otherwise, do the same as above, but in a loop. Note that we must be +- extra careful with variables wrapping around because we might be at +- the very top (or the very bottom) of the address space and we have +- to be able to handle this case properly; in particular, we use an +- equality test for the loop condition. */ +- else +- { +- HOST_WIDE_INT rounded_size; +- rtx r13 = LARCH_PROLOGUE_TEMP (Pmode); +- rtx r12 = LARCH_PROLOGUE_TEMP2 (Pmode); +- rtx r14 = LARCH_PROLOGUE_TEMP3 (Pmode); ++ /* All other registers are word-sized. */ ++ return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; ++} + +- /* Sanity check for the addressing mode we're going to use. */ +- gcc_assert (first <= 16384); ++/* Implement CLASS_MAX_NREGS, taking the maximum of the cases ++ in loongarch_hard_regno_nregs. */ + ++int ++loongarch_class_max_nregs (enum reg_class rclass, machine_mode mode) ++{ ++ int size; ++ HARD_REG_SET left; + +- /* Step 1: round SIZE to the previous multiple of the interval. 
*/ ++ size = 0x8000; ++ COPY_HARD_REG_SET (left, reg_class_contents[(int) rclass]); ++ if (hard_reg_set_intersect_p (left, reg_class_contents[(int) FCC_REGS])) ++ { ++ if (loongarch_hard_regno_mode_ok (FCC_REG_FIRST, mode)) ++ size = MIN (size, 4); + +- rounded_size = ROUND_DOWN (size, PROBE_INTERVAL); +- /* TEST_ADDR = SP + FIRST */ +- if (first != 0) +- { +- emit_move_insn (r14, GEN_INT (first)); +- emit_insn (gen_rtx_SET (r13, gen_rtx_MINUS (Pmode, stack_pointer_rtx, r14))); +- } +- else +- emit_move_insn (r13, stack_pointer_rtx); +- +- /* Step 2: compute initial and final value of the loop counter. */ +- +- emit_move_insn (r14, GEN_INT (PROBE_INTERVAL)); +- if (rounded_size == 0) +- emit_move_insn (r12, r13); +- /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */ +- else +- { +- emit_move_insn (r12, GEN_INT (rounded_size)); +- emit_insn (gen_rtx_SET (r12, gen_rtx_MINUS (Pmode, r13, r12))); +- /* Step 3: the loop +- +- do +- { +- TEST_ADDR = TEST_ADDR + PROBE_INTERVAL +- probe at TEST_ADDR +- } +- while (TEST_ADDR != LAST_ADDR) +- +- probes at FIRST + N * PROBE_INTERVAL for values of N from 1 +- until it is equal to ROUNDED_SIZE. */ +- +- emit_insn (PMODE_INSN (gen_probe_stack_range, (r13, r13, r12, r14))); +- } +- +- /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time +- that SIZE is equal to ROUNDED_SIZE. */ +- +- if (size != rounded_size) +- { +- if (TARGET_64BIT) +- emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size)); +- else +- { +- HOST_WIDE_INT i; +- for (i = 2048; i < (size - rounded_size); i += 2048 ) +- { +- emit_stack_probe (plus_constant (Pmode, r12, -i)); +- emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, r12, -2048))); +- } +- emit_stack_probe (plus_constant (Pmode, r12, -(size - rounded_size - i + 2048))); +- } +- } +- } +- +- /* Make sure nothing is scheduled before we are done. */ +- emit_insn (gen_blockage ()); +-} +- +-/* Probe a range of stack addresses from REG1 to REG2 inclusive. These are +- absolute addresses. */ +- +-const char * +-loongarch_output_probe_stack_range (rtx reg1, rtx reg2, rtx reg3) +-{ +- static int labelno = 0; +- char loop_lab[32], tmp[64]; +- rtx xops[3]; +- +- ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++); +- +- /* Loop. */ +- ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); +- +- /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */ +- xops[0] = reg1; +- xops[1] = GEN_INT (-PROBE_INTERVAL); +- xops[2] = reg3; +- if (TARGET_64BIT) +- output_asm_insn ("sub.d\t%0,%0,%2", xops); +- else +- output_asm_insn ("sub.w\t%0,%0,%2", xops); +- +- /* Probe at TEST_ADDR, test if TEST_ADDR == LAST_ADDR and branch. */ +- xops[1] = reg2; +- strcpy (tmp, "bne\t%0,%1,"); +- if (TARGET_64BIT) +- output_asm_insn ("st.d\t$r0,%0,0", xops); +- else +- output_asm_insn ("st.w\t$r0,%0,0", xops); +- output_asm_insn (strcat (tmp, &loop_lab[1]), xops); +- +- return ""; +-} +- +-/* Expand the "prologue" pattern. 
*/ +- +-void +-loongarch_expand_prologue (void) +-{ +- struct loongarch_frame_info *frame = &cfun->machine->frame; +- HOST_WIDE_INT size = frame->total_size; +- unsigned mask = frame->mask; +- rtx insn; +- +- if (flag_stack_usage_info) +- current_function_static_stack_size = size; +- +- if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK +- || flag_stack_clash_protection) +- { +- if (crtl->is_leaf && !cfun->calls_alloca) +- { +- if (size > PROBE_INTERVAL && size > get_stack_check_protect ()) +- loongarch_emit_probe_stack_range (get_stack_check_protect (), +- size - get_stack_check_protect ()); +- } +- else if (size > 0) +- loongarch_emit_probe_stack_range (get_stack_check_protect (), size); +- } +- +- /* When optimizing for size, call a subroutine to save the registers. */ +- if (loongarch_use_save_libcall (frame)) +- { +- rtx dwarf = NULL_RTX; +- dwarf = loongarch_adjust_libcall_cfi_prologue (); +- +- frame->mask = 0; /* Temporarily fib that we need not save GPRs. */ +- size -= frame->save_libcall_adjustment; +- insn = emit_insn (gen_gpr_save (GEN_INT (mask))); +- +- RTX_FRAME_RELATED_P (insn) = 1; +- REG_NOTES (insn) = dwarf; +- } +- +- /* Save the registers. */ +- if ((frame->mask | frame->fmask) != 0) +- { +- HOST_WIDE_INT step1 = MIN (size, loongarch_first_stack_step (frame)); +- +- insn = gen_add3_insn (stack_pointer_rtx, +- stack_pointer_rtx, +- GEN_INT (-step1)); +- RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; +- size -= step1; +- loongarch_for_each_saved_reg (size, loongarch_save_reg); +- } +- +- frame->mask = mask; /* Undo the above fib. */ +- +- /* Set up the frame pointer, if we're using one. */ +- if (frame_pointer_needed) +- { +- insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx, +- GEN_INT (frame->hard_frame_pointer_offset - size)); +- RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; +- +- loongarch_emit_stack_tie (); +- } +- +- /* Allocate the rest of the frame. */ +- if (size > 0) +- { +- if (SMALL_OPERAND (-size)) +- { +- insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, +- GEN_INT (-size)); +- RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; +- } +- else +- { +- loongarch_emit_move (N_LARCH_PROLOGUE_TEMP (Pmode), GEN_INT (-size)); +- emit_insn (gen_add3_insn (stack_pointer_rtx, +- stack_pointer_rtx, +- N_LARCH_PROLOGUE_TEMP (Pmode))); +- +- /* Describe the effect of the previous instructions. */ +- insn = plus_constant (Pmode, stack_pointer_rtx, -size); +- insn = gen_rtx_SET (stack_pointer_rtx, insn); +- loongarch_set_frame_expr (insn); +- } +- } +-} +- +- +-/* Return true if register REGNO can store a value of mode MODE. +- The result of this function is cached in loongarch_hard_regno_mode_ok. */ +- +-static bool +-loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode) +-{ +- unsigned int size; +- enum mode_class mclass; +- +- if (mode == FCCmode) +- return ST_REG_P (regno); +- +- size = GET_MODE_SIZE (mode); +- mclass = GET_MODE_CLASS (mode); +- +- if (GP_REG_P (regno) && !LSX_SUPPORTED_MODE_P (mode) && !LASX_SUPPORTED_MODE_P (mode)) +- return ((regno - GP_REG_FIRST) & 1) == 0 || size <= UNITS_PER_WORD; +- +- /* For LSX, allow TImode and 128-bit vector modes in all FPR. */ +- if (FP_REG_P (regno) && LSX_SUPPORTED_MODE_P (mode)) +- return true; +- +- /* For LASX, allow TImode and 256-bit vector modes in all FPR. 
FIXME: */ +- if (FP_REG_P (regno) && LASX_SUPPORTED_MODE_P (mode)) +- return true; +- +- if (FP_REG_P (regno) +- && (((regno - FP_REG_FIRST) % MAX_FPRS_PER_FMT) == 0 +- || (MIN_FPRS_PER_FMT == 1 && size <= UNITS_PER_FPREG))) +- { +- if (mclass == MODE_FLOAT +- || mclass == MODE_COMPLEX_FLOAT +- || mclass == MODE_VECTOR_FLOAT) +- return size <= UNITS_PER_FPVALUE; +- +- /* Allow integer modes that fit into a single register. We need +- to put integers into FPRs when using instructions like CVT +- and TRUNC. There's no point allowing sizes smaller than a word, +- because the FPU has no appropriate load/store instructions. */ +- if (mclass == MODE_INT) +- return size >= MIN_UNITS_PER_WORD && size <= UNITS_PER_FPREG; +- } +- +- return false; +-} +- +-/* Implement TARGET_HARD_REGNO_MODE_OK. */ +- +-static bool +-loongarch_hard_regno_mode_ok (unsigned int regno, machine_mode mode) +-{ +- return loongarch_hard_regno_mode_ok_p[mode][regno]; +-} +- +-/* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */ +- +-bool +-loongarch_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED, +- unsigned int new_reg) +-{ +- /* Interrupt functions can only use registers that have already been +- saved by the prologue, even if they would normally be call-clobbered. */ +- if (cfun->machine->interrupt_handler_p && !df_regs_ever_live_p (new_reg)) +- return false; +- +- return true; +-} +- +-/* Return nonzero if register REGNO can be used as a scratch register +- in peephole2. */ +- +-bool +-loongarch_hard_regno_scratch_ok (unsigned int regno) +-{ +- /* See loongarch_hard_regno_rename_ok. */ +- if (cfun->machine->interrupt_handler_p && !df_regs_ever_live_p (regno)) +- return false; +- +- return true; +-} +- +-static bool +-loongarch_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode) +-{ +- if (ISA_HAS_LSX && FP_REG_P (regno) && GET_MODE_SIZE (mode) > 8) +- return true; +- +- return false; +-} +- +-/* Implement TARGET_HARD_REGNO_NREGS. */ +- +-static unsigned int +-loongarch_hard_regno_nregs (unsigned int regno, machine_mode mode) +-{ +- if (ST_REG_P (regno)) +- /* The size of FP status registers is always 4, because they only hold +- FCCmode values, and FCCmode is always considered to be 4 bytes wide. */ +- return (GET_MODE_SIZE (mode) + 3) / 4; +- +- if (FP_REG_P (regno)) +- { +- if (LSX_SUPPORTED_MODE_P (mode)) +- return 1; +- +- if (LASX_SUPPORTED_MODE_P (mode)) +- return 1; +- +- return (GET_MODE_SIZE (mode) + UNITS_PER_FPREG - 1) / UNITS_PER_FPREG; +- } +- +- /* All other registers are word-sized. */ +- return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; +-} +- +-/* Implement CLASS_MAX_NREGS, taking the maximum of the cases +- in loongarch_hard_regno_nregs. */ +- +-int +-loongarch_class_max_nregs (enum reg_class rclass, machine_mode mode) +-{ +- int size; +- HARD_REG_SET left; +- +- size = 0x8000; +- COPY_HARD_REG_SET (left, reg_class_contents[(int) rclass]); +- if (hard_reg_set_intersect_p (left, reg_class_contents[(int) ST_REGS])) +- { +- if (loongarch_hard_regno_mode_ok (ST_REG_FIRST, mode)) +- size = MIN (size, 4); +- +- AND_COMPL_HARD_REG_SET (left, reg_class_contents[(int) ST_REGS]); ++ AND_COMPL_HARD_REG_SET (left, reg_class_contents[(int) FCC_REGS]); + } + if (hard_reg_set_intersect_p (left, reg_class_contents[(int) FP_REGS])) + { +@@ -6849,8 +6274,8 @@ loongarch_class_max_nregs (enum reg_class rclass, machine_mode mode) + /* Implement TARGET_CAN_CHANGE_MODE_CLASS. 
*/ + + static bool +-loongarch_can_change_mode_class (machine_mode from, +- machine_mode to, reg_class_t rclass) ++loongarch_can_change_mode_class (machine_mode from, machine_mode to, ++ reg_class_t rclass) + { + /* Allow conversions between different Loongson integer vectors, + and between those vectors and DImode. */ +@@ -6866,42 +6291,10 @@ loongarch_can_change_mode_class (machine_mode from, + if (LSX_SUPPORTED_MODE_P (from) && LSX_SUPPORTED_MODE_P (to)) + return true; + +- /* Otherwise, there are several problems with changing the modes of +- values in floating-point registers: +- +- - When a multi-word value is stored in paired floating-point +- registers, the first register always holds the low word. We +- therefore can't allow FPRs to change between single-word and +- multi-word modes on big-endian targets. +- +- - GCC assumes that each word of a multiword register can be +- accessed individually using SUBREGs. This is not true for +- floating-point registers if they are bigger than a word. +- +- - Loading a 32-bit value into a 64-bit floating-point register +- will not sign-extend the value, despite what LOAD_EXTEND_OP +- says. We can't allow FPRs to change from SImode to a wider +- mode on 64-bit targets. +- +- - If the FPU has already interpreted a value in one format, we +- must not ask it to treat the value as having a different +- format. +- +- We therefore disallow all mode changes involving FPRs. */ +- + return !reg_classes_intersect_p (FP_REGS, rclass); + } + +-/* Implement target hook small_register_classes_for_mode_p. */ +- +-static bool +-loongarch_small_register_classes_for_mode_p (machine_mode mode +- ATTRIBUTE_UNUSED) +-{ +- return 0; +-} +- +-/* Return true if moves in mode MODE can use the FPU's mov.fmt instruction, ++/* Return true if moves in mode MODE can use the FPU's fmov.fmt instruction, + or use the LSX's move.v instruction. */ + + static bool +@@ -6909,6 +6302,7 @@ loongarch_mode_ok_for_mov_fmt_p (machine_mode mode) + { + switch (mode) + { ++ case E_FCCmode: + case E_SFmode: + return TARGET_HARD_FLOAT; + +@@ -6976,7 +6370,7 @@ loongarch_move_to_gpr_cost (reg_class_t from) + return 2; + + case FP_REGS: +- /* MFC1, etc. */ ++ /* MOVFR2GR, etc. */ + return 4; + + default: +@@ -6998,7 +6392,7 @@ loongarch_move_from_gpr_cost (reg_class_t to) + return 2; + + case FP_REGS: +- /* MTC1, etc. */ ++ /* MOVGR2FR, etc. */ + return 4; + + default: +@@ -7011,8 +6405,8 @@ loongarch_move_from_gpr_cost (reg_class_t to) + the maximum for us. */ + + static int +-loongarch_register_move_cost (machine_mode mode, +- reg_class_t from, reg_class_t to) ++loongarch_register_move_cost (machine_mode mode, reg_class_t from, ++ reg_class_t to) + { + reg_class_t dregs; + int cost1, cost2; +@@ -7024,7 +6418,7 @@ loongarch_register_move_cost (machine_mode mode, + if (from == FP_REGS) + { + if (to == FP_REGS && loongarch_mode_ok_for_mov_fmt_p (mode)) +- /* MOV.FMT. */ ++ /* FMOV.FMT. */ + return 4; + } + +@@ -7054,28 +6448,6 @@ loongarch_memory_move_cost (machine_mode mode, reg_class_t rclass, bool in) + { + return (loongarch_cost->memory_latency + + memory_move_secondary_cost (mode, rclass, in)); +-} +- +-/* Implement TARGET_SECONDARY_MEMORY_NEEDED. +- +- When targeting the o32 FPXX ABI, all moves with a length of doubleword +- or greater must be performed by FR-mode-aware instructions. +- This can be achieved using MOVFRH2GR.S/MOVGR2FRH.W when these instructions are +- available but otherwise moves must go via memory. 
+- Using MOVGR2FR/MOVFR2GR to access the lower-half of these registers would require +- a forbidden single-precision access. We require all double-word moves to use +- memory because adding even and odd floating-point registers classes +- would have a significant impact on the backend. */ +- +-static bool +-loongarch_secondary_memory_needed (machine_mode mode, reg_class_t class1, +- reg_class_t class2) +-{ +- /* Ignore spilled pseudos. */ +- if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS)) +- return false; +- +- return false; + } + + /* Return the register class required for a secondary register when +@@ -7084,9 +6456,10 @@ loongarch_secondary_memory_needed (machine_mode mode, reg_class_t class1, + is the destination. Return NO_REGS if no secondary register is + needed. */ + +-enum reg_class +-loongarch_secondary_reload_class (enum reg_class rclass, +- machine_mode mode, rtx x, bool) ++static reg_class_t ++loongarch_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x, ++ reg_class_t rclass, machine_mode mode, ++ secondary_reload_info *sri ATTRIBUTE_UNUSED) + { + int regno; + +@@ -7094,15 +6467,12 @@ loongarch_secondary_reload_class (enum reg_class rclass, + + /* Copying from accumulator registers to anywhere other than a general + register requires a temporary general register. */ +-// if (reg_class_subset_p (rclass, ACC_REGS)) ?????? +-// return GP_REG_P (regno) ? NO_REGS : GR_REGS; + if (reg_class_subset_p (rclass, FP_REGS)) + { + if (regno < 0 + || (MEM_P (x) + && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8))) +- /* In this case we can use lwc1, swc1, ldc1 or sdc1. We'll use +- pairs of lwc1s and swc1s if ldc1 and sdc1 are not supported. */ ++ /* In this case we can use fld.s, fst.s, fld.d or fst.d. */ + return NO_REGS; + + if (MEM_P (x) && LSX_SUPPORTED_MODE_P (mode)) +@@ -7110,17 +6480,18 @@ loongarch_secondary_reload_class (enum reg_class rclass, + return NO_REGS; + + if (GP_REG_P (regno) || x == CONST0_RTX (mode)) +- /* In this case we can use movgr2fr.s, movfr2gr.s, movgr2fr.d or movfr2gr.d. */ ++ /* In this case we can use movgr2fr.s, movfr2gr.s, movgr2fr.d or ++ * movfr2gr.d. */ + return NO_REGS; + + if (CONSTANT_P (x) && !targetm.cannot_force_const_mem (mode, x)) +- /* We can force the constant to memory and use lwc1 +- and ldc1. As above, we will use pairs of lwc1s if ++ /* We can force the constant to memory and use fld.s ++ and fld.d. As above, we will use pairs of lwc1s if + ldc1 is not supported. */ + return NO_REGS; + + if (FP_REG_P (regno) && loongarch_mode_ok_for_mov_fmt_p (mode)) +- /* In this case we can use mov.fmt. */ ++ /* In this case we can use fmov.{s/d}. */ + return NO_REGS; + + /* Otherwise, we need to reload through an integer register. */ +@@ -7132,7 +6503,19 @@ loongarch_secondary_reload_class (enum reg_class rclass, + return NO_REGS; + } + +- ++/* Implement TARGET_MODE_REP_EXTENDED */ ++ ++static int ++loongarch_mode_rep_extended (scalar_int_mode mode, scalar_int_mode mode_rep) ++{ ++ /* On 64-bit targets, SImode register values are sign-extended to DImode. */ ++ if (TARGET_64BIT && mode == SImode && mode_rep == DImode) ++ return SIGN_EXTEND; ++ ++ return UNKNOWN; ++} ++ ++ + /* Implement TARGET_VALID_POINTER_MODE. */ + + static bool +@@ -7160,7 +6543,7 @@ loongarch_scalar_mode_supported_p (scalar_mode mode) + + return default_scalar_mode_supported_p (mode); + } +- ++ + /* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE. 
*/ + + static machine_mode +@@ -7233,17 +6616,15 @@ loongarch_adjust_insn_length (rtx_insn *insn, int length) + length += 4; + + /* See how many nops might be needed to avoid hardware hazards. */ +- if (!cfun->machine->ignore_hazard_length_p +- && INSN_P (insn) ++ if (INSN_P (insn) + && INSN_CODE (insn) >= 0) + switch (get_attr_hazard (insn)) + { + case HAZARD_NONE: + break; + +- case HAZARD_DELAY: + case HAZARD_FORBIDDEN_SLOT: +- length += NOP_INSN_LENGTH; ++ length += 4; + break; + } + +@@ -7258,8 +6639,8 @@ loongarch_adjust_insn_length (rtx_insn *insn, int length) + + const char * + loongarch_output_conditional_branch (rtx_insn *insn, rtx *operands, +- const char *branch_if_true, +- const char *branch_if_false) ++ const char *branch_if_true, ++ const char *branch_if_false) + { + unsigned int length; + rtx taken; +@@ -7272,8 +6653,7 @@ loongarch_output_conditional_branch (rtx_insn *insn, rtx *operands, + return branch_if_true; + } + +- /* Generate a reversed branch around a direct jump. This fallback does +- not use branch-likely instructions. */ ++ /* Generate a reversed branch around a direct jump. */ + rtx_code_label *not_taken = gen_label_rtx (); + taken = operands[0]; + +@@ -7281,37 +6661,7 @@ loongarch_output_conditional_branch (rtx_insn *insn, rtx *operands, + operands[0] = not_taken; + output_asm_insn (branch_if_false, operands); + +- /* If INSN has a delay slot, we must provide delay slots for both the +- branch to NOT_TAKEN and the conditional jump. We must also ensure +- that INSN's delay slot is executed in the appropriate cases. */ +- if (final_sequence) +- { +- /* This first delay slot will always be executed, so use INSN's +- delay slot if is not annulled. */ +- if (!INSN_ANNULLED_BRANCH_P (insn)) +- { +- final_scan_insn (final_sequence->insn (1), +- asm_out_file, optimize, 1, NULL); +- final_sequence->insn (1)->set_deleted (); +- } +- fprintf (asm_out_file, "\n"); +- } +- +- output_asm_insn (LARCH_ABSOLUTE_JUMP ("b\t%0"), &taken); +- +- /* Now deal with its delay slot; see above. */ +- if (final_sequence) +- { +- /* This delay slot will only be executed if the branch is taken. +- Use INSN's delay slot if is annulled. */ +- if (INSN_ANNULLED_BRANCH_P (insn)) +- { +- final_scan_insn (final_sequence->insn (1), +- asm_out_file, optimize, 1, NULL); +- final_sequence->insn (1)->set_deleted (); +- } +- fprintf (asm_out_file, "\n"); +- } ++ output_asm_insn ("b\t%0", &taken); + + /* Output NOT_TAKEN. */ + targetm.asm_out.internal_label (asm_out_file, "L", +@@ -7326,21 +6676,23 @@ loongarch_output_conditional_branch (rtx_insn *insn, rtx *operands, + OPERANDS[3] is the second operand and may be zero or a register. 
*/ + + const char * +-loongarch_output_equal_conditional_branch (rtx_insn* insn, rtx *operands, +- bool inverted_p) ++loongarch_output_equal_conditional_branch (rtx_insn *insn, rtx *operands, ++ bool inverted_p) + { + const char *branch[2]; + if (operands[3] == const0_rtx) + { + branch[!inverted_p] = LARCH_BRANCH ("b%C1z", "%2,%0"); + branch[inverted_p] = LARCH_BRANCH ("b%N1z", "%2,%0"); +- } else ++ } ++ else + { + branch[!inverted_p] = LARCH_BRANCH ("b%C1", "%2,%z3,%0"); + branch[inverted_p] = LARCH_BRANCH ("b%N1", "%2,%z3,%0"); + } + +- return loongarch_output_conditional_branch (insn, operands, branch[1], branch[0]); ++ return loongarch_output_conditional_branch (insn, operands, branch[1], ++ branch[0]); + } + + /* Return the assembly code for INSN, which branches to OPERANDS[0] +@@ -7351,7 +6703,7 @@ loongarch_output_equal_conditional_branch (rtx_insn* insn, rtx *operands, + + const char * + loongarch_output_order_conditional_branch (rtx_insn *insn, rtx *operands, +- bool inverted_p) ++ bool inverted_p) + { + const char *branch[2]; + +@@ -7377,7 +6729,7 @@ loongarch_output_order_conditional_branch (rtx_insn *insn, rtx *operands, + branch[!inverted_p] = LARCH_BRANCH ("b", "%0"); + branch[inverted_p] = "\t# branch never"; + break; +- default: ++ default: + gcc_unreachable (); + } + } +@@ -7385,31 +6737,19 @@ loongarch_output_order_conditional_branch (rtx_insn *insn, rtx *operands, + { + switch (GET_CODE (operands[1])) + { +- case LE: +- branch[!inverted_p] = LARCH_BRANCH ("bge", "%3,%2,%0"); +- branch[inverted_p] = LARCH_BRANCH ("blt", "%3,%2,%0"); +- break; +- case LEU: +- branch[!inverted_p] = LARCH_BRANCH ("bgeu", "%3,%2,%0"); +- branch[inverted_p] = LARCH_BRANCH ("bltu", "%3,%2,%0"); +- break; +- case GT: +- branch[!inverted_p] = LARCH_BRANCH ("blt", "%3,%2,%0"); +- branch[inverted_p] = LARCH_BRANCH ("bge", "%3,%2,%0"); +- break; +- case GTU: +- branch[!inverted_p] = LARCH_BRANCH ("bltu", "%3,%2,%0"); +- branch[inverted_p] = LARCH_BRANCH ("bgeu", "%3,%2,%0"); +- break; +- case LT: +- case LTU: +- case GE: +- case GEU: +- branch[!inverted_p] = LARCH_BRANCH ("b%C1", "%2,%3,%0"); +- branch[inverted_p] = LARCH_BRANCH ("b%N1", "%2,%3,%0"); +- break; +- default: +- gcc_unreachable (); ++ case LE: ++ case LEU: ++ case GT: ++ case GTU: ++ case LT: ++ case LTU: ++ case GE: ++ case GEU: ++ branch[!inverted_p] = LARCH_BRANCH ("b%C1", "%2,%3,%0"); ++ branch[inverted_p] = LARCH_BRANCH ("b%N1", "%2,%3,%0"); ++ break; ++ default: ++ gcc_unreachable (); + } + } + } +@@ -7419,30 +6759,11 @@ loongarch_output_order_conditional_branch (rtx_insn *insn, rtx *operands, + { + /* These cases are equivalent to comparisons against zero. */ + case LEU: +- inverted_p = !inverted_p; +- /* Fall through. */ + case GTU: +- branch[!inverted_p] = LARCH_BRANCH ("bne", "%2,%.,%0"); +- branch[inverted_p] = LARCH_BRANCH ("beq", "%2,%.,%0"); +- break; +- +- /* These cases are always true or always false. */ + case LTU: +- inverted_p = !inverted_p; +- /* Fall through. 
*/ + case GEU: +- branch[!inverted_p] = LARCH_BRANCH ("beq", "%.,%.,%0"); +- branch[inverted_p] = LARCH_BRANCH ("bne", "%.,%.,%0"); +- break; +- +- case LE: +- branch[!inverted_p] = LARCH_BRANCH ("bge", "$r0,%2,%0"); +- branch[inverted_p] = LARCH_BRANCH ("blt", "$r0,%2,%0"); +- break; ++ case LE: + case GT: +- branch[!inverted_p] = LARCH_BRANCH ("blt", "$r0,%2,%0"); +- branch[inverted_p] = LARCH_BRANCH ("bge", "$r0,%2,%0"); +- break; + case LT: + case GE: + branch[!inverted_p] = LARCH_BRANCH ("b%C1", "%2,$r0,%0"); +@@ -7451,98 +6772,14 @@ loongarch_output_order_conditional_branch (rtx_insn *insn, rtx *operands, + default: + gcc_unreachable (); + } +- } +- return loongarch_output_conditional_branch (insn, operands, branch[1], branch[0]); ++ } ++ return loongarch_output_conditional_branch (insn, operands, branch[1], ++ branch[0]); + } +- +-/* Return the assembly code for DIV or DDIV instruction DIVISION, which has +- the operands given by OPERANDS. Add in a divide-by-zero check if needed. + +- When working around R4000 and R4400 errata, we need to make sure that +- the division is not immediately followed by a shift[1][2]. We also +- need to stop the division from being put into a branch delay slot[3]. +- The easiest way to avoid both problems is to add a nop after the +- division. When a divide-by-zero check is needed, this nop can be +- used to fill the branch delay slot. +- +- [1] If a double-word or a variable shift executes immediately +- after starting an integer division, the shift may give an +- incorrect result. See quotations of errata #16 and #28 from +- "LARCH R4000PC/SC Errata, Processor Revision 2.2 and 3.0" +- in loongarch.md for details. +- +- [2] A similar bug to [1] exists for all revisions of the +- R4000 and the R4400 when run in an MC configuration. +- From "LARCH R4000MC Errata, Processor Revision 2.2 and 3.0": +- +- "19. In this following sequence: +- +- ddiv (or ddivu or div or divu) +- dsll32 (or dsrl32, dsra32) +- +- if an MPT stall occurs, while the divide is slipping the cpu +- pipeline, then the following double shift would end up with an +- incorrect result. +- +- Workaround: The compiler needs to avoid generating any +- sequence with divide followed by extended double shift." +- +- This erratum is also present in "LARCH R4400MC Errata, Processor +- Revision 1.0" and "LARCH R4400MC Errata, Processor Revision 2.0 +- & 3.0" as errata #10 and #4, respectively. +- +- [3] From "LARCH R4000PC/SC Errata, Processor Revision 2.2 and 3.0" +- (also valid for LARCH R4000MC processors): +- +- "52. R4000SC: This bug does not apply for the R4000PC. +- +- There are two flavors of this bug: +- +- 1) If the instruction just after divide takes an RF exception +- (tlb-refill, tlb-invalid) and gets an instruction cache +- miss (both primary and secondary) and the line which is +- currently in secondary cache at this index had the first +- data word, where the bits 5..2 are set, then R4000 would +- get a wrong result for the div. +- +- ##1 +- nop +- div r8, r9 +- ------------------- # end-of page. -tlb-refill +- nop +- ##2 +- nop +- div r8, r9 +- ------------------- # end-of page. -tlb-invalid +- nop +- +- 2) If the divide is in the taken branch delay slot, where the +- target takes RF exception and gets an I-cache miss for the +- exception vector or where I-cache miss occurs for the +- target address, under the above mentioned scenarios, the +- div would get wrong results. 
+- +- ##1 +- j r2 # to next page mapped or unmapped +- div r8,r9 # this bug would be there as long +- # as there is an ICache miss and +- nop # the "data pattern" is present +- +- ##2 +- beq r0, r0, NextPage # to Next page +- div r8,r9 +- nop +- +- This bug is present for div, divu, ddiv, and ddivu +- instructions. +- +- Workaround: For item 1), OS could make sure that the next page +- after the divide instruction is also mapped. For item 2), the +- compiler could make sure that the divide instruction is not in +- the branch delay slot." +- +- These processors have PRId values of 0x00004220 and 0x00004300 for +- the R4000 and 0x00004400, 0x00004500 and 0x00004600 for the R4400. */ ++/* Return the assembly code for DIV.{W/D} instruction DIVISION, which has ++ the operands given by OPERANDS. Add in a divide-by-zero check if needed. ++ */ + + const char * + loongarch_output_division (const char *division, rtx *operands) +@@ -7571,13 +6808,13 @@ loongarch_lsx_output_division (const char *division, rtx *operands) + s = division; + if (TARGET_CHECK_ZERO_DIV) + { +- if(ISA_HAS_LASX && GET_MODE_SIZE (mode) == 32) ++ if (ISA_HAS_LASX && GET_MODE_SIZE (mode) == 32) + { + output_asm_insn ("xvsetallnez.%v0\t$fcc7,%u2",operands); + output_asm_insn (s, operands); + output_asm_insn ("bcnez\t$fcc7,1f", operands); + } +- else if(ISA_HAS_LSX) ++ else if (ISA_HAS_LSX) + { + output_asm_insn ("vsetallnez.%v0\t$fcc7,%w2",operands); + output_asm_insn (s, operands); +@@ -7587,80 +6824,13 @@ loongarch_lsx_output_division (const char *division, rtx *operands) + } + return s; + } +- +-/* Return true if destination of IN_INSN is used as add source in +- OUT_INSN. Both IN_INSN and OUT_INSN are of type fmadd. Example: +- madd.s dst, x, y, z +- madd.s a, dst, b, c */ +- +-bool +-loongarch_fmadd_bypass (rtx_insn *out_insn, rtx_insn *in_insn) +-{ +- int dst_reg, src_reg; +- +- gcc_assert (get_attr_type (in_insn) == TYPE_FMADD); +- gcc_assert (get_attr_type (out_insn) == TYPE_FMADD); +- +- extract_insn (in_insn); +- dst_reg = REG_P (recog_data.operand[0]); +- +- extract_insn (out_insn); +- src_reg = REG_P (recog_data.operand[1]); +- +- if (dst_reg == src_reg) +- return true; +- +- return false; +-} +- +-/* Return true if IN_INSN is a multiply-add or multiply-subtract +- instruction and if OUT_INSN assigns to the accumulator operand. */ +- +-bool +-loongarch_linked_madd_p (rtx_insn *out_insn, rtx_insn *in_insn) +-{ +- enum attr_accum_in accum_in; +- int accum_in_opnum; +- rtx accum_in_op; +- +- if (recog_memoized (in_insn) < 0) +- return false; +- +- accum_in = get_attr_accum_in (in_insn); +- if (accum_in == ACCUM_IN_NONE) +- return false; +- +- accum_in_opnum = accum_in - ACCUM_IN_0; +- +- extract_insn (in_insn); +- gcc_assert (accum_in_opnum < recog_data.n_operands); +- accum_in_op = recog_data.operand[accum_in_opnum]; +- +- return reg_set_p (accum_in_op, out_insn); +-} +- +-/* True if the dependency between OUT_INSN and IN_INSN is on the store +- data rather than the address. We need this because the cprestore +- pattern is type "store", but is defined using an UNSPEC_VOLATILE, +- which causes the default routine to abort. We just return false +- for that case. */ +- +-bool +-loongarch_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn) +-{ +- if (GET_CODE (PATTERN (in_insn)) == UNSPEC_VOLATILE) +- return false; +- +- return store_data_bypass_p (out_insn, in_insn); +-} +- + + /* Implement TARGET_SCHED_ADJUST_COST. 
We assume that anti and output +- dependencies have no cost, except on the 20Kc where output-dependence +- is treated like input-dependence. */ ++ dependencies have no cost. */ + + static int +-loongarch_adjust_cost (rtx_insn *, int dep_type, rtx_insn *, int cost, unsigned int) ++loongarch_adjust_cost (rtx_insn *, int dep_type, rtx_insn *, int cost, ++ unsigned int) + { + if (dep_type != 0 && (dep_type != REG_DEP_OUTPUT)) + return 0; +@@ -7672,15 +6842,10 @@ loongarch_adjust_cost (rtx_insn *, int dep_type, rtx_insn *, int cost, unsigned + static int + loongarch_issue_rate (void) + { +- switch (loongarch_tune) +- { +- case PROCESSOR_LOONGARCH64: +- case PROCESSOR_LA464: +- return 4; +- +- default: +- return 1; +- } ++ if ((unsigned long) la_target.cpu_tune < N_TUNE_TYPES) ++ return loongarch_cpu_issue_rate[la_target.cpu_tune]; ++ else ++ return 1; + } + + /* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD. This should +@@ -7689,24 +6854,20 @@ loongarch_issue_rate (void) + static int + loongarch_multipass_dfa_lookahead (void) + { +- if (TUNE_LOONGARCH64 || TUNE_LA464) +- return 4; +- +- return 0; +-} +- +- +-static void +-loongarch_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, +- int max_ready ATTRIBUTE_UNUSED) +-{ ++ if ((unsigned long) la_target.cpu_tune < N_ARCH_TYPES) ++ return loongarch_cpu_multipass_dfa_lookahead[la_target.cpu_tune]; ++ else ++ return 0; + } + + /* Implement TARGET_SCHED_REORDER. */ + + static int +-loongarch_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, +- rtx_insn **ready, int *nreadyp, int cycle ATTRIBUTE_UNUSED) ++loongarch_sched_reorder (FILE *file ATTRIBUTE_UNUSED, ++ int verbose ATTRIBUTE_UNUSED, ++ rtx_insn **ready ATTRIBUTE_UNUSED, ++ int *nreadyp ATTRIBUTE_UNUSED, ++ int cycle ATTRIBUTE_UNUSED) + { + return loongarch_issue_rate (); + } +@@ -7714,17 +6875,29 @@ loongarch_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUS + /* Implement TARGET_SCHED_REORDER2. */ + + static int +-loongarch_sched_reorder2 (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, +- rtx_insn **ready, int *nreadyp, int cycle ATTRIBUTE_UNUSED) ++loongarch_sched_reorder2 (FILE *file ATTRIBUTE_UNUSED, ++ int verbose ATTRIBUTE_UNUSED, ++ rtx_insn **ready ATTRIBUTE_UNUSED, ++ int *nreadyp ATTRIBUTE_UNUSED, ++ int cycle ATTRIBUTE_UNUSED) + { + return cached_can_issue_more; + } + ++/* Implement TARGET_SCHED_INIT. */ ++ ++static void ++loongarch_sched_init (FILE *file ATTRIBUTE_UNUSED, ++ int verbose ATTRIBUTE_UNUSED, ++ int max_ready ATTRIBUTE_UNUSED) ++{} ++ + /* Implement TARGET_SCHED_VARIABLE_ISSUE. */ + + static int +-loongarch_variable_issue (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, +- rtx_insn *insn, int more) ++loongarch_variable_issue (FILE *file ATTRIBUTE_UNUSED, ++ int verbose ATTRIBUTE_UNUSED, rtx_insn *insn, ++ int more) + { + /* Ignore USEs and CLOBBERs; don't count them against the issue rate. */ + if (USEFUL_INSN_P (insn)) +@@ -7742,1243 +6915,2339 @@ loongarch_variable_issue (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNU + cached_can_issue_more = more; + return more; + } +- +-/* Given that we have an rtx of the form (prefetch ... WRITE LOCALITY), +- return the first operand of the associated PREF or PREFX insn. */ +- +-rtx +-loongarch_prefetch_cookie (rtx write, rtx locality) +-{ +- /* store_streamed / load_streamed. */ +- if (INTVAL (locality) <= 0) +- return GEN_INT (INTVAL (write) + 4); +- +- /* store / load. 
*/ +- if (INTVAL (locality) <= 2) +- return write; +- +- /* store_retained / load_retained. */ +- return GEN_INT (INTVAL (write) + 6); +-} +- +- +-/* Return whether CFG is used in loongarch_reorg. */ + +-static bool +-loongarch_cfg_in_reorg (void) +-{ +- return (TARGET_RELAX_PIC_CALLS); +-} +- +-/* If INSN is a call, return the underlying CALL expr. Return NULL_RTX +- otherwise. If INSN has two call rtx, then store the second one in +- SECOND_CALL. */ ++/* Implement TARGET_ASM_OUTPUT_MI_THUNK. Generate rtl rather than asm text ++ in order to avoid duplicating too much logic from elsewhere. */ + +-static rtx +-loongarch_call_expr_from_insn (rtx_insn *insn, rtx *second_call) ++static void ++loongarch_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, ++ HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, ++ tree function) + { +- rtx x; +- rtx x2; +- +- if (!CALL_P (insn)) +- return NULL_RTX; +- +- x = PATTERN (insn); +- if (GET_CODE (x) == PARALLEL) +- { +- /* Calls returning complex values have two CALL rtx. Look for the second +- one here, and return it via the SECOND_CALL arg. */ +- x2 = XVECEXP (x, 0, 1); +- if (GET_CODE (x2) == SET) +- x2 = XEXP (x2, 1); +- if (GET_CODE (x2) == CALL) +- *second_call = x2; +- +- x = XVECEXP (x, 0, 0); +- } +- if (GET_CODE (x) == SET) +- x = XEXP (x, 1); +- gcc_assert (GET_CODE (x) == CALL); ++ rtx this_rtx, temp1, temp2, fnaddr; ++ rtx_insn *insn; ++ bool use_sibcall_p; + +- return x; +-} ++ /* Pretend to be a post-reload pass while generating rtl. */ ++ reload_completed = 1; + +-/* REG is set in DEF. See if the definition is one of the ways we load a +- register with a symbol address for a loongarch_use_pic_fn_addr_reg_p call. +- If it is, return the symbol reference of the function, otherwise return +- NULL_RTX. ++ /* Mark the end of the (empty) prologue. */ ++ emit_note (NOTE_INSN_PROLOGUE_END); + +- If RECURSE_P is true, use loongarch_find_pic_call_symbol to interpret +- the values of source registers, otherwise treat such registers as +- having an unknown value. */ ++ /* Determine if we can use a sibcall to call FUNCTION directly. */ ++ fnaddr = XEXP (DECL_RTL (function), 0); ++ use_sibcall_p = const_call_insn_operand (fnaddr, Pmode); + +-static rtx +-loongarch_pic_call_symbol_from_set (df_ref def, rtx reg, bool recurse_p) +-{ +- rtx_insn *def_insn; +- rtx set; ++ /* We need two temporary registers in some cases. */ ++ temp1 = gen_rtx_REG (Pmode, 12); ++ temp2 = gen_rtx_REG (Pmode, 13); + +- if (DF_REF_IS_ARTIFICIAL (def)) +- return NULL_RTX; ++ /* Find out which register contains the "this" pointer. */ ++ if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) ++ this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST + 1); ++ else ++ this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST); + +- def_insn = DF_REF_INSN (def); +- set = single_set (def_insn); +- if (set && rtx_equal_p (SET_DEST (set), reg)) ++ /* Add DELTA to THIS_RTX. */ ++ if (delta != 0) + { +- rtx note, src, symbol; +- +- /* First see whether the source is a plain symbol. This is used +- when calling symbols that are not lazily bound. */ +- src = SET_SRC (set); +- if (GET_CODE (src) == SYMBOL_REF) +- return src; +- +- /* Handle %call16 references. 
*/ +- symbol = loongarch_strip_unspec_call (src); +- if (symbol) ++ rtx offset = GEN_INT (delta); ++ if (!IMM12_OPERAND (delta)) + { +- gcc_assert (GET_CODE (symbol) == SYMBOL_REF); +- return symbol; ++ loongarch_emit_move (temp1, offset); ++ offset = temp1; + } +- +- /* If we have something more complicated, look for a +- REG_EQUAL or REG_EQUIV note. */ +- note = find_reg_equal_equiv_note (def_insn); +- if (note && GET_CODE (XEXP (note, 0)) == SYMBOL_REF) +- return XEXP (note, 0); +- +- /* Follow at most one simple register copy. Such copies are +- interesting in cases like: +- +- for (...) +- { +- locally_binding_fn (...); +- } +- +- and: +- +- locally_binding_fn (...); +- ... +- locally_binding_fn (...); +- +- where the load of locally_binding_fn can legitimately be +- hoisted or shared. However, we do not expect to see complex +- chains of copies, so a full worklist solution to the problem +- would probably be overkill. */ +- if (recurse_p && REG_P (src)) +- return loongarch_find_pic_call_symbol (def_insn, src, false); ++ emit_insn (gen_add3_insn (this_rtx, this_rtx, offset)); + } + +- return NULL_RTX; +-} ++ /* If needed, add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */ ++ if (vcall_offset != 0) ++ { ++ rtx addr; + +-/* Find the definition of the use of REG in INSN. See if the definition +- is one of the ways we load a register with a symbol address for a +- loongarch_use_pic_fn_addr_reg_p call. If it is return the symbol reference +- of the function, otherwise return NULL_RTX. RECURSE_P is as for +- loongarch_pic_call_symbol_from_set. */ ++ /* Set TEMP1 to *THIS_RTX. */ ++ loongarch_emit_move (temp1, gen_rtx_MEM (Pmode, this_rtx)); + +-static rtx +-loongarch_find_pic_call_symbol (rtx_insn *insn, rtx reg, bool recurse_p) +-{ +- df_ref use; +- struct df_link *defs; +- rtx symbol; ++ /* Set ADDR to a legitimate address for *THIS_RTX + VCALL_OFFSET. */ ++ addr = loongarch_add_offset (temp2, temp1, vcall_offset); + +- use = df_find_use (insn, regno_reg_rtx[REGNO (reg)]); +- if (!use) +- return NULL_RTX; +- defs = DF_REF_CHAIN (use); +- if (!defs) +- return NULL_RTX; +- symbol = loongarch_pic_call_symbol_from_set (defs->ref, reg, recurse_p); +- if (!symbol) +- return NULL_RTX; ++ /* Load the offset and add it to THIS_RTX. */ ++ loongarch_emit_move (temp1, gen_rtx_MEM (Pmode, addr)); ++ emit_insn (gen_add3_insn (this_rtx, this_rtx, temp1)); ++ } + +- /* If we have more than one definition, they need to be identical. */ +- for (defs = defs->next; defs; defs = defs->next) ++ /* Jump to the target function. Use a sibcall if direct jumps are ++ allowed, otherwise load the address into a register first. */ ++ if (use_sibcall_p) + { +- rtx other; +- +- other = loongarch_pic_call_symbol_from_set (defs->ref, reg, recurse_p); +- if (!rtx_equal_p (symbol, other)) +- return NULL_RTX; ++ insn = emit_call_insn (gen_sibcall_internal (fnaddr, const0_rtx)); ++ SIBLING_CALL_P (insn) = 1; ++ } ++ else ++ { ++ loongarch_emit_move (temp1, fnaddr); ++ emit_jump_insn (gen_indirect_jump (temp1)); + } + +- return symbol; +-} +- +-/* Replace the args_size operand of the call expression CALL with the +- call-attribute UNSPEC and fill in SYMBOL as the function symbol. */ +- +-static void +-loongarch_annotate_pic_call_expr (rtx call, rtx symbol) +-{ +- rtx args_size; ++ /* Run just enough of rest_of_compilation. This sequence was ++ "borrowed" from alpha.c. 
*/ ++ insn = get_insns (); ++ split_all_insns_noflow (); ++ shorten_branches (insn); ++ final_start_function (insn, file, 1); ++ final (insn, file, 1); ++ final_end_function (); + +- args_size = XEXP (call, 1); +- XEXP (call, 1) = gen_rtx_UNSPEC (GET_MODE (args_size), +- gen_rtvec (2, args_size, symbol), +- UNSPEC_CALL_ATTR); ++ /* Stop pretending to be a post-reload pass. */ ++ reload_completed = 0; + } + +-/* OPERANDS[ARGS_SIZE_OPNO] is the arg_size operand of a CALL expression. See +- if instead of the arg_size argument it contains the call attributes. If +- yes return true along with setting OPERANDS[ARGS_SIZE_OPNO] to the function +- symbol from the call attributes. Also return false if ARGS_SIZE_OPNO is +- -1. */ ++/* Allocate a chunk of memory for per-function machine-dependent data. */ + +-bool +-loongarch_get_pic_call_symbol (rtx *operands, int args_size_opno) ++static struct machine_function * ++loongarch_init_machine_status (void) + { +- rtx args_size, symbol; +- +- if (!TARGET_RELAX_PIC_CALLS || args_size_opno == -1) +- return false; +- +- args_size = operands[args_size_opno]; +- if (GET_CODE (args_size) != UNSPEC) +- return false; +- gcc_assert (XINT (args_size, 1) == UNSPEC_CALL_ATTR); +- +- symbol = XVECEXP (args_size, 0, 1); +- gcc_assert (GET_CODE (symbol) == SYMBOL_REF); +- +- operands[args_size_opno] = symbol; +- return true; ++ return ggc_cleared_alloc (); + } + +-/* Use DF to annotate PIC indirect calls with the function symbol they +- dispatch to. */ +- + static void +-loongarch_annotate_pic_calls (void) ++loongarch_cpu_option_override (struct loongarch_target *target, ++ struct gcc_options *opts, ++ struct gcc_options *opts_set) + { +- basic_block bb; +- rtx_insn *insn; +- +- FOR_EACH_BB_FN (bb, cfun) +- FOR_BB_INSNS (bb, insn) ++ /* strict alignment */ ++ switch (target->cpu_arch) + { +- rtx call, reg, symbol, second_call; ++ case CPU_LA264: ++ /* Using -mstrict-align is recommended for la264 cores. */ ++ if (!opts_set->x_TARGET_STRICT_ALIGN) ++ { ++ opts->x_TARGET_STRICT_ALIGN = 1; ++ opts_set->x_TARGET_STRICT_ALIGN = 1; ++ } ++ break; ++ } + +- second_call = 0; +- call = loongarch_call_expr_from_insn (insn, &second_call); +- if (!call) +- continue; +- gcc_assert (MEM_P (XEXP (call, 0))); +- reg = XEXP (XEXP (call, 0), 0); +- if (!REG_P (reg)) +- continue; ++ /* software prefetching parameters (-fprefetch-loop-arrays) */ ++ maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, ++ loongarch_cpu_cache[target->cpu_tune].simultaneous_prefetches, ++ opts->x_param_values, opts_set->x_param_values); + +- symbol = loongarch_find_pic_call_symbol (insn, reg, true); +- if (symbol) +- { +- loongarch_annotate_pic_call_expr (call, symbol); +- if (second_call) +- loongarch_annotate_pic_call_expr (second_call, symbol); +- } +- } +-} +- ++ maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ++ loongarch_cpu_cache[target->cpu_tune].l1d_line_size, ++ opts->x_param_values, opts_set->x_param_values); + +-/* A structure representing the state of the processor pipeline. +- Used by the loongarch_sim_* family of functions. */ +-struct loongarch_sim { +- /* The maximum number of instructions that can be issued in a cycle. +- (Caches loongarch_issue_rate.) */ +- unsigned int issue_rate; +- +- /* The current simulation time. */ +- unsigned int time; +- +- /* How many more instructions can be issued in the current cycle. */ +- unsigned int insns_left; +- +- /* LAST_SET[X].INSN is the last instruction to set register X. +- LAST_SET[X].TIME is the time at which that instruction was issued. 
+- INSN is null if no instruction has yet set register X. */ +- struct { +- rtx_insn *insn; +- unsigned int time; +- } last_set[FIRST_PSEUDO_REGISTER]; +- +- /* The pipeline's current DFA state. */ +- state_t dfa_state; +-}; ++ maybe_set_param_value (PARAM_L1_CACHE_SIZE, ++ loongarch_cpu_cache[target->cpu_tune].l1d_size, ++ opts->x_param_values, opts_set->x_param_values); + +-/* Reset STATE to the initial simulation state. */ ++ maybe_set_param_value (PARAM_L2_CACHE_SIZE, ++ loongarch_cpu_cache[target->cpu_tune].l2d_size, ++ opts->x_param_values, opts_set->x_param_values); ++} + + static void +-loongarch_sim_reset (struct loongarch_sim *state) ++loongarch_option_override_internal (struct gcc_options *opts, ++ struct gcc_options *opts_set) + { +- curr_state = state->dfa_state; ++ int i, regno, mode; + +- state->time = 0; +- state->insns_left = state->issue_rate; +- memset (&state->last_set, 0, sizeof (state->last_set)); +- state_reset (curr_state); ++ if (flag_pic) ++ g_switch_value = 0; + +- targetm.sched.init (0, false, 0); +- advance_state (curr_state); +-} ++ loongarch_init_target (&la_target, ++ la_opt_cpu_arch, la_opt_cpu_tune, la_opt_fpu, ++ la_opt_simd, la_opt_abi_base, la_opt_abi_ext, ++ la_opt_cmodel); + +-/* Initialize STATE before its first use. DFA_STATE points to an +- allocated but uninitialized DFA state. */ ++ /* Handle target-specific options: compute defaults/conflicts etc. */ ++ loongarch_config_target (&la_target, NULL, 0); + +-static void +-loongarch_sim_init (struct loongarch_sim *state, state_t dfa_state) +-{ +- if (targetm.sched.init_dfa_pre_cycle_insn) +- targetm.sched.init_dfa_pre_cycle_insn (); ++ loongarch_update_gcc_opt_status (&la_target, opts, opts_set); ++ loongarch_cpu_option_override (&la_target, opts, opts_set); + +- if (targetm.sched.init_dfa_post_cycle_insn) +- targetm.sched.init_dfa_post_cycle_insn (); ++ if (TARGET_ABI_LP64) ++ flag_pcc_struct_return = 0; + +- state->issue_rate = loongarch_issue_rate (); +- state->dfa_state = dfa_state; +- loongarch_sim_reset (state); +-} ++ /* Decide which rtx_costs structure to use. */ ++ if (optimize_size) ++ loongarch_cost = &loongarch_rtx_cost_optimize_size; ++ else ++ loongarch_cost = &loongarch_cpu_rtx_cost_data[la_target.cpu_tune]; + +- ++ /* If the user hasn't specified a branch cost, use the processor's ++ default. */ ++ if (loongarch_branch_cost == 0) ++ loongarch_branch_cost = loongarch_cost->branch_cost; + +-/* Set up costs based on the current architecture and tuning settings. */ ++ if (loongarch_vector_access_cost == 0) ++ loongarch_vector_access_cost = 5; + +-static void +-loongarch_set_tuning_info (void) +-{ + +- loongarch_tuning_info.arch = loongarch_arch; +- loongarch_tuning_info.tune = loongarch_tune; +- loongarch_tuning_info.initialized_p = true; ++ /* Enable sw prefetching at -O3 and higher. */ ++ if (opts->x_flag_prefetch_loop_arrays < 0 ++ && (opts->x_optimize >= 3 || opts->x_flag_profile_use) ++ && !opts->x_optimize_size) ++ opts->x_flag_prefetch_loop_arrays = 1; + +- dfa_start (); ++ switch (la_target.cmodel) ++ { ++ case CMODEL_TINY_STATIC: ++ case CMODEL_EXTREME: ++ if (opts->x_flag_plt) ++ error ("code model %qs and %qs not support %s mode", ++ "tiny-static", "extreme", "plt"); ++ break; + +- struct loongarch_sim state; +- loongarch_sim_init (&state, alloca (state_size ())); ++ case CMODEL_NORMAL: ++ case CMODEL_TINY: ++ case CMODEL_LARGE: ++ break; + +- dfa_finish (); +-} ++ default: ++ gcc_unreachable (); ++ } + +-/* Implement TARGET_EXPAND_TO_RTL_HOOK. 
*/ ++ loongarch_init_print_operand_punct (); + +-static void +-loongarch_expand_to_rtl_hook (void) +-{ +- /* We need to call this at a point where we can safely create sequences +- of instructions, so TARGET_OVERRIDE_OPTIONS is too early. We also +- need to call it at a point where the DFA infrastructure is not +- already in use, so we can't just call it lazily on demand. +- +- At present, loongarch_tuning_info is only needed during post-expand +- RTL passes such as split_insns, so this hook should be early enough. +- We may need to move the call elsewhere if loongarch_tuning_info starts +- to be used for other things (such as rtx_costs, or expanders that +- could be called during gimple optimization). */ +- loongarch_set_tuning_info (); +-} +- +-/* This structure records that the current function has a LO_SUM +- involving SYMBOL_REF or LABEL_REF BASE and that MAX_OFFSET is +- the largest offset applied to BASE by all such LO_SUMs. */ +-struct loongarch_lo_sum_offset { +- rtx base; +- HOST_WIDE_INT offset; +-}; ++ /* Set up array to map GCC register number to debug register number. ++ Ignore the special purpose register numbers. */ + +-/* Return a hash value for SYMBOL_REF or LABEL_REF BASE. */ ++ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) ++ { ++ if (GP_REG_P (i) || FP_REG_P (i)) ++ loongarch_dwarf_regno[i] = i; ++ else ++ loongarch_dwarf_regno[i] = INVALID_REGNUM; ++ } + +-static hashval_t +-loongarch_hash_base (rtx base) +-{ +- int do_not_record_p; ++ /* Set up loongarch_hard_regno_mode_ok. */ ++ for (mode = 0; mode < MAX_MACHINE_MODE; mode++) ++ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) ++ loongarch_hard_regno_mode_ok_p[mode][regno] ++ = loongarch_hard_regno_mode_ok_uncached (regno, (machine_mode) mode); + +- return hash_rtx (base, GET_MODE (base), &do_not_record_p, NULL, false); +-} ++ /* Function to allocate machine-dependent function status. */ ++ init_machine_status = &loongarch_init_machine_status; + +-/* Hashtable helpers. */ ++ /* If not optimizing for size, set the default ++ alignment to what the target wants. */ ++ if (!opts->x_optimize_size) ++ { ++ if (opts->x_align_loops <= 0) ++ opts->x_align_loops = 16; ++ if (opts->x_align_jumps <= 0) ++ opts->x_align_jumps = 16; ++ if (opts->x_align_functions <= 0) ++ opts->x_align_functions = 16; ++ } + +-struct loongarch_lo_sum_offset_hasher : free_ptr_hash +-{ +- typedef rtx_def *compare_type; +- static inline hashval_t hash (const loongarch_lo_sum_offset *); +- static inline bool equal (const loongarch_lo_sum_offset *, const rtx_def *); +-}; ++ if (loongarch_veclibabi_name ++ && strcmp (loongarch_veclibabi_name, "sleef") != 0) ++ { ++ error ("unknown vectorization library ABI type (%qs) for " ++ "%qs", loongarch_veclibabi_name, "-mveclibabi="); ++ inform (input_location, ++ "valid arguments to %<-mveclibabi=%> are: %s", "sleef"); ++ } ++ if (!ISA_HAS_LASX) ++ loongarch_stack_realign = 0; + +-/* Hash-table callbacks for loongarch_lo_sum_offsets. */ ++ /* -mrecip options. 
*/ ++ static struct ++ { ++ const char *string; /* option name */ ++ unsigned int mask; /* mask bits to set */ ++ } ++ const recip_options[] = ++ { ++ { "all", RECIP_MASK_ALL }, ++ { "none", RECIP_MASK_NONE }, ++ { "div", RECIP_MASK_DIV }, ++ { "sqrt", RECIP_MASK_SQRT }, ++ { "rsqrt", RECIP_MASK_RSQRT }, ++ { "vec-div", RECIP_MASK_VEC_DIV }, ++ { "vec-sqrt", RECIP_MASK_VEC_SQRT }, ++ { "vec-rsqrt", RECIP_MASK_VEC_RSQRT }, ++ }; + +-inline hashval_t +-loongarch_lo_sum_offset_hasher::hash (const loongarch_lo_sum_offset *entry) +-{ +- return loongarch_hash_base (entry->base); +-} ++ if (loongarch_recip_name) ++ { ++ char *p = ASTRDUP (loongarch_recip_name); ++ char *q; ++ unsigned int mask, i; ++ bool invert; ++ ++ while ((q = strtok (p, ",")) != NULL) ++ { ++ p = NULL; ++ if (*q == '!') ++ { ++ invert = true; ++ q++; ++ } ++ else ++ invert = false; ++ ++ if (!strcmp (q, "default")) ++ mask = RECIP_MASK_ALL; ++ else ++ { ++ for (i = 0; i < ARRAY_SIZE (recip_options); i++) ++ if (!strcmp (q, recip_options[i].string)) ++ { ++ mask = recip_options[i].mask; ++ break; ++ } ++ ++ if (i == ARRAY_SIZE (recip_options)) ++ { ++ error ("unknown option for -mrecip=%s", q); ++ invert = false; ++ mask = RECIP_MASK_NONE; ++ } ++ } ++ ++ if (invert) ++ recip_mask &= ~mask; ++ else ++ recip_mask |= mask; ++ } ++ } ++ if (loongarch_recip) ++ recip_mask |= RECIP_MASK_ALL; + +-inline bool +-loongarch_lo_sum_offset_hasher::equal (const loongarch_lo_sum_offset *entry, +- const rtx_def *value) +-{ +- return rtx_equal_p (entry->base, value); + } + +-typedef hash_table loongarch_offset_table; +- + +-/* Subroutine of loongarch_reorg to manage passes that require DF. */ ++/* Implement TARGET_OPTION_OVERRIDE. */ + + static void +-loongarch_df_reorg (void) ++loongarch_option_override (void) + { +- /* Create def-use chains. */ +- df_set_flags (DF_EQ_NOTES); +- df_chain_add_problem (DF_UD_CHAIN); +- df_analyze (); +- +- if (TARGET_RELAX_PIC_CALLS) +- loongarch_annotate_pic_calls (); +- +- df_finish_pass (false); ++ loongarch_option_override_internal (&global_options, &global_options_set); + } + +- +-/* Implement TARGET_MACHINE_DEPENDENT_REORG. */ ++/* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */ + + static void +-loongarch_reorg (void) ++loongarch_conditional_register_usage (void) + { +- /* Restore the BLOCK_FOR_INSN pointers, which are needed by DF.DF insn info is only kept up +- to date if the CFG is available. */ +- if (loongarch_cfg_in_reorg ()) +- compute_bb_for_insn (); +- if (loongarch_cfg_in_reorg ()) ++ if (!TARGET_HARD_FLOAT) + { +- loongarch_df_reorg (); +- free_bb_for_insn (); ++ AND_COMPL_HARD_REG_SET (accessible_reg_set, ++ reg_class_contents[(int) FP_REGS]); ++ AND_COMPL_HARD_REG_SET (accessible_reg_set, ++ reg_class_contents[(int) FCC_REGS]); + } ++ + } + +-/* We use a machine specific pass to do a second machine dependent reorg +- pass after delay branch scheduling. */ ++/* Implement EH_USES. */ + +-static unsigned int +-loongarch_machine_reorg2 (void) ++bool ++loongarch_eh_uses (unsigned int regno ATTRIBUTE_UNUSED) + { +-// loongarch_insert_insn_pseudos (); +- return 0; ++ return false; + } + +-namespace { +- +-const pass_data pass_data_loongarch_machine_reorg2 = +-{ +- RTL_PASS, /* type */ +- "mach2", /* name */ +- OPTGROUP_NONE, /* optinfo_flags */ +- TV_MACH_DEP, /* tv_id */ +- 0, /* properties_required */ +- 0, /* properties_provided */ +- 0, /* properties_destroyed */ +- 0, /* todo_flags_start */ +- 0, /* todo_flags_finish */ +-}; ++/* Implement EPILOGUE_USES. 
*/ + +-class pass_loongarch_machine_reorg2 : public rtl_opt_pass ++bool ++loongarch_epilogue_uses (unsigned int regno) + { +-public: +- pass_loongarch_machine_reorg2(gcc::context *ctxt) +- : rtl_opt_pass(pass_data_loongarch_machine_reorg2, ctxt) +- {} +- +- /* opt_pass methods: */ +- virtual unsigned int execute (function *) { return loongarch_machine_reorg2 (); } +- +-}; // class pass_loongarch_machine_reorg2 +- +-} // anon namespace ++ /* Say that the epilogue uses the return address register. Note that ++ in the case of sibcalls, the values "used by the epilogue" are ++ considered live at the start of the called function. */ ++ if (regno == RETURN_ADDR_REGNUM) ++ return true; + +-rtl_opt_pass * +-make_pass_loongarch_machine_reorg2 (gcc::context *ctxt) +-{ +- return new pass_loongarch_machine_reorg2 (ctxt); ++ return false; + } + +- +-/* Implement TARGET_ASM_OUTPUT_MI_THUNK. Generate rtl rather than asm text +- in order to avoid duplicating too much logic from elsewhere. */ +- +-static void +-loongarch_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, +- HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, +- tree function) ++bool ++loongarch_load_store_bonding_p (rtx *operands, machine_mode mode, bool load_p) + { +- rtx this_rtx, temp1, temp2, fnaddr; +- rtx_insn *insn; +- bool use_sibcall_p; +- +- /* Pretend to be a post-reload pass while generating rtl. */ +- reload_completed = 1; +- +- /* Mark the end of the (empty) prologue. */ +- emit_note (NOTE_INSN_PROLOGUE_END); +- +- /* Determine if we can use a sibcall to call FUNCTION directly. */ +- fnaddr = XEXP (DECL_RTL (function), 0); +- use_sibcall_p = (loongarch_function_ok_for_sibcall (function, NULL) +- && const_call_insn_operand (fnaddr, Pmode)); +- +-// /* Determine if we need to load FNADDR from the GOT. */ +-// if (!use_sibcall_p +-// && (loongarch_got_symbol_type_p +-// (loongarch_classify_symbol (fnaddr, SYMBOL_CONTEXT_LEA)))) +-// { +-// /* Pick a global pointer. Use a call-clobbered register if +-// TARGET_CALL_SAVED_GP. */ +-// cfun->machine->global_pointer +-// = GLOBAL_POINTER_REGNUM; +-// cfun->machine->must_initialize_gp_p = true; +-// SET_REGNO (pic_offset_table_rtx, cfun->machine->global_pointer); +-// +-// /* Set up the global pointer for n32 or n64 abicalls. */ +-// loongarch_emit_loadgp (); +-// } +- +- /* We need two temporary registers in some cases. */ +- temp1 = gen_rtx_REG (Pmode, 12); +- temp2 = gen_rtx_REG (Pmode, 13); +- +- /* Find out which register contains the "this" pointer. */ +- if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) +- this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST + 1); +- else +- this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST); +- +- /* Add DELTA to THIS_RTX. */ +- if (delta != 0) +- { +- rtx offset = GEN_INT (delta); +- if (!SMALL_OPERAND (delta)) +- { +- loongarch_emit_move (temp1, offset); +- offset = temp1; +- } +- emit_insn (gen_add3_insn (this_rtx, this_rtx, offset)); +- } +- +- /* If needed, add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */ +- if (vcall_offset != 0) +- { +- rtx addr; +- +- /* Set TEMP1 to *THIS_RTX. */ +- loongarch_emit_move (temp1, gen_rtx_MEM (Pmode, this_rtx)); +- +- /* Set ADDR to a legitimate address for *THIS_RTX + VCALL_OFFSET. */ +- addr = loongarch_add_offset (temp2, temp1, vcall_offset); +- +- /* Load the offset and add it to THIS_RTX. 
*/ +- loongarch_emit_move (temp1, gen_rtx_MEM (Pmode, addr)); +- emit_insn (gen_add3_insn (this_rtx, this_rtx, temp1)); +- } ++ rtx reg1, reg2, mem1, mem2, base1, base2; ++ enum reg_class rc1, rc2; ++ HOST_WIDE_INT offset1, offset2; + +- /* Jump to the target function. Use a sibcall if direct jumps are +- allowed, otherwise load the address into a register first. */ +- if (use_sibcall_p) ++ if (load_p) + { +- insn = emit_call_insn (gen_sibcall_internal (fnaddr, const0_rtx)); +- SIBLING_CALL_P (insn) = 1; ++ reg1 = operands[0]; ++ reg2 = operands[2]; ++ mem1 = operands[1]; ++ mem2 = operands[3]; + } + else + { +- loongarch_emit_move (temp1, fnaddr); +- emit_jump_insn (gen_indirect_jump (temp1)); ++ reg1 = operands[1]; ++ reg2 = operands[3]; ++ mem1 = operands[0]; ++ mem2 = operands[2]; + } + +- /* Run just enough of rest_of_compilation. This sequence was +- "borrowed" from alpha.c. */ +- insn = get_insns (); +- split_all_insns_noflow (); +- shorten_branches (insn); +- final_start_function (insn, file, 1); +- final (insn, file, 1); +- final_end_function (); ++ if (loongarch_address_insns (XEXP (mem1, 0), mode, false) == 0 ++ || loongarch_address_insns (XEXP (mem2, 0), mode, false) == 0) ++ return false; + +- /* Clean up the vars set above. Note that final_end_function resets +- the global pointer for us. */ +- reload_completed = 0; +-} +- ++ loongarch_split_plus (XEXP (mem1, 0), &base1, &offset1); ++ loongarch_split_plus (XEXP (mem2, 0), &base2, &offset2); + +-/* Allocate a chunk of memory for per-function machine-dependent data. */ ++ /* Base regs do not match. */ ++ if (!REG_P (base1) || !rtx_equal_p (base1, base2)) ++ return false; + +-static struct machine_function * +-loongarch_init_machine_status (void) +-{ +- return ggc_cleared_alloc (); +-} ++ /* Either of the loads is clobbering base register. It is legitimate to bond ++ loads if second load clobbers base register. However, hardware does not ++ support such bonding. */ ++ if (load_p ++ && (REGNO (reg1) == REGNO (base1) || (REGNO (reg2) == REGNO (base1)))) ++ return false; + +-/* Return the processor associated with the given ISA level, or null +- if the ISA isn't valid. */ ++ /* Loading in same registers. */ ++ if (load_p && REGNO (reg1) == REGNO (reg2)) ++ return false; + +-static const struct loongarch_cpu_info * +-loongarch_cpu_info_from_isa (int isa) +-{ +- unsigned int i; ++ /* The loads/stores are not of same type. */ ++ rc1 = REGNO_REG_CLASS (REGNO (reg1)); ++ rc2 = REGNO_REG_CLASS (REGNO (reg2)); ++ if (rc1 != rc2 && !reg_class_subset_p (rc1, rc2) ++ && !reg_class_subset_p (rc2, rc1)) ++ return false; + +- for (i = 0; i < ARRAY_SIZE (loongarch_cpu_info_table); i++) +- if (loongarch_cpu_info_table[i].isa == isa) +- return loongarch_cpu_info_table + i; ++ if (abs (offset1 - offset2) != GET_MODE_SIZE (mode)) ++ return false; + +- return NULL; ++ return true; + } + +-/* Return a loongarch_cpu_info entry determined by an option valued +- OPT. */ ++/* Implement TARGET_TRAMPOLINE_INIT. 
*/ + +-static const struct loongarch_cpu_info * +-loongarch_cpu_info_from_opt (int opt) ++static void ++loongarch_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) + { +- switch (opt) +- { +- case LARCH_ARCH_OPTION_NATIVE: +- gcc_unreachable (); +- +- default: +- return &loongarch_cpu_info_table[opt]; +- } +-} ++ rtx addr, end_addr, mem; ++ rtx trampoline[8]; ++ unsigned int i, j; ++ HOST_WIDE_INT end_addr_offset, static_chain_offset, target_function_offset; + +-/* Return a default loongarch_cpu_info entry, given that no -march= option +- was explicitly specified. */ ++ /* Work out the offsets of the pointers from the start of the ++ trampoline code. */ ++ end_addr_offset = TRAMPOLINE_CODE_SIZE; ++ static_chain_offset = end_addr_offset; ++ target_function_offset = static_chain_offset + GET_MODE_SIZE (ptr_mode); + +-static const struct loongarch_cpu_info * +-loongarch_default_arch (void) +-{ +-#if defined (LARCH_CPU_STRING_DEFAULT) +- unsigned int i; +- for (i = 0; i < ARRAY_SIZE (loongarch_cpu_info_table); i++) +- if (strcmp (loongarch_cpu_info_table[i].name, LARCH_CPU_STRING_DEFAULT) == 0) +- return loongarch_cpu_info_table + i; +- gcc_unreachable (); +-#elif defined (LARCH_ISA_DEFAULT) +- return loongarch_cpu_info_from_isa (LARCH_ISA_DEFAULT); +-#else +- gcc_unreachable (); +-#endif +-} ++ /* Get pointers to the beginning and end of the code block. */ ++ addr = force_reg (Pmode, XEXP (m_tramp, 0)); ++ end_addr ++ = loongarch_force_binary (Pmode, PLUS, addr, GEN_INT (end_addr_offset)); + +-/* Set up globals to generate code for the ISA or processor +- described by INFO. */ ++#define OP(X) gen_int_mode (X, SImode) + +-static void +-loongarch_set_architecture (const struct loongarch_cpu_info *info) +-{ +- if (info != 0) +- { +- loongarch_arch_info = info; +- loongarch_arch = info->cpu; +- loongarch_isa = info->isa; +- if (loongarch_isa < 32) +- loongarch_isa_rev = 0; +- else +- loongarch_isa_rev = (loongarch_isa & 31) + 1; +- } +-} ++ /* Build up the code in TRAMPOLINE. */ ++ i = 0; ++ /*pcaddi $static_chain,0 ++ ld.[dw] $tmp,$static_chain,target_function_offset ++ ld.[dw] $static_chain,$static_chain,static_chain_offset ++ jirl $r0,$tmp,0 */ ++ trampoline[i++] = OP (0x18000000 | (STATIC_CHAIN_REGNUM - GP_REG_FIRST)); ++ trampoline[i++] = OP ((ptr_mode == DImode ? 0x28c00000 : 0x28800000) ++ | 19 /* $t7 */ ++ | ((STATIC_CHAIN_REGNUM - GP_REG_FIRST) << 5) ++ | ((target_function_offset & 0xfff) << 10)); ++ trampoline[i++] = OP ((ptr_mode == DImode ? 0x28c00000 : 0x28800000) ++ | (STATIC_CHAIN_REGNUM - GP_REG_FIRST) ++ | ((STATIC_CHAIN_REGNUM - GP_REG_FIRST) << 5) ++ | ((static_chain_offset & 0xfff) << 10)); ++ trampoline[i++] = OP (0x4c000000 | (19 << 5)); ++#undef OP + +-/* Likewise for tuning. */ ++ for (j = 0; j < i; j++) ++ { ++ mem = adjust_address (m_tramp, SImode, j * GET_MODE_SIZE (SImode)); ++ loongarch_emit_move (mem, trampoline[j]); ++ } + +-static void +-loongarch_set_tune (const struct loongarch_cpu_info *info) +-{ +- if (info != 0) +- { +- loongarch_tune_info = info; +- loongarch_tune = info->cpu; +- } +-} ++ /* Set up the static chain pointer field. */ ++ mem = adjust_address (m_tramp, ptr_mode, static_chain_offset); ++ loongarch_emit_move (mem, chain_value); + +-/* Implement TARGET_OPTION_OVERRIDE. */ ++ /* Set up the target function field. 
*/ ++ mem = adjust_address (m_tramp, ptr_mode, target_function_offset); ++ loongarch_emit_move (mem, XEXP (DECL_RTL (fndecl), 0)); + +-static void +-loongarch_option_override (void) +-{ +- int i, start, regno, mode; ++ /* Flush the code part of the trampoline. */ ++ emit_insn (gen_add3_insn (end_addr, addr, GEN_INT (TRAMPOLINE_SIZE))); ++ emit_insn (gen_clear_cache (addr, end_addr)); ++} + +-#ifdef SUBTARGET_OVERRIDE_OPTIONS +- SUBTARGET_OVERRIDE_OPTIONS; +-#endif ++/* Generate or test for an insn that supports a constant permutation. */ + ++#define MAX_VECT_LEN 32 + +- /* -mno-float overrides -mhard-float and -msoft-float. */ +- if (TARGET_NO_FLOAT) +- { +- target_flags |= MASK_SOFT_FLOAT_ABI; +- target_flags_explicit |= MASK_SOFT_FLOAT_ABI; +- } +- +- +- /* Set the small data limit. */ +- loongarch_small_data_threshold = (global_options_set.x_g_switch_value +- ? g_switch_value +- : LARCH_DEFAULT_GVALUE); +- +- /* The following code determines the architecture and register size. +- Similar code was added to GAS 2.14 (see tc-loongarch.c:md_after_parse_args()). +- The GAS and GCC code should be kept in sync as much as possible. */ +- +- if (global_options_set.x_loongarch_arch_option) +- loongarch_set_architecture (loongarch_cpu_info_from_opt (loongarch_arch_option)); ++struct expand_vec_perm_d ++{ ++ rtx target, op0, op1; ++ unsigned char perm[MAX_VECT_LEN]; ++ machine_mode vmode; ++ unsigned char nelt; ++ bool one_vector_p; ++ bool testing_p; ++}; + +- if (loongarch_arch_info == 0) +- loongarch_set_architecture (loongarch_default_arch ()); ++/* Construct (set target (vec_select op0 (parallel perm))) and ++ return true if that's a valid instruction in the active ISA. */ + +- /* Optimize for loongarch_arch, unless -mtune selects a different processor. */ +- if (global_options_set.x_loongarch_tune_option) +- loongarch_set_tune (loongarch_cpu_info_from_opt (loongarch_tune_option)); ++static bool ++loongarch_expand_vselect (rtx target, rtx op0, ++ const unsigned char *perm, unsigned nelt) ++{ ++ rtx rperm[MAX_VECT_LEN], x; ++ rtx_insn *insn; ++ unsigned i; + +- if (loongarch_tune_info == 0) +- loongarch_set_tune (loongarch_arch_info); ++ for (i = 0; i < nelt; ++i) ++ rperm[i] = GEN_INT (perm[i]); + +- if ((target_flags_explicit & MASK_64BIT) == 0) +- { +- /* Infer the integer register size from the ABI and processor. +- Restrict ourselves to 32-bit registers if that's all the +- processor has, or if the ABI cannot handle 64-bit registers. */ +- if (loongarch_abi == ABILP32) +- target_flags &= ~MASK_64BIT; +- else +- target_flags |= MASK_64BIT; +- } ++ x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm)); ++ x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x); ++ x = gen_rtx_SET (target, x); + +- if ((target_flags_explicit & MASK_FLOAT64) != 0) +- { +- if (TARGET_SINGLE_FLOAT && TARGET_FLOAT64) +- error ("unsupported combination: %s", "-mfp64 -msingle-float"); +- } +- else ++ insn = emit_insn (x); ++ if (recog_memoized (insn) < 0) + { +- /* -msingle-float selects 32-bit float registers. On r6 and later, +- -mdouble-float selects 64-bit float registers, since the old paired +- register model is not supported. In other cases the float registers +- should be the same size as the integer ones. 
*/ +- if (TARGET_64BIT && TARGET_DOUBLE_FLOAT) +- target_flags |= MASK_FLOAT64; +- else if (loongarch_abi == ABILP32 && ISA_HAS_LSX) +- target_flags |= MASK_FLOAT64; +- else +- target_flags &= ~MASK_FLOAT64; ++ remove_insn (insn); ++ return false; + } ++ return true; ++} + +- /* End of code shared with GAS. */ +- +- if (!TARGET_OLDABI) +- flag_pcc_struct_return = 0; ++/* Similar, but generate a vec_concat from op0 and op1 as well. */ + +- /* Decide which rtx_costs structure to use. */ +- if (optimize_size) +- loongarch_cost = &loongarch_rtx_cost_optimize_size; +- else +- loongarch_cost = &loongarch_rtx_cost_data[loongarch_tune]; ++static bool ++loongarch_expand_vselect_vconcat (rtx target, rtx op0, rtx op1, ++ const unsigned char *perm, unsigned nelt) ++{ ++ machine_mode v2mode; ++ rtx x; + +- /* If the user hasn't specified a branch cost, use the processor's +- default. */ +- if (loongarch_branch_cost == 0) +- loongarch_branch_cost = loongarch_cost->branch_cost; ++ if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode)) ++ return false; ++ x = gen_rtx_VEC_CONCAT (v2mode, op0, op1); ++ return loongarch_expand_vselect (target, x, perm, nelt); ++} + +- /* Prefer a call to memcpy over inline code when optimizing for size, +- though see MOVE_RATIO in loongarch.h. */ +- if (optimize_size && (target_flags_explicit & MASK_MEMCPY) == 0) +- target_flags |= MASK_MEMCPY; ++/* Construct (set target (vec_select op0 (parallel selector))) and ++ return true if that's a valid instruction in the active ISA. */ + +- /* If we have a nonzero small-data limit, check that the -mgpopt +- setting is consistent with the other target flags. */ +- if (loongarch_small_data_threshold > 0) +- { +- if (TARGET_VXWORKS_RTP) +- warning (0, "cannot use small-data accesses for %qs", "-mrtp"); +- } ++static bool ++loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d) ++{ ++ rtx x, elts[MAX_VECT_LEN]; ++ rtvec v; ++ rtx_insn *insn; ++ unsigned i; + +- /* Make sure that when ISA_HAS_LSX is true, TARGET_FLOAT64 and +- TARGET_HARD_FLOAT_ABI and both true. */ +- if (ISA_HAS_LSX && !(TARGET_FLOAT64 && TARGET_HARD_FLOAT_ABI)) +- error ("%<-mlsx%> must be used with %<-mfp64%> and %<-mhard-float%>"); ++ if (!ISA_HAS_LSX && !ISA_HAS_LASX) ++ return false; + +- /* If TARGET_LASX, enable TARGET_LSX. */ +- if (TARGET_LASX) +- target_flags |= MASK_LSX; ++ for (i = 0; i < d->nelt; i++) ++ elts[i] = GEN_INT (d->perm[i]); + +- /* .cfi_* directives generate a read-only section, so fall back on +- manual .eh_frame creation if we need the section to be writable. */ +- if (TARGET_WRITABLE_EH_FRAME) +- flag_dwarf2_cfi_asm = 0; ++ v = gen_rtvec_v (d->nelt, elts); ++ x = gen_rtx_PARALLEL (VOIDmode, v); + +- loongarch_init_print_operand_punct (); ++ if (!loongarch_const_vector_shuffle_set_p (x, d->vmode)) ++ return false; + +- /* Set up array to map GCC register number to debug register number. +- Ignore the special purpose register numbers. 
*/ ++ x = gen_rtx_VEC_SELECT (d->vmode, d->op0, x); ++ x = gen_rtx_SET (d->target, x); + +- for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) ++ insn = emit_insn (x); ++ if (recog_memoized (insn) < 0) + { +- loongarch_dbx_regno[i] = IGNORED_DWARF_REGNUM; +- if (GP_REG_P (i) || FP_REG_P (i)) +- loongarch_dwarf_regno[i] = i; +- else +- loongarch_dwarf_regno[i] = INVALID_REGNUM; ++ remove_insn (insn); ++ return false; + } ++ return true; ++} + +- start = GP_DBX_FIRST - GP_REG_FIRST; +- for (i = GP_REG_FIRST; i <= GP_REG_LAST; i++) +- loongarch_dbx_regno[i] = i + start; +- +- start = FP_DBX_FIRST - FP_REG_FIRST; +- for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++) +- loongarch_dbx_regno[i] = i + start; +- +- /* Set up loongarch_hard_regno_mode_ok. */ +- for (mode = 0; mode < MAX_MACHINE_MODE; mode++) +- for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) +- loongarch_hard_regno_mode_ok_p[mode][regno] +- = loongarch_hard_regno_mode_ok_uncached (regno, (machine_mode) mode); ++/* Try to simplify a two vector permutation using 2 intra-lane interleave ++ insns and cross-lane shuffle for 32-byte vectors. */ + +- /* Function to allocate machine-dependent function status. */ +- init_machine_status = &loongarch_init_machine_status; +- target_flags &= ~MASK_RELAX_PIC_CALLS; +- +- /* We register a second machine specific reorg pass after delay slot +- filling. Registering the pass must be done at start up. It's +- convenient to do it here. */ +- opt_pass *new_pass = make_pass_loongarch_machine_reorg2 (g); +- struct register_pass_info insert_pass_loongarch_machine_reorg2 = +- { +- new_pass, /* pass */ +- "dbr", /* reference_pass_name */ +- 1, /* ref_pass_instance_number */ +- PASS_POS_INSERT_AFTER /* po_op */ +- }; +- register_pass (&insert_pass_loongarch_machine_reorg2); ++static bool ++loongarch_expand_vec_perm_interleave (struct expand_vec_perm_d *d) ++{ ++ unsigned i, nelt; ++ rtx t1,t2,t3; ++ rtx (*gen_high) (rtx, rtx, rtx); ++ rtx (*gen_low) (rtx, rtx, rtx); ++ machine_mode mode = GET_MODE (d->target); + +- loongarch_register_frame_header_opt (); +-} ++ if (d->one_vector_p) ++ return false; ++ if (ISA_HAS_LASX && GET_MODE_SIZE (d->vmode) == 32) ++ ; ++ else ++ return false; + ++ nelt = d->nelt; ++ if (d->perm[0] != 0 && d->perm[0] != nelt / 2) ++ return false; ++ for (i = 0; i < nelt; i += 2) ++ if (d->perm[i] != d->perm[0] + i / 2 ++ || d->perm[i + 1] != d->perm[0] + i / 2 + nelt) ++ return false; + +-/* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */ ++ if (d->testing_p) ++ return true; + +-static void +-loongarch_conditional_register_usage (void) +-{ +- if (!TARGET_HARD_FLOAT) ++ switch (d->vmode) + { +- AND_COMPL_HARD_REG_SET (accessible_reg_set, +- reg_class_contents[(int) FP_REGS]); +- AND_COMPL_HARD_REG_SET (accessible_reg_set, +- reg_class_contents[(int) ST_REGS]); ++ case E_V32QImode: ++ gen_high = gen_lasx_xvilvh_b; ++ gen_low = gen_lasx_xvilvl_b; ++ break; ++ case E_V16HImode: ++ gen_high = gen_lasx_xvilvh_h; ++ gen_low = gen_lasx_xvilvl_h; ++ break; ++ case E_V8SImode: ++ gen_high = gen_lasx_xvilvh_w; ++ gen_low = gen_lasx_xvilvl_w; ++ break; ++ case E_V4DImode: ++ gen_high = gen_lasx_xvilvh_d; ++ gen_low = gen_lasx_xvilvl_d; ++ break; ++ case E_V8SFmode: ++ gen_high = gen_lasx_xvilvh_w_f; ++ gen_low = gen_lasx_xvilvl_w_f; ++ break; ++ case E_V4DFmode: ++ gen_high = gen_lasx_xvilvh_d_f; ++ gen_low = gen_lasx_xvilvl_d_f; ++ break; ++ default: ++ gcc_unreachable (); + } +-} + +-/* Implement EH_USES. 
*/ +- +-bool +-loongarch_eh_uses (unsigned int regno) +-{ +- return false; ++ t1 = gen_reg_rtx (mode); ++ t2 = gen_reg_rtx (mode); ++ emit_insn (gen_high (t1, d->op0, d->op1)); ++ emit_insn (gen_low (t2, d->op0, d->op1)); ++ if(mode == V4DFmode || mode == V8SFmode) ++ { ++ t3 = gen_reg_rtx (V4DFmode); ++ if (d->perm[0]) ++ emit_insn(gen_lasx_xvpermi_q_v4df (t3, gen_lowpart (V4DFmode, t1), ++ gen_lowpart (V4DFmode, t2),GEN_INT(0x31))); ++ else ++ emit_insn(gen_lasx_xvpermi_q_v4df (t3, gen_lowpart (V4DFmode, t1), ++ gen_lowpart (V4DFmode, t2),GEN_INT(0x20))); ++ } ++ else ++ { ++ t3 = gen_reg_rtx (V4DImode); ++ if (d->perm[0]) ++ emit_insn(gen_lasx_xvpermi_q_v4di (t3, gen_lowpart (V4DImode, t1), ++ gen_lowpart (V4DImode, t2),GEN_INT(0x31))); ++ else ++ emit_insn(gen_lasx_xvpermi_q_v4di (t3, gen_lowpart (V4DImode, t1), ++ gen_lowpart (V4DImode, t2),GEN_INT(0x20))); ++ } ++ emit_move_insn (d->target, gen_lowpart (mode, t3)); ++ return true; + } + +-/* Implement EPILOGUE_USES. */ ++/* Implement extract-even and extract-odd permutations.*/ + +-bool +-loongarch_epilogue_uses (unsigned int regno) ++static bool ++loongarch_expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd) + { +- /* Say that the epilogue uses the return address register. Note that +- in the case of sibcalls, the values "used by the epilogue" are +- considered live at the start of the called function. */ +- if (regno == RETURN_ADDR_REGNUM) +- return true; ++ rtx t1; ++ machine_mode mode = GET_MODE (d->target); ++ t1 = gen_reg_rtx (mode); + +- /* An interrupt handler must preserve some registers that are +- ordinarily call-clobbered. */ +- if (cfun->machine->interrupt_handler_p +- && loongarch_interrupt_extra_call_saved_reg_p (regno)) ++ if (d->testing_p) + return true; + +- return false; +-} ++ switch (d->vmode) ++ { ++ case E_V4DFmode: ++ /* Shuffle the lanes around into { 0 4 2 6 } and { 1 5 3 7 }. */ ++ if (odd) ++ emit_insn (gen_lasx_xvilvh_d_f (t1, d->op0, d->op1)); ++ else ++ emit_insn (gen_lasx_xvilvl_d_f (t1, d->op0, d->op1)); + +-/* Return true if MEM1 and MEM2 use the same base register, and the +- offset of MEM2 equals the offset of MEM1 plus 4. FIRST_REG is the +- register into (from) which the contents of MEM1 will be loaded +- (stored), depending on the value of LOAD_P. +- SWAP_P is true when the 1st and 2nd instructions are swapped. */ ++ /* Shuffle within the 256-bit lanes to produce the result required. ++ { 0 2 4 6 } | { 1 3 5 7 }. */ ++ emit_insn (gen_lasx_xvpermi_d_v4df (d->target, t1, GEN_INT (0xd8))); ++ break; + +-static bool +-loongarch_load_store_pair_p_1 (bool load_p, bool swap_p, +- rtx first_reg, rtx mem1, rtx mem2) +-{ +- rtx base1, base2; +- HOST_WIDE_INT offset1, offset2; ++ case E_V4DImode: ++ if (odd) ++ emit_insn (gen_lasx_xvilvh_d (t1, d->op0, d->op1)); ++ else ++ emit_insn (gen_lasx_xvilvl_d (t1, d->op0, d->op1)); + +- if (!MEM_P (mem1) || !MEM_P (mem2)) +- return false; ++ emit_insn (gen_lasx_xvpermi_d_v4di (d->target, t1, GEN_INT (0xd8))); ++ break; + +- loongarch_split_plus (XEXP (mem1, 0), &base1, &offset1); +- loongarch_split_plus (XEXP (mem2, 0), &base2, &offset2); ++ case E_V8SFmode: ++ /* Shuffle the lanes around into: ++ { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */ ++ if (odd) ++ emit_insn (gen_lasx_xvpickod_w_f (t1, d->op0, d->op1)); ++ else ++ emit_insn (gen_lasx_xvpickev_w_f (t1, d->op0, d->op1)); + +- if (!REG_P (base1) || !rtx_equal_p (base1, base2)) +- return false; ++ /* Shuffle within the 256-bit lanes to produce the result required. 
++ { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */ ++ emit_insn (gen_lasx_xvpermi_d_v8sf (d->target, t1, GEN_INT (0xd8))); ++ break; + +- /* Avoid invalid load pair instructions. */ +- if (load_p && REGNO (first_reg) == REGNO (base1)) +- return false; ++ case E_V8SImode: ++ if (odd) ++ emit_insn (gen_lasx_xvpickod_w (t1, d->op0, d->op1)); ++ else ++ emit_insn (gen_lasx_xvpickev_w (t1, d->op0, d->op1)); + +- /* We must avoid this case for anti-dependence. +- Ex: lw $3, 4($3) +- lw $2, 0($3) +- first_reg is $2, but the base is $3. */ +- if (load_p +- && swap_p +- && REGNO (first_reg) + 1 == REGNO (base1)) +- return false; ++ emit_insn (gen_lasx_xvpermi_d_v8si (d->target, t1, GEN_INT (0xd8))); ++ break; + +- if (offset2 != offset1 + 4) +- return false; ++ case E_V16HImode: ++ if (odd) ++ emit_insn (gen_lasx_xvpickod_h (t1, d->op0, d->op1)); ++ else ++ emit_insn (gen_lasx_xvpickev_h (t1, d->op0, d->op1)); + +- if (!ULARCH_12BIT_OFFSET_P (offset1)) +- return false; ++ emit_insn (gen_lasx_xvpermi_d_v16hi (d->target, t1, GEN_INT (0xd8))); ++ break; + +- return true; +-} ++ case E_V32QImode: ++ if (odd) ++ emit_insn (gen_lasx_xvpickod_b (t1, d->op0, d->op1)); ++ else ++ emit_insn (gen_lasx_xvpickev_b (t1, d->op0, d->op1)); + +-bool +-loongarch_load_store_bonding_p (rtx *operands, machine_mode mode, bool load_p) +-{ +- rtx reg1, reg2, mem1, mem2, base1, base2; +- enum reg_class rc1, rc2; +- HOST_WIDE_INT offset1, offset2; ++ emit_insn (gen_lasx_xvpermi_d_v32qi (d->target, t1, GEN_INT (0xd8))); ++ break; + +- if (load_p) +- { +- reg1 = operands[0]; +- reg2 = operands[2]; +- mem1 = operands[1]; +- mem2 = operands[3]; +- } +- else +- { +- reg1 = operands[1]; +- reg2 = operands[3]; +- mem1 = operands[0]; +- mem2 = operands[2]; ++ default: ++ gcc_unreachable (); + } + +- if (loongarch_address_insns (XEXP (mem1, 0), mode, false) == 0 +- || loongarch_address_insns (XEXP (mem2, 0), mode, false) == 0) +- return false; +- +- loongarch_split_plus (XEXP (mem1, 0), &base1, &offset1); +- loongarch_split_plus (XEXP (mem2, 0), &base2, &offset2); +- +- /* Base regs do not match. */ +- if (!REG_P (base1) || !rtx_equal_p (base1, base2)) +- return false; ++ return true; ++} + +- /* Either of the loads is clobbering base register. It is legitimate to bond +- loads if second load clobbers base register. However, hardware does not +- support such bonding. */ +- if (load_p +- && (REGNO (reg1) == REGNO (base1) +- || (REGNO (reg2) == REGNO (base1)))) +- return false; ++/* Pattern match extract-even and extract-odd permutations. */ + +- /* Loading in same registers. */ +- if (load_p +- && REGNO (reg1) == REGNO (reg2)) ++static bool ++loongarch_expand_vec_perm_even_odd (struct expand_vec_perm_d *d) ++{ ++ unsigned i, odd, nelt = d->nelt; ++ if(!ISA_HAS_LASX) + return false; + +- /* The loads/stores are not of same type. */ +- rc1 = REGNO_REG_CLASS (REGNO (reg1)); +- rc2 = REGNO_REG_CLASS (REGNO (reg2)); +- if (rc1 != rc2 +- && !reg_class_subset_p (rc1, rc2) +- && !reg_class_subset_p (rc2, rc1)) ++ odd = d->perm[0]; ++ if (odd != 0 && odd != 1) + return false; + +- if (abs (offset1 - offset2) != GET_MODE_SIZE (mode)) +- return false; ++ for (i = 1; i < nelt; ++i) ++ if (d->perm[i] != 2 * i + odd) ++ return false; + +- return true; ++ return loongarch_expand_vec_perm_even_odd_1 (d, odd); + } + +-/* OPERANDS describes the operands to a pair of SETs, in the order +- dest1, src1, dest2, src2. Return true if the operands can be used +- in an LWP or SWP instruction; LOAD_P says which. 
*/ ++/* Expand a variable vector permutation for LASX. */ + +-bool +-loongarch_load_store_pair_p (bool load_p, rtx *operands) ++void ++loongarch_expand_vec_perm_1 (rtx operands[]) + { +- rtx reg1, reg2, mem1, mem2; ++ rtx target = operands[0]; ++ rtx op0 = operands[1]; ++ rtx op1 = operands[2]; ++ rtx mask = operands[3]; ++ bool one_operand_shuffle = rtx_equal_p (op0, op1); ++ rtx t1, t2, t3, t4, t5, t6, vt, vec[32]; ++ machine_mode mode = GET_MODE (op0); ++ machine_mode maskmode = GET_MODE (mask); ++ int w, i; ++ ++ /* Number of elements in the vector. */ ++ w = GET_MODE_NUNITS (mode); ++ ++ if (mode == V4DImode || mode == V4DFmode) ++ { ++ maskmode = mode = V8SImode; ++ w = 8; ++ t1 = gen_reg_rtx (maskmode); ++ ++ /* Replicate the low bits of the V4DImode mask into V8SImode: ++ mask = { A B C D } ++ t1 = { A A B B C C D D }. */ ++ for (i = 0; i < w / 2; ++i) ++ vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2); ++ vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec)); ++ vt = force_reg (maskmode, vt); ++ mask = gen_lowpart (maskmode, mask); ++ emit_insn (gen_lasx_xvperm_w (t1, mask, vt)); ++ ++ /* Multiply the shuffle indicies by two. */ ++ t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1, ++ OPTAB_DIRECT); ++ ++ /* Add one to the odd shuffle indicies: ++ t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */ ++ for (i = 0; i < w / 2; ++i) ++ { ++ vec[i * 2] = const0_rtx; ++ vec[i * 2 + 1] = const1_rtx; ++ } ++ vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec)); ++ vt = validize_mem (force_const_mem (maskmode, vt)); ++ t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1, ++ OPTAB_DIRECT); + +- if (load_p) +- { +- reg1 = operands[0]; +- reg2 = operands[2]; +- mem1 = operands[1]; +- mem2 = operands[3]; ++ /* Continue as if V8SImode (resp. V32QImode) was used initially. 
*/ ++ operands[3] = mask = t1; ++ target = gen_reg_rtx (mode); ++ op0 = gen_lowpart (mode, op0); ++ op1 = gen_lowpart (mode, op1); + } +- else ++ switch (mode) + { +- reg1 = operands[1]; +- reg2 = operands[3]; +- mem1 = operands[0]; +- mem2 = operands[2]; ++ case E_V8SImode: ++ if (one_operand_shuffle) ++ { ++ emit_insn (gen_lasx_xvperm_w (target, op0, mask)); ++ if (target != operands[0]) ++ emit_move_insn (operands[0], ++ gen_lowpart (GET_MODE (operands[0]), target)); ++ } ++ else ++ { ++ t1 = gen_reg_rtx (V8SImode); ++ t2 = gen_reg_rtx (V8SImode); ++ emit_insn (gen_lasx_xvperm_w (t1, op0, mask)); ++ emit_insn (gen_lasx_xvperm_w (t2, op1, mask)); ++ goto merge_two; ++ } ++ return; ++ ++ case E_V8SFmode: ++ mask = gen_lowpart (V8SImode, mask); ++ if (one_operand_shuffle) ++ emit_insn (gen_lasx_xvperm_w_f (target, op0, mask)); ++ else ++ { ++ t1 = gen_reg_rtx (V8SFmode); ++ t2 = gen_reg_rtx (V8SFmode); ++ emit_insn (gen_lasx_xvperm_w_f (t1, op0, mask)); ++ emit_insn (gen_lasx_xvperm_w_f (t2, op1, mask)); ++ goto merge_two; ++ } ++ return; ++ ++ case E_V16HImode: ++ if (one_operand_shuffle) ++ { ++ t1 = gen_reg_rtx (V16HImode); ++ t2 = gen_reg_rtx (V16HImode); ++ emit_insn (gen_lasx_xvpermi_d_v16hi (t1, op0, GEN_INT(0x44))); ++ emit_insn (gen_lasx_xvpermi_d_v16hi (t2, op0, GEN_INT(0xee))); ++ emit_insn (gen_lasx_xvshuf_h (target, mask, t2, t1)); ++ } ++ else ++ { ++ t1 = gen_reg_rtx (V16HImode); ++ t2 = gen_reg_rtx (V16HImode); ++ t3 = gen_reg_rtx (V16HImode); ++ t4 = gen_reg_rtx (V16HImode); ++ t5 = gen_reg_rtx (V16HImode); ++ t6 = gen_reg_rtx (V16HImode); ++ emit_insn (gen_lasx_xvpermi_d_v16hi (t3, op0, GEN_INT(0x44))); ++ emit_insn (gen_lasx_xvpermi_d_v16hi (t4, op0, GEN_INT(0xee))); ++ emit_insn (gen_lasx_xvshuf_h (t1, mask, t4, t3)); ++ emit_insn (gen_lasx_xvpermi_d_v16hi (t5, op1, GEN_INT(0x44))); ++ emit_insn (gen_lasx_xvpermi_d_v16hi (t6, op1, GEN_INT(0xee))); ++ emit_insn (gen_lasx_xvshuf_h (t2, mask, t6, t5)); ++ goto merge_two; ++ } ++ return; ++ ++ case E_V32QImode: ++ if (one_operand_shuffle) ++ { ++ t1 = gen_reg_rtx (V32QImode); ++ t2 = gen_reg_rtx (V32QImode); ++ emit_insn (gen_lasx_xvpermi_d_v32qi (t1, op0, GEN_INT(0x44))); ++ emit_insn (gen_lasx_xvpermi_d_v32qi (t2, op0, GEN_INT(0xee))); ++ emit_insn (gen_lasx_xvshuf_b (target, t2, t1, mask)); ++ } ++ else ++ { ++ t1 = gen_reg_rtx (V32QImode); ++ t2 = gen_reg_rtx (V32QImode); ++ t3 = gen_reg_rtx (V32QImode); ++ t4 = gen_reg_rtx (V32QImode); ++ t5 = gen_reg_rtx (V32QImode); ++ t6 = gen_reg_rtx (V32QImode); ++ emit_insn (gen_lasx_xvpermi_d_v32qi (t3, op0, GEN_INT(0x44))); ++ emit_insn (gen_lasx_xvpermi_d_v32qi (t4, op0, GEN_INT(0xee))); ++ emit_insn (gen_lasx_xvshuf_b (t1, t4, t3, mask)); ++ emit_insn (gen_lasx_xvpermi_d_v32qi (t5, op1, GEN_INT(0x44))); ++ emit_insn (gen_lasx_xvpermi_d_v32qi (t6, op1, GEN_INT(0xee))); ++ emit_insn (gen_lasx_xvshuf_b (t2, t6, t5, mask)); ++ goto merge_two; ++ } ++ return; ++ ++ default: ++ gcc_assert (GET_MODE_SIZE (mode) == 32); ++ break; + } + +- if (REGNO (reg2) == REGNO (reg1) + 1) +- return loongarch_load_store_pair_p_1 (load_p, false, reg1, mem1, mem2); ++merge_two: ++ /* Then merge them together. The key is whether any given control ++ element contained a bit set that indicates the second word. 
*/ ++ rtx xops[6]; ++ mask = operands[3]; ++ vt = GEN_INT (w); ++ vt = gen_const_vec_duplicate (maskmode, vt); ++ vt = force_reg (maskmode, vt); ++ if (GET_MODE (target) != mode) ++ target = gen_reg_rtx (mode); ++ xops[0] = target; ++ xops[1] = gen_lowpart (mode, t2); ++ xops[2] = gen_lowpart (mode, t1); ++ xops[3] = gen_rtx_GE (maskmode, mask, vt); ++ xops[4] = mask; ++ xops[5] = vt; ++ ++ loongarch_expand_vec_cond_expr (mode, maskmode, xops); ++ if (target != operands[0]) ++ emit_move_insn (operands[0], ++ gen_lowpart (GET_MODE (operands[0]), target)); ++} + +- if (REGNO (reg1) == REGNO (reg2) + 1) +- return loongarch_load_store_pair_p_1 (load_p, true, reg2, mem2, mem1); ++void ++loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) ++{ ++ machine_mode vmode = GET_MODE (target); + +- return false; ++ gcc_checking_assert (vmode == E_V16QImode ++ || vmode == E_V2DImode || vmode == E_V2DFmode ++ || vmode == E_V4SImode || vmode == E_V4SFmode ++ || vmode == E_V8HImode); ++ gcc_checking_assert (GET_MODE (op0) == vmode); ++ gcc_checking_assert (GET_MODE (op1) == vmode); ++ gcc_checking_assert (GET_MODE (sel) == vmode); ++ gcc_checking_assert (ISA_HAS_LSX); ++ ++ switch (vmode) ++ { ++ case E_V16QImode: ++ emit_insn (gen_lsx_vshuf_b (target, op1, op0, sel)); ++ break; ++ case E_V2DFmode: ++ emit_insn (gen_lsx_vshuf_d_f (target, sel, op1, op0)); ++ break; ++ case E_V2DImode: ++ emit_insn (gen_lsx_vshuf_d (target, sel, op1, op0)); ++ break; ++ case E_V4SFmode: ++ emit_insn (gen_lsx_vshuf_w_f (target, sel, op1, op0)); ++ break; ++ case E_V4SImode: ++ emit_insn (gen_lsx_vshuf_w (target, sel, op1, op0)); ++ break; ++ case E_V8HImode: ++ emit_insn (gen_lsx_vshuf_h (target, sel, op1, op0)); ++ break; ++ default: ++ break; ++ } + } + +-/* Return true if REG1 and REG2 match the criteria for a movep insn. */ ++static bool ++loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d) ++{ ++ int i; ++ rtx target, op0, op1, sel, tmp; ++ rtx rperm[MAX_VECT_LEN]; + +-bool +-loongarch_movep_target_p (rtx reg1, rtx reg2) +-{ +- int regno1, regno2, pair; +- unsigned int i; +- static const int match[8] = { +- 0x00000060, /* 5, 6 */ +- 0x000000a0, /* 5, 7 */ +- 0x000000c0, /* 6, 7 */ +- 0x00200010, /* 4, 21 */ +- 0x00400010, /* 4, 22 */ +- 0x00000030, /* 4, 5 */ +- 0x00000050, /* 4, 6 */ +- 0x00000090 /* 4, 7 */ +- }; +- +- if (!REG_P (reg1) || !REG_P (reg2)) +- return false; ++ if (d->vmode == E_V2DImode || d->vmode == E_V2DFmode ++ || d->vmode == E_V4SImode || d->vmode == E_V4SFmode ++ || d->vmode == E_V8HImode || d->vmode == E_V16QImode) ++ { ++ target = d->target; ++ op0 = d->op0; ++ op1 = d->one_vector_p ? 
d->op0 : d->op1; + +- regno1 = REGNO (reg1); +- regno2 = REGNO (reg2); ++ if (GET_MODE (op0) != GET_MODE (op1) ++ || GET_MODE (op0) != GET_MODE (target)) ++ return false; + +- if (!GP_REG_P (regno1) || !GP_REG_P (regno2)) +- return false; ++ if (d->testing_p) ++ return true; + +- pair = (1 << regno1) | (1 << regno2); ++ for (i = 0; i < d->nelt; i += 1) ++ { ++ rperm[i] = GEN_INT (d->perm[i]); ++ } + +- for (i = 0; i < ARRAY_SIZE (match); i++) +- if (pair == match[i]) +- return true; ++ if (d->vmode == E_V2DFmode) ++ { ++ sel = gen_rtx_CONST_VECTOR (E_V2DImode, gen_rtvec_v (d->nelt, rperm)); ++ tmp = gen_rtx_SUBREG (E_V2DImode, d->target, 0); ++ emit_move_insn (tmp, sel); ++ } ++ else if (d->vmode == E_V4SFmode) ++ { ++ sel = gen_rtx_CONST_VECTOR (E_V4SImode, gen_rtvec_v (d->nelt, rperm)); ++ tmp = gen_rtx_SUBREG (E_V4SImode, d->target, 0); ++ emit_move_insn (tmp, sel); ++ } ++ else ++ { ++ sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt, rperm)); ++ emit_move_insn (d->target, sel); ++ } ++ ++ switch (d->vmode) ++ { ++ case E_V2DFmode: ++ emit_insn (gen_lsx_vshuf_d_f (target, target, op1, op0)); ++ break; ++ case E_V2DImode: ++ emit_insn (gen_lsx_vshuf_d (target, target, op1, op0)); ++ break; ++ case E_V4SFmode: ++ emit_insn (gen_lsx_vshuf_w_f (target, target, op1, op0)); ++ break; ++ case E_V4SImode: ++ emit_insn (gen_lsx_vshuf_w (target, target, op1, op0)); ++ break; ++ case E_V8HImode: ++ emit_insn (gen_lsx_vshuf_h (target, target, op1, op0)); ++ break; ++ case E_V16QImode: ++ emit_insn (gen_lsx_vshuf_b (target, op1, op0, target)); ++ break; ++ default: ++ break; ++ } + ++ return true; ++ } + return false; + } +- +-/* Return the size in bytes of the trampoline code, padded to +- TRAMPOLINE_ALIGNMENT bits. The static chain pointer and target +- function address immediately follow. */ + +-int +-loongarch_trampoline_code_size (void) ++static bool ++loongarch_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) + { +- return 4 * 4; +-} ++ unsigned int i, nelt = d->nelt; ++ unsigned char perm2[MAX_VECT_LEN]; + +-/* Implement TARGET_TRAMPOLINE_INIT. */ ++ if (d->one_vector_p) ++ { ++ /* Try interleave with alternating operands. */ ++ memcpy (perm2, d->perm, sizeof(perm2)); ++ for (i = 1; i < nelt; i += 2) ++ perm2[i] += nelt; ++ if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, perm2, nelt)) ++ return true; ++ } ++ else ++ { ++ if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, ++ d->perm, nelt)) ++ return true; + +-static void +-loongarch_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) +-{ +- rtx addr, end_addr, high, low, opcode, mem; +- rtx trampoline[8]; +- unsigned int i, j; +- HOST_WIDE_INT end_addr_offset, static_chain_offset, target_function_offset; ++ /* Try again with swapped operands. */ ++ for (i = 0; i < nelt; ++i) ++ perm2[i] = (d->perm[i] + nelt) & (2 * nelt - 1); ++ if (loongarch_expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt)) ++ return true; ++ } + +- /* Work out the offsets of the pointers from the start of the +- trampoline code. */ +- end_addr_offset = loongarch_trampoline_code_size (); +- static_chain_offset = end_addr_offset; +- target_function_offset = static_chain_offset + GET_MODE_SIZE (ptr_mode); ++ if (loongarch_expand_lsx_shuffle (d)) ++ return true; ++ if (loongarch_expand_vec_perm_even_odd(d)) ++ return true; ++ if (loongarch_expand_vec_perm_interleave(d)) ++ return true; ++ return false; ++} + +- /* Get pointers to the beginning and end of the code block. 
*/ +- addr = force_reg (Pmode, XEXP (m_tramp, 0)); +- end_addr = loongarch_force_binary (Pmode, PLUS, addr, GEN_INT (end_addr_offset)); ++// Following are the assist function for const vector permutation support. ++static bool ++loongarch_is_quad_duplicate (struct expand_vec_perm_d *d) ++{ ++ if (d->perm[0] >= d->nelt / 2) ++ return false; + +-#define OP(X) gen_int_mode (X, SImode) ++ bool result = true; ++ unsigned char lhs = d->perm[0]; ++ unsigned char rhs = d->perm[d->nelt / 2]; + +- /* Build up the code in TRAMPOLINE. */ +- i = 0; +- /* +- pcaddi $static_chain,0 +- ld.[dw] $tmp,$static_chain,target_function_offset +- ld.[dw] $static_chain,$static_chain,static_chain_offset +- jirl $r0,$tmp,0 +- */ +- trampoline[i++] = OP (0x18000000 | (STATIC_CHAIN_REGNUM - GP_REG_FIRST)); +- trampoline[i++] = OP ((ptr_mode == DImode ? 0x28c00000 : 0x28800000) +- | 19 /* $t7 */ +- | ((STATIC_CHAIN_REGNUM - GP_REG_FIRST) << 5) +- | ((target_function_offset & 0xfff) << 10)); +- trampoline[i++] = OP ((ptr_mode == DImode ? 0x28c00000 : 0x28800000) +- | (STATIC_CHAIN_REGNUM - GP_REG_FIRST) +- | ((STATIC_CHAIN_REGNUM - GP_REG_FIRST) << 5) +- | ((static_chain_offset & 0xfff) << 10)); +- trampoline[i++] = OP (0x4c000000 | (19 << 5)); +-#undef OP ++ if ((rhs - lhs) != d->nelt / 2) ++ return false; + +- for (j = 0; j < i; j++) ++ for (int i = 1; i < d->nelt; i += 1) + { +- mem = adjust_address (m_tramp, SImode, j * GET_MODE_SIZE (SImode)); +- loongarch_emit_move (mem, trampoline[j]); ++ if ((i < d->nelt / 2) && (d->perm[i] != lhs)) ++ { ++ result = false; ++ break; ++ } ++ if ((i > d->nelt / 2) && (d->perm[i] != rhs)) ++ { ++ result = false; ++ break; ++ } + } + +- /* Set up the static chain pointer field. */ +- mem = adjust_address (m_tramp, ptr_mode, static_chain_offset); +- loongarch_emit_move (mem, chain_value); +- +- /* Set up the target function field. */ +- mem = adjust_address (m_tramp, ptr_mode, target_function_offset); +- loongarch_emit_move (mem, XEXP (DECL_RTL (fndecl), 0)); +- +- /* Flush the code part of the trampoline. */ +- emit_insn (gen_add3_insn (end_addr, addr, GEN_INT (TRAMPOLINE_SIZE))); +- emit_insn (gen_clear_cache (addr, end_addr)); ++ return result; + } + +- +-/* Implement TARGET_SHIFT_TRUNCATION_MASK. We want to keep the default +- behavior of TARGET_SHIFT_TRUNCATION_MASK for non-vector modes even +- when TARGET_LOONGSON_MMI is true. */ +- +-static unsigned HOST_WIDE_INT +-loongarch_shift_truncation_mask (machine_mode mode) ++static bool ++loongarch_is_double_duplicate (struct expand_vec_perm_d *d) + { +- return GET_MODE_BITSIZE (mode) - 1; +-} ++ if (!d->one_vector_p) ++ return false; + +- +-/* Generate or test for an insn that supports a constant permutation. */ ++ if (d->nelt < 8) ++ return false; + +-#define MAX_VECT_LEN 32 ++ bool result = true; ++ unsigned char buf = d->perm[0]; + +-struct expand_vec_perm_d +-{ +- rtx target, op0, op1; +- unsigned char perm[MAX_VECT_LEN]; +- machine_mode vmode; +- unsigned char nelt; +- bool one_vector_p; +- bool testing_p; +-}; ++ for (int i = 1; i < d->nelt; i += 2) ++ { ++ if (d->perm[i] != buf) ++ { ++ result = false; ++ break; ++ } ++ if (d->perm[i - 1] != d->perm[i]) ++ { ++ result = false; ++ break; ++ } ++ buf += d->nelt / 4; ++ } + +-/* Construct (set target (vec_select op0 (parallel perm))) and +- return true if that's a valid instruction in the active ISA. 
*/ ++ return result; ++} + + static bool +-loongarch_expand_vselect (rtx target, rtx op0, +- const unsigned char *perm, unsigned nelt) ++loongarch_is_odd_extraction (struct expand_vec_perm_d *d) + { +- rtx rperm[MAX_VECT_LEN], x; +- rtx_insn *insn; +- unsigned i; ++ bool result = true; ++ unsigned char buf = 1; + +- for (i = 0; i < nelt; ++i) +- rperm[i] = GEN_INT (perm[i]); ++ for (int i = 0; i < d->nelt; i += 1) ++ { ++ if (buf != d->perm[i]) ++ { ++ result = false; ++ break; ++ } ++ buf += 2; ++ } + +- x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm)); +- x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x); +- x = gen_rtx_SET (target, x); ++ return result; ++} + +- insn = emit_insn (x); +- if (recog_memoized (insn) < 0) ++static bool ++loongarch_is_even_extraction (struct expand_vec_perm_d *d) ++{ ++ bool result = true; ++ unsigned char buf = 0; ++ ++ for (int i = 0; i < d->nelt; i += 1) + { +- remove_insn (insn); +- return false; ++ if (buf != d->perm[i]) ++ { ++ result = false; ++ break; ++ } ++ buf += 2; + } +- return true; +-} + +-/* Similar, but generate a vec_concat from op0 and op1 as well. */ ++ return result; ++} + + static bool +-loongarch_expand_vselect_vconcat (rtx target, rtx op0, rtx op1, +- const unsigned char *perm, unsigned nelt) ++loongarch_is_extraction_permutation (struct expand_vec_perm_d *d) + { +- machine_mode v2mode; +- rtx x; ++ bool result = true; ++ unsigned char buf = d->perm[0]; + +- if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode)) ++ if (buf != 0 || buf != d->nelt) + return false; +- x = gen_rtx_VEC_CONCAT (v2mode, op0, op1); +- return loongarch_expand_vselect (target, x, perm, nelt); +-} + +-/* Construct (set target (vec_select op0 (parallel selector))) and +- return true if that's a valid instruction in the active ISA. 
*/ ++ for (int i = 0; i < d->nelt; i += 1) ++ { ++ if (buf != d->perm[i]) ++ { ++ result = false; ++ break; ++ } ++ buf += 1; ++ } ++ ++ return result; ++} + + static bool +-loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d) ++loongarch_is_center_extraction (struct expand_vec_perm_d *d) + { +- rtx x, elts[MAX_VECT_LEN]; +- rtvec v; +- rtx_insn *insn; +- unsigned i; ++ bool result = true; ++ unsigned buf = d->nelt / 2; + +- if (!ISA_HAS_LSX && !ISA_HAS_LASX) ++ for (int i = 0; i < d->nelt; i += 1) ++ { ++ if (buf != d->perm[i]) ++ { ++ result = false; ++ break; ++ } ++ buf += 1; ++ } ++ ++ return result; ++} ++ ++static bool ++loongarch_is_reversing_permutation (struct expand_vec_perm_d *d) ++{ ++ if (!d->one_vector_p) + return false; + +- for (i = 0; i < d->nelt; i++) +- elts[i] = GEN_INT (d->perm[i]); ++ bool result = true; ++ unsigned char buf = d->nelt - 1; + +- v = gen_rtvec_v (d->nelt, elts); +- x = gen_rtx_PARALLEL (VOIDmode, v); ++ for (int i = 0; i < d->nelt; i += 1) ++ { ++ if (d->perm[i] != buf) ++ { ++ result = false; ++ break; ++ } + +- if (!loongarch_const_vector_shuffle_set_p (x, d->vmode)) ++ buf -= 1; ++ } ++ ++ return result; ++} ++ ++static bool ++loongarch_is_di_misalign_extract (struct expand_vec_perm_d *d) ++{ ++ if (d->nelt != 4 && d->nelt != 8) + return false; + +- x = gen_rtx_VEC_SELECT (d->vmode, d->op0, x); +- x = gen_rtx_SET (d->target, x); ++ bool result = true; ++ unsigned char buf; + +- insn = emit_insn (x); +- if (recog_memoized (insn) < 0) ++ if (d->nelt == 4) + { +- remove_insn (insn); +- return false; ++ buf = 1; ++ for (int i = 0; i < d->nelt; i += 1) ++ { ++ if (buf != d->perm[i]) ++ { ++ result = false; ++ break; ++ } ++ ++ buf += 1; ++ } ++ } ++ else if (d->nelt == 8) ++ { ++ buf = 2; ++ for (int i = 0; i < d->nelt; i += 1) ++ { ++ if (buf != d->perm[i]) ++ { ++ result = false; ++ break; ++ } ++ ++ buf += 1; ++ } ++ } ++ ++ return result; ++} ++ ++static bool ++loongarch_is_si_misalign_extract (struct expand_vec_perm_d *d) ++{ ++ if (d->vmode != E_V8SImode && d->vmode != E_V8SFmode) ++ return false; ++ bool result = true; ++ unsigned char buf = 1; ++ ++ for (int i = 0; i < d->nelt; i += 1) ++ { ++ if (buf != d->perm[i]) ++ { ++ result = false; ++ break; ++ } ++ buf += 1; ++ } ++ ++ return result; ++} ++ ++static bool ++loongarch_is_lasx_lowpart_interleave (struct expand_vec_perm_d *d) ++{ ++ bool result = true; ++ unsigned char buf = 0; ++ ++ for (int i = 0;i < d->nelt; i += 2) ++ { ++ if (buf != d->perm[i]) ++ { ++ result = false; ++ break; ++ } ++ buf += 1; ++ } ++ ++ if (result) ++ { ++ buf = d->nelt; ++ for (int i = 1; i < d->nelt; i += 2) ++ { ++ if (buf != d->perm[i]) ++ { ++ result = false; ++ break; ++ } ++ buf += 1; ++ } ++ } ++ ++ return result; ++} ++ ++static bool ++loongarch_is_lasx_lowpart_interleave_2 (struct expand_vec_perm_d *d) ++{ ++ if (d->vmode != E_V32QImode) ++ return false; ++ bool result = true; ++ unsigned char buf = 0; ++ ++#define COMPARE_SELECTOR(INIT, BEGIN, END) \ ++ buf = INIT; \ ++ for (int i = BEGIN; i < END && result; i += 1) \ ++ { \ ++ if (buf != d->perm[i]) \ ++ { \ ++ result = false; \ ++ break; \ ++ } \ ++ buf += 1; \ ++ } ++ ++ COMPARE_SELECTOR (0, 0, 8); ++ COMPARE_SELECTOR (32, 8, 16); ++ COMPARE_SELECTOR (8, 16, 24); ++ COMPARE_SELECTOR (40, 24, 32); ++ ++#undef COMPARE_SELECTOR ++ return result; ++} ++ ++static bool ++loongarch_is_lasx_lowpart_extract (struct expand_vec_perm_d *d) ++{ ++ bool result = true; ++ unsigned char buf = 0; ++ ++ for (int i = 0; i < d->nelt / 2; i += 1) ++ { ++ if (buf != 
d->perm[i]) ++ { ++ result = false; ++ break; ++ } ++ buf += 1; ++ } ++ ++ if (result) ++ { ++ buf = d->nelt; ++ for (int i = d->nelt / 2; i < d->nelt; i += 1) ++ { ++ if (buf != d->perm[i]) ++ { ++ result = false; ++ break; ++ } ++ buf += 1; ++ } ++ } ++ ++ return result; ++} ++ ++static bool ++loongarch_is_lasx_highpart_interleave (expand_vec_perm_d *d) ++{ ++ bool result = true; ++ unsigned char buf = d->nelt / 2; ++ ++ for (int i = 0; i < d->nelt; i += 2) ++ { ++ if (buf != d->perm[i]) ++ { ++ result = false; ++ break; ++ } ++ buf += 1; ++ } ++ ++ if (result) ++ { ++ buf = d->nelt + d->nelt / 2; ++ for (int i = 1; i < d->nelt;i += 2) ++ { ++ if (buf != d->perm[i]) ++ { ++ result = false; ++ break; ++ } ++ buf += 1; ++ } ++ } ++ ++ return result; ++} ++ ++static bool ++loongarch_is_lasx_highpart_interleave_2 (struct expand_vec_perm_d *d) ++{ ++ if (d->vmode != E_V32QImode) ++ return false; ++ ++ bool result = true; ++ unsigned char buf = 0; ++ ++#define COMPARE_SELECTOR(INIT, BEGIN, END) \ ++ buf = INIT; \ ++ for (int i = BEGIN; i < END && result; i += 1) \ ++ { \ ++ if (buf != d->perm[i]) \ ++ { \ ++ result = false; \ ++ break; \ ++ } \ ++ buf += 1; \ ++ } ++ ++ COMPARE_SELECTOR (16, 0, 8); ++ COMPARE_SELECTOR (48, 8, 16); ++ COMPARE_SELECTOR (24, 16, 24); ++ COMPARE_SELECTOR (56, 24, 32); ++ ++#undef COMPARE_SELECTOR ++ return result; ++} ++ ++static bool ++loongarch_is_elem_duplicate (struct expand_vec_perm_d *d) ++{ ++ bool result = true; ++ unsigned char buf = d->perm[0]; ++ ++ for (int i = 0; i < d->nelt; i += 1) ++ { ++ if (buf != d->perm[i]) ++ { ++ result = false; ++ break; ++ } ++ } ++ ++ return result; ++} ++ ++inline bool ++loongarch_is_op_reverse_perm (struct expand_vec_perm_d *d) ++{ ++ return (d->vmode == E_V4DFmode) ++ && d->perm[0] == 2 && d->perm[1] == 3 ++ && d->perm[2] == 0 && d->perm[3] == 1; ++} ++ ++static bool ++loongarch_is_single_op_perm (struct expand_vec_perm_d *d) ++{ ++ bool result = true; ++ ++ for (int i = 0; i < d->nelt; i += 1) ++ { ++ if (d->perm[i] >= d->nelt) ++ { ++ result = false; ++ break; ++ } ++ } ++ ++ return result; ++} ++ ++static bool ++loongarch_is_divisible_perm (struct expand_vec_perm_d *d) ++{ ++ bool result = true; ++ ++ for (int i = 0; i < d->nelt / 2; i += 1) ++ { ++ if (d->perm[i] >= d->nelt) ++ { ++ result = false; ++ break; ++ } ++ } ++ ++ if (result) ++ { ++ for (int i = d->nelt / 2; i < d->nelt; i += 1) ++ { ++ if (d->perm[i] < d->nelt) ++ { ++ result = false; ++ break; ++ } ++ } ++ } ++ ++ return result; ++} ++ ++inline bool ++loongarch_is_triple_stride_extract (struct expand_vec_perm_d *d) ++{ ++ return (d->vmode == E_V4DImode || d->vmode == E_V4DFmode) ++ && d->perm[0] == 1 && d->perm[1] == 4 ++ && d->perm[2] == 7 && d->perm[3] == 0; ++} ++ ++/* In LASX, xvshuf.* insn does not have the behavior that gcc expects when ++ * compiler wants to emit a vector permutation. ++ * ++ * 1. What GCC provides via vectorize_vec_perm_const()'s paramater: ++ * When GCC wants to performs a vector permutation, it provides two op ++ * reigster, one target register, and a selector. ++ * In const vector permutation case, GCC provides selector as a char array ++ * that contains original value; in variable vector permuatation ++ * (performs via vec_perm insn template), it provides a vector register. ++ * We assume that nelt is the elements numbers inside single vector in current ++ * 256bit vector mode. ++ * ++ * 2. 
What GCC expects to perform: ++ * Two op registers(op0, op1) will "combine" into a 512bit temp vector storage ++ * that has 2*nelt elements inside it; the low 256bit is op0, and high 256bit ++ * is op1, then the elements are indexed as below: ++ * 0 ~ nelt - 1 nelt ~ 2 * nelt - 1 ++ * |-------------------------|-------------------------| ++ * Low 256bit (op0) High 256bit(op1) ++ * For example, the second element in op1(V8SImode) will be indexed with 9. ++ * Selector is a vector that has the same mode and number of elements with ++ * op0,op1 and target, it's look like this: ++ * 0 ~ nelt - 1 ++ * |-------------------------| ++ * 256bit (selector) ++ * It describes which element from 512bit temp vector storage will fit into ++ * target's every element slot. ++ * GCC expects that every element in selector can be ANY indices of 512bit ++ * vector storage(Selector can pick literally any element from op0 and op1, and ++ * then fits into any place of target register). This is also what LSX 128bit ++ * vshuf.* instruction do similarly, so we can handle 128bit vector permutation ++ * by single instruction easily. ++ * ++ * 3. What xvshuf.* instruction does: ++ * In short, it just do TWO 128bit vector permuatation, it's the reason that we ++ * need to do these jobs. We will explain it. ++ * op0, op1, target, and selector will be separate into high 128bit and low ++ * 128bit, and do permutation as the description below: ++ * ++ * a) op0's low 128bit and op1's low 128bit "combines" into a 256bit temp ++ * vector storage(TVS1), elements are indexed as below: ++ * 0 ~ nelt / 2 - 1 nelt / 2 ~ nelt - 1 ++ * |---------------------|---------------------| TVS1 ++ * op0's low 128bit op1's low 128bit ++ * op0's high 128bit and op1's high 128bit are "combined" into TVS2 in the ++ * same way. ++ * 0 ~ nelt / 2 - 1 nelt / 2 ~ nelt - 1 ++ * |---------------------|---------------------| TVS2 ++ * op0's high 128bit op1's high 128bit ++ * b) Selector's low 128bit describes which elements from TVS1 will fit into ++ * target vector's low 128bit. No TVS2 elements are allowed. ++ * c) Selector's high 128bit describes which elements from TVS2 will fit into ++ * target vector's high 128bit. No TVS1 elements are allowed. ++ * ++ * As we can see, if we want to handle vector permutation correctly, we can ++ * achieve it in three ways: ++ * a) Modify selector's elements, to make sure that every elements can inform ++ * correct value that will put into target vector. ++ b) Generate extra instruction before/after xvshuf.* instruction, for ++ adjusting op vector or target vector, to make sure target vector's value is ++ what GCC expects. ++ c) Use other instructions to process op and put correct result into target. ++ */ ++ ++/* Implementation of constant vector permuatation. This function identifies ++ * recognized pattern of permuation selector argument, and use one or more ++ * instruction(s) to finish the permutation job correctly. For unsupported ++ * patterns, it will return false. */ ++ ++static bool ++loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) ++{ ++ // Although we have the LSX vec_perm template, there's still some ++ // 128bit vector permuatation operations send to vectorize_vec_perm_const. ++ // In this case, we just simpliy wrap them by single vshuf.* instruction, ++ // because LSX vshuf.* instruction just have the same behavior that GCC ++ // expects. 
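++  // Worked example of strategy a) above for a 256bit mode (illustrative
++  // values, not taken from the checks below): for E_V8SImode even
++  // extraction GCC passes the selector { 0, 2, 4, 6, 8, 10, 12, 14 },
++  // indexing the 512bit concatenation of op0 and op1.  Since xvshuf.w
++  // builds the result's low half only from TVS1 and its high half only
++  // from TVS2, the code below remaps the selector to
++  // { 0, 2, 4, 6, 0, 2, 4, 6 } and sets extract_ev_od so that the extra
++  // fix-up insn is emitted later.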
++ if (d->vmode != E_V32QImode && d->vmode != E_V16HImode ++ && d->vmode != E_V4DImode && d->vmode != E_V4DFmode ++ && d->vmode != E_V8SImode && d->vmode != E_V8SFmode) ++ return loongarch_try_expand_lsx_vshuf_const (d); ++ ++ bool ok = false, reverse_hi_lo = false, extract_ev_od = false, ++ use_alt_op = false; ++ unsigned char idx; ++ int i; ++ rtx target, op0, op1, sel, tmp; ++ rtx op0_alt = NULL_RTX, op1_alt = NULL_RTX; ++ rtx rperm[MAX_VECT_LEN]; ++ unsigned char remapped[MAX_VECT_LEN]; ++ ++ // Try to figure out whether is a recognized permutation selector pattern, if ++ // yes, we will reassign some elements with new value in selector argument, ++ // and in some cases we will generate some assist insn to complete the ++ // permutation. (Even in some cases, we use other insn to impl permutation ++ // instead of xvshuf!) ++ ++ // Make sure to check d->testing_p is false everytime if you want to emit new ++ // insn, unless you want to crash into ICE directly. ++ if (loongarch_is_quad_duplicate (d)) ++ { ++ // Selector example: E_V8SImode, { 0, 0, 0, 0, 4, 4, 4, 4 } ++ // copy first elem from original selector to all elem in new selector. ++ idx = d->perm[0]; ++ for (i = 0; i < d->nelt; i += 1) ++ { ++ remapped[i] = idx; ++ } ++ // Selector after: { 0, 0, 0, 0, 0, 0, 0, 0 } ++ } ++ else if (loongarch_is_double_duplicate (d)) ++ { ++ // Selector example: E_V8SImode, { 1, 1, 3, 3, 5, 5, 7, 7 } ++ // one_vector_p == true ++ for (i = 0; i < d->nelt / 2; i += 1) ++ { ++ idx = d->perm[i]; ++ remapped[i] = idx; ++ remapped[i + d->nelt / 2] = idx; ++ } ++ // Selector after: { 1, 1, 3, 3, 1, 1, 3, 3 } ++ } ++ else if (loongarch_is_odd_extraction (d) ++ || loongarch_is_even_extraction (d)) ++ { ++ // Odd extraction selector sample: E_V4DImode, { 1, 3, 5, 7 } ++ // Selector after: { 1, 3, 1, 3 } ++ // Even extraction selector sample: E_V4DImode, { 0, 2, 4, 6 } ++ // Selector after: { 0, 2, 0, 2 } ++ for (i = 0; i < d->nelt / 2; i += 1) ++ { ++ idx = d->perm[i]; ++ remapped[i] = idx; ++ remapped[i + d->nelt / 2] = idx; ++ } ++ // Additional insn is required for correct result. See codes below. ++ extract_ev_od = true; ++ } ++ else if (loongarch_is_extraction_permutation (d)) ++ { ++ // Selector sample: E_V8SImode, { 0, 1, 2, 3, 4, 5, 6, 7 } ++ if (d->perm[0] == 0) ++ { ++ for (i = 0; i < d->nelt / 2; i += 1) ++ { ++ remapped[i] = i; ++ remapped[i + d->nelt / 2] = i; ++ } ++ } ++ else ++ { ++ // { 8, 9, 10, 11, 12, 13, 14, 15 } ++ for (i = 0; i < d->nelt / 2; i += 1) ++ { ++ idx = i + d->nelt / 2; ++ remapped[i] = idx; ++ remapped[i + d->nelt / 2] = idx; ++ } ++ } ++ // Selector after: { 0, 1, 2, 3, 0, 1, 2, 3 } ++ // { 8, 9, 10, 11, 8, 9, 10, 11 } ++ } ++ else if (loongarch_is_center_extraction (d)) ++ { ++ // sample: E_V4DImode, { 2, 3, 4, 5 } ++ // In this condition, we can just copy high 128bit of op0 and low 128bit ++ // of op1 to the target register by using xvpermi.q insn. 
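++      // e.g. E_V4DImode, selector { 2, 3, 4, 5 }: elements 2 and 3 are
++      // op0's high 128bit and elements 4 and 5 are op1's low 128bit, so a
++      // single xvpermi.q placing op0's high half in the target's low half
++      // and op1's low half in the target's high half already gives the
++      // expected result (illustrative restatement of the case above).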
++ if (!d->testing_p) ++ { ++ emit_move_insn (d->target, d->op1); ++ switch (d->vmode) ++ { ++ case E_V4DImode: ++ emit_insn (gen_lasx_xvpermi_q_v4di (d->target, d->target, d->op0, GEN_INT (0x21))); ++ break; ++ case E_V4DFmode: ++ emit_insn (gen_lasx_xvpermi_q_v4df (d->target, d->target, d->op0, GEN_INT (0x21))); ++ break; ++ case E_V8SImode: ++ emit_insn (gen_lasx_xvpermi_q_v8si (d->target, d->target, d->op0, GEN_INT (0x21))); ++ break; ++ case E_V8SFmode: ++ emit_insn (gen_lasx_xvpermi_q_v8sf (d->target, d->target, d->op0, GEN_INT (0x21))); ++ break; ++ case E_V16HImode: ++ emit_insn (gen_lasx_xvpermi_q_v16hi (d->target, d->target, d->op0, GEN_INT (0x21))); ++ break; ++ case E_V32QImode: ++ emit_insn (gen_lasx_xvpermi_q_v32qi (d->target, d->target, d->op0, GEN_INT (0x21))); ++ break; ++ default: ++ break; ++ } ++ } ++ ok = true; ++ // Finish the funtion directly. ++ goto expand_perm_const_2_end; ++ } ++ else if (loongarch_is_reversing_permutation (d)) ++ { ++ // Selector sample: E_V8SImode, { 7, 6, 5, 4, 3, 2, 1, 0 } ++ // one_vector_p == true ++ idx = d->nelt / 2 - 1; ++ for (i = 0; i < d->nelt / 2; i += 1) ++ { ++ remapped[i] = idx; ++ remapped[i + d->nelt / 2] = idx; ++ idx -= 1; ++ } ++ // Selector after: { 3, 2, 1, 0, 3, 2, 1, 0 } ++ // Additional insn will be generated to swap hi and lo 128bit of target ++ // register. ++ reverse_hi_lo = true; ++ } ++ else if (loongarch_is_di_misalign_extract (d) ++ || loongarch_is_si_misalign_extract (d)) ++ { ++ // Selector Sample: ++ // DI misalign: E_V4DImode, { 1, 2, 3, 4 } ++ // SI misalign: E_V8SImode, { 1, 2, 3, 4, 5, 6, 7, 8 } ++ if (!d->testing_p) ++ { ++ // Copy original op0/op1 value to new temp register. ++ // In some cases, operand register may be used in multiple place, so ++ // we need new regiter instead modify original one, to avoid runtime ++ // crashing or wrong value after execution. ++ use_alt_op = true; ++ op1_alt = gen_reg_rtx (d->vmode); ++ emit_move_insn (op1_alt, d->op1); ++ ++ // Adjust op1 for selecting correct value in high 128bit of target ++ // register. ++ // op1: E_V4DImode, { 4, 5, 6, 7 } -> { 2, 3, 4, 5 } ++ rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); ++ rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0); ++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, ++ conv_op0, GEN_INT (0x21))); ++ ++ for (i = 0; i < d->nelt / 2; i += 1) ++ { ++ remapped[i] = d->perm[i]; ++ remapped[i + d->nelt / 2] = d->perm[i]; ++ } ++ // Selector after: ++ // DI misalign: { 1, 2, 1, 2 } ++ // SI misalign: { 1, 2, 3, 4, 1, 2, 3, 4 } ++ } ++ } ++ else if (loongarch_is_lasx_lowpart_interleave (d)) ++ { ++ // Elements from op0's low 18bit and op1's 128bit are inserted into ++ // target register alternately. ++ //sample: E_V4DImode, { 0, 4, 1, 5 } ++ if (!d->testing_p) ++ { ++ // Prepare temp register instead of modify original op. ++ use_alt_op = true; ++ op1_alt = gen_reg_rtx (d->vmode); ++ op0_alt = gen_reg_rtx (d->vmode); ++ emit_move_insn (op1_alt, d->op1); ++ emit_move_insn (op0_alt, d->op0); ++ ++ // Generate subreg for fitting into insn gen function. ++ rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); ++ rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); ++ ++ // Adjust op value in temp register. 
++ // op0 = {0,1,2,3}, op1 = {4,5,0,1} ++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, ++ conv_op0, GEN_INT (0x02))); ++ // op0 = {0,1,4,5}, op1 = {4,5,0,1} ++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op0, conv_op0, ++ conv_op1, GEN_INT (0x01))); ++ ++ // Remap indices in selector based on the location of index inside ++ // selector, and vector element numbers in current vector mode. ++ ++ // Filling low 128bit of new selector. ++ for (i = 0; i < d->nelt / 2; i += 1) ++ { ++ // value in odd-indexed slot of low 128bit part of selector ++ // vector. ++ remapped[i] = i % 2 != 0 ? d->perm[i] - d->nelt / 2 : d->perm[i]; ++ } ++ // Then filling the high 128bit. ++ for (i = d->nelt / 2; i < d->nelt; i += 1) ++ { ++ // value in even-indexed slot of high 128bit part of ++ // selector vector. ++ remapped[i] = i % 2 == 0 ? d->perm[i] + (d->nelt / 2) * 3 : d->perm[i]; ++ } ++ } ++ } ++ else if (loongarch_is_lasx_lowpart_interleave_2 (d)) ++ { ++ // Special lowpart interleave case in V32QI vector mode. It does the same ++ // thing as we can see in if branch that above this line. ++ // Selector sample: E_V32QImode, ++ // {0, 1, 2, 3, 4, 5, 6, 7, 32, 33, 34, 35, 36, 37, 38, 39, 8, 9, 10, ++ // 11, 12, 13, 14, 15, 40, 41, 42, 43, 44, 45, 46, 47} ++ if (!d->testing_p) ++ { ++ // Solution for this case in very simple - covert op into V4DI mode, ++ // and do same thing as previous if branch. ++ op1_alt = gen_reg_rtx (d->vmode); ++ op0_alt = gen_reg_rtx (d->vmode); ++ emit_move_insn (op1_alt, d->op1); ++ emit_move_insn (op0_alt, d->op0); ++ ++ rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); ++ rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); ++ rtx conv_target = gen_rtx_SUBREG (E_V4DImode, d->target, 0); ++ ++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, ++ conv_op0, GEN_INT (0x02))); ++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op0, conv_op0, ++ conv_op1, GEN_INT (0x01))); ++ remapped[0] = 0; ++ remapped[1] = 4; ++ remapped[2] = 1; ++ remapped[3] = 5; ++ ++ for (i = 0; i < d->nelt; i += 1) ++ { ++ rperm[i] = GEN_INT (remapped[i]); ++ } ++ ++ sel = gen_rtx_CONST_VECTOR (E_V4DImode, gen_rtvec_v(4, rperm)); ++ sel = force_reg (E_V4DImode, sel); ++ emit_insn (gen_lasx_xvshuf_d (conv_target, sel, ++ conv_op1, conv_op0)); ++ } ++ ++ ok = true; ++ goto expand_perm_const_2_end; ++ } ++ else if (loongarch_is_lasx_lowpart_extract (d)) ++ { ++ // Copy op0's low 128bit to target's low 128bit, and copy op1's low ++ // 128bit to target's high 128bit. ++ // Selector sample: E_V4DImode, { 0, 1, 4 ,5 } ++ if (!d->testing_p) ++ { ++ rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, d->op1, 0); ++ rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0); ++ rtx conv_target = gen_rtx_SUBREG (E_V4DImode, d->target, 0); ++ ++ // We can achieve the expectation by using sinple xvpermi.q insn. ++ emit_move_insn (conv_target, conv_op1); ++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_target, conv_target, ++ conv_op0, GEN_INT(0x20))); ++ } ++ ++ ok = true; ++ goto expand_perm_const_2_end; ++ } ++ else if (loongarch_is_lasx_highpart_interleave (d)) ++ { ++ // Similar to lowpart interleave, elements from op0's high 128bit and ++ // op1's high 128bit are inserted into target regiter alternately. ++ // Selector sample: E_V8SImode, { 4, 12, 5, 13, 6, 14, 7, 15 } ++ if (!d->testing_p) ++ { ++ // Prepare temp op register. 
++ use_alt_op = true; ++ op1_alt = gen_reg_rtx (d->vmode); ++ op0_alt = gen_reg_rtx (d->vmode); ++ emit_move_insn (op1_alt, d->op1); ++ emit_move_insn (op0_alt, d->op0); ++ ++ rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); ++ rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); ++ // Adjust op value in temp regiter. ++ // op0 = { 0, 1, 2, 3 }, op1 = { 6, 7, 2, 3 } ++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, ++ conv_op0, GEN_INT (0x13))); ++ // op0 = { 2, 3, 6, 7 }, op1 = { 6, 7, 2, 3 } ++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op0, conv_op0, ++ conv_op1, GEN_INT (0x01))); ++ // Remap indices in selector based on the location of index inside ++ // selector, and vector element numbers in current vector mode. ++ ++ // Filling low 128bit of new selector. ++ for (i = 0; i < d->nelt / 2; i += 1) ++ { ++ // value in even-indexed slot of low 128bit part of selector ++ // vector. ++ remapped[i] = i % 2 == 0 ? d->perm[i] - d->nelt / 2 : d->perm[i]; ++ } ++ // Then filling the high 128bit. ++ for (i = d->nelt / 2; i < d->nelt; i += 1) ++ { ++ // value in odd-indexed slot of high 128bit part of selector ++ // vector. ++ remapped[i] = i % 2 != 0 ? d->perm[i] - (d->nelt / 2) * 3 : d->perm[i]; ++ } ++ } ++ } ++ else if (loongarch_is_lasx_highpart_interleave_2 (d)) ++ { ++ // Special highpart interleave case in V32QI vector mode. It does the ++ // same thing as the normal version above. ++ // Selector sample: E_V32QImode, ++ // {16, 17, 18, 19, 20, 21, 22, 23, 48, 49, 50, 51, 52, 53, 54, 55, 24, ++ // 25, 26, 27, 28, 29, 30, 31, 56, 57, 58, 59, 60, 61, 62, 63} ++ if (!d->testing_p) ++ { ++ // Convert op into V4DImode and do the things. ++ op1_alt = gen_reg_rtx (d->vmode); ++ op0_alt = gen_reg_rtx (d->vmode); ++ emit_move_insn (op1_alt, d->op1); ++ emit_move_insn (op0_alt, d->op0); ++ ++ rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); ++ rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); ++ rtx conv_target = gen_rtx_SUBREG (E_V4DImode, d->target, 0); ++ ++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, ++ conv_op0, GEN_INT (0x13))); ++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op0, conv_op0, ++ conv_op1, GEN_INT (0x01))); ++ remapped[0] = 2; ++ remapped[1] = 6; ++ remapped[2] = 3; ++ remapped[3] = 7; ++ ++ for (i = 0; i < d->nelt; i += 1) ++ { ++ rperm[i] = GEN_INT (remapped[i]); ++ } ++ ++ sel = gen_rtx_CONST_VECTOR (E_V4DImode, gen_rtvec_v(4, rperm)); ++ sel = force_reg (E_V4DImode, sel); ++ emit_insn (gen_lasx_xvshuf_d (conv_target, sel, ++ conv_op1, conv_op0)); ++ } ++ ++ ok = true; ++ goto expand_perm_const_2_end; ++ } ++ else if (loongarch_is_elem_duplicate (d)) ++ { ++ // Brocast single element (from op0 or op1) to all slot of target ++ // register. ++ // Selector sample:E_V8SImode, { 2, 2, 2, 2, 2, 2, 2, 2 } ++ if (!d->testing_p) ++ { ++ rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, d->op1, 0); ++ rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0); ++ rtx temp_reg = gen_reg_rtx (d->vmode); ++ rtx conv_temp = gen_rtx_SUBREG (E_V4DImode, temp_reg, 0); ++ ++ emit_move_insn (temp_reg, d->op0); ++ ++ idx = d->perm[0]; ++ // We will use xvrepl128vei.* insn to achieve the result, but we need ++ // to make the high/low 128bit has the same contents that contain the ++ // value that we need to broardcast, because xvrepl128vei does the ++ // broardcast job from every 128bit of source register to ++ // corresponded part of target register! (A deep sigh.) 
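++	  // e.g. E_V8SImode, selector { 5, 5, 5, 5, 5, 5, 5, 5 }: element 5
++	  // sits in op0's high 128bit lane at position 1, so both halves of
++	  // temp_reg are filled with op0's high half and xvrepl128vei.w then
++	  // broadcasts lane element 1 into every slot of target (illustrative).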
++ if (/*idx >= 0 &&*/ idx < d->nelt / 2)
++ {
++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_temp, conv_temp,
++ conv_op0, GEN_INT (0x0)));
++ }
++ else if (idx >= d->nelt / 2 && idx < d->nelt)
++ {
++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_temp, conv_temp,
++ conv_op0, GEN_INT (0x11)));
++ idx -= d->nelt / 2;
++ }
++ else if (idx >= d->nelt && idx < (d->nelt + d->nelt / 2))
++ {
++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_temp, conv_temp,
++ conv_op1, GEN_INT (0x0)));
++ }
++ else if (idx >= (d->nelt + d->nelt / 2) && idx < d->nelt * 2)
++ {
++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_temp, conv_temp,
++ conv_op1, GEN_INT (0x11)));
++ idx -= d->nelt / 2;
++ }
++
++ // Then we can finally generate this insn.
++ switch (d->vmode)
++ {
++ case E_V4DImode:
++ emit_insn (gen_lasx_xvrepl128vei_d (d->target, temp_reg, GEN_INT (idx)));
++ break;
++ case E_V4DFmode:
++ emit_insn (gen_lasx_xvrepl128vei_d_f (d->target, temp_reg, GEN_INT (idx)));
++ break;
++ case E_V8SImode:
++ emit_insn (gen_lasx_xvrepl128vei_w (d->target, temp_reg, GEN_INT (idx)));
++ break;
++ case E_V8SFmode:
++ emit_insn (gen_lasx_xvrepl128vei_w_f (d->target, temp_reg, GEN_INT (idx)));
++ break;
++ case E_V16HImode:
++ emit_insn (gen_lasx_xvrepl128vei_h (d->target, temp_reg, GEN_INT (idx)));
++ break;
++ case E_V32QImode:
++ emit_insn (gen_lasx_xvrepl128vei_b (d->target, temp_reg, GEN_INT (idx)));
++ break;
++ default:
++ gcc_unreachable ();
++ break;
++ }
++
++ // Finish the function directly.
++ ok = true;
++ goto expand_perm_const_2_end;
++ }
++ }
++ else if (loongarch_is_op_reverse_perm (d))
++ {
++ // Reverse the high 128bit and low 128bit in op0.
++ // Selector sample: E_V4DFmode, { 2, 3, 0, 1 }
++ // Use xvpermi.q to do this job.
++ if (!d->testing_p)
++ {
++ if (d->vmode == E_V4DImode)
++ {
++ emit_insn (gen_lasx_xvpermi_q_v4di (d->target, d->target, d->op0,
++ GEN_INT (0x01)));
++ }
++ else if (d->vmode == E_V4DFmode)
++ {
++ emit_insn (gen_lasx_xvpermi_q_v4df (d->target, d->target, d->op0,
++ GEN_INT (0x01)));
++ }
++ else
++ {
++ gcc_unreachable ();
++ }
++ }
++
++ ok = true;
++ goto expand_perm_const_2_end;
++ }
++ else if (loongarch_is_single_op_perm (d))
++ {
++ // Permutation that only selects elements from op0.
++ if (!d->testing_p)
++ {
++ // Prepare temp registers instead of modifying the original ops.
++ use_alt_op = true;
++ op0_alt = gen_reg_rtx (d->vmode);
++ op1_alt = gen_reg_rtx (d->vmode);
++
++ emit_move_insn (op0_alt, d->op0);
++ emit_move_insn (op1_alt, d->op1);
++
++ rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0);
++ rtx conv_op0a = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0);
++ rtx conv_op1a = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0);
++
++ // Duplicate op0's low 128bit in op0_alt, then duplicate op0's high
++ // 128bit in op1_alt. After this, xvshuf.* insn's selector argument can
++ // access all elements we need for the correct permutation result.
++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op0a, conv_op0a, conv_op0,
++ GEN_INT (0x00)));
++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1a, conv_op1a, conv_op0,
++ GEN_INT (0x11)));
++
++ // In this case, there's no need to remap the selector's indices.
++ for (i = 0; i < d->nelt; i += 1)
++ {
++ remapped[i] = d->perm[i];
++ }
++ }
++ }
++ else if (loongarch_is_divisible_perm (d))
++ {
++ // Divisible perm:
++ // Low 128bit of selector only selects elements of op0,
++ // and high 128bit of selector only selects elements of op1.
++
++ if (!d->testing_p)
++ {
++ // Prepare temp registers instead of modifying the original ops.
++ use_alt_op = true;
++ op0_alt = gen_reg_rtx (d->vmode);
++ op1_alt = gen_reg_rtx (d->vmode);
++
++ emit_move_insn (op0_alt, d->op0);
++ emit_move_insn (op1_alt, d->op1);
++
++ rtx conv_op0a = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0);
++ rtx conv_op1a = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0);
++ rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0);
++ rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, d->op1, 0);
++
++ // Reorganize op0's hi/lo 128bit and op1's hi/lo 128bit, to make sure
++ // that the selector's low 128bit can access all of op0's elements, and
++ // the selector's high 128bit can access all of op1's elements.
++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op0a, conv_op0a, conv_op1,
++ GEN_INT (0x02)));
++ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1a, conv_op1a, conv_op0,
++ GEN_INT (0x31)));
++
++ // No need to modify indices.
++ for (i = 0; i < d->nelt; i += 1)
++ {
++ remapped[i] = d->perm[i];
++ }
++ }
++ }
++ else if (loongarch_is_triple_stride_extract (d))
++ {
++ // Selector sample: E_V4DFmode, { 1, 4, 7, 0 }
++ if (!d->testing_p)
++ {
++ // Resolve it with a brute-force modification.
++ remapped[0] = 1;
++ remapped[1] = 2;
++ remapped[2] = 3;
++ remapped[3] = 0;
++ }
++ }
++ else
++ {
++ // When all of the detections above have failed, we will try one last
++ // strategy.
++ // The for loop tries to detect the following rules based on each index's
++ // value, its position inside the selector vector, and the strange behavior
++ // of the xvshuf.* insn; then we take the corresponding action (replace
++ // with a new value, or give up on the whole permutation expansion).
++ for (i = 0; i < d->nelt; i += 1)
++ {
++ idx = d->perm[i]/* % (2 * d->nelt)*/;
++
++ // If the index is located in the low 128bit of the selector vector.
++ if (i < d->nelt / 2)
++ {
++ // Fail case 1: the index tries to reach an element located in op0's
++ // high 128bit.
++ if (idx >= d->nelt / 2 && idx < d->nelt)
++ {
++ goto expand_perm_const_2_end;
++ }
++ // Fail case 2: the index tries to reach an element located in
++ // op1's high 128bit.
++ if (idx >= (d->nelt + d->nelt / 2))
++ {
++ goto expand_perm_const_2_end;
++ }
++
++ // Success case: the index reaches an element located in
++ // op1's low 128bit. Apply a - (nelt / 2) offset to the original value.
++ if (idx >= d->nelt && idx < (d->nelt + d->nelt / 2))
++ {
++ idx -= d->nelt / 2;
++ }
++ }
++ // If the index is located in the high 128bit of the selector vector.
++ else
++ {
++ // Fail case 1: the index tries to reach an element located in
++ // op1's low 128bit.
++ if (idx >= d->nelt && idx < (d->nelt + d->nelt / 2))
++ {
++ goto expand_perm_const_2_end;
++ }
++ // Fail case 2: the index tries to reach an element located in
++ // op0's low 128bit.
++ if (idx < (d->nelt / 2))
++ {
++ goto expand_perm_const_2_end;
++ }
++ // Success case: the index reaches an element located in
++ // op0's high 128bit.
++ if (idx >= d->nelt / 2 && idx < d->nelt)
++ {
++ idx -= d->nelt / 2;
++ }
++ }
++ // No need to process other cases not mentioned above.
++
++ // Assign the original or processed value.
++ remapped[i] = idx;
++ }
++ }
++
++ ok = true;
++ // If testing_p is true, the compiler is only asking whether the backend can
++ // handle this permutation and does not want actual insns to be generated.
++ // So if it is true, exit directly.
++ if (d->testing_p)
++ {
++ goto expand_perm_const_2_end;
++ }
++
++ // Convert the remapped selector array to an RTL array.
++ for (i = 0; i < d->nelt; i += 1)
++ {
++ rperm[i] = GEN_INT (remapped[i]);
++ }
++
++ // Copy the selector vector from memory to a vector register for the later
++ // insn gen function.
++ // if vector's element in floating point value, we cannot fit selector ++ // argument into insn gen function directly, because of the insn template ++ // definition. As a solution, generate a integral mode subreg of target, ++ // then copy selector vector(that is in integral mode) to this subreg. ++ switch (d->vmode) ++ { ++ case E_V4DFmode: ++ sel = gen_rtx_CONST_VECTOR (E_V4DImode, gen_rtvec_v (d->nelt, rperm)); ++ tmp = gen_rtx_SUBREG (E_V4DImode, d->target, 0); ++ emit_move_insn (tmp, sel); ++ break; ++ case E_V8SFmode: ++ sel = gen_rtx_CONST_VECTOR (E_V8SImode, gen_rtvec_v (d->nelt, rperm)); ++ tmp = gen_rtx_SUBREG (E_V8SImode, d->target, 0); ++ emit_move_insn (tmp, sel); ++ break; ++ default: ++ sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt, rperm)); ++ emit_move_insn (d->target, sel); ++ break; + } +- return true; +-} +- +-static bool +-loongarch_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) +-{ +- unsigned int i, nelt = d->nelt; +- unsigned char perm2[MAX_VECT_LEN]; + +- if (d->one_vector_p) ++ target = d->target; ++ // If temp op registers are requested in previous if branch, then use temp ++ // register intead of original one. ++ if (use_alt_op) + { +- /* Try interleave with alternating operands. */ +- memcpy (perm2, d->perm, sizeof(perm2)); +- for (i = 1; i < nelt; i += 2) +- perm2[i] += nelt; +- if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, perm2, nelt)) +- return true; ++ op0 = op0_alt != NULL_RTX ? op0_alt : d->op0; ++ op1 = op1_alt != NULL_RTX ? op1_alt : d->op1; + } + else + { +- if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, +- d->perm, nelt)) +- return true; ++ op0 = d->op0; ++ op1 = d->one_vector_p ? d->op0 : d->op1; ++ } + +- /* Try again with swapped operands. */ +- for (i = 0; i < nelt; ++i) +- perm2[i] = (d->perm[i] + nelt) & (2 * nelt - 1); +- if (loongarch_expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt)) +- return true; ++ // We FINALLY can generate xvshuf.* insn. ++ switch (d->vmode) ++ { ++ case E_V4DFmode: ++ emit_insn (gen_lasx_xvshuf_d_f (target, target, op1, op0)); ++ break; ++ case E_V4DImode: ++ emit_insn (gen_lasx_xvshuf_d (target, target, op1, op0)); ++ break; ++ case E_V8SFmode: ++ emit_insn (gen_lasx_xvshuf_w_f (target, target, op1, op0)); ++ break; ++ case E_V8SImode: ++ emit_insn (gen_lasx_xvshuf_w (target, target, op1, op0)); ++ break; ++ case E_V16HImode: ++ emit_insn (gen_lasx_xvshuf_h (target, target, op1, op0)); ++ break; ++ case E_V32QImode: ++ emit_insn (gen_lasx_xvshuf_b (target, op1, op0, target)); ++ break; ++ default: ++ gcc_unreachable (); ++ break; + } + +- if (loongarch_expand_lsx_shuffle (d)) +- return true; +- return false; ++ // extra insn for swapping the hi/lo 128bit of target vector register. 
++ if (reverse_hi_lo) ++ { ++ switch (d->vmode) ++ { ++ case E_V4DFmode: ++ emit_insn (gen_lasx_xvpermi_q_v4df (d->target, d->target, d->target, GEN_INT (0x1))); ++ break; ++ case E_V4DImode: ++ emit_insn (gen_lasx_xvpermi_q_v4di (d->target, d->target, d->target, GEN_INT (0x1))); ++ break; ++ case E_V8SFmode: ++ emit_insn (gen_lasx_xvpermi_q_v8sf (d->target, d->target, d->target, GEN_INT (0x1))); ++ break; ++ case E_V8SImode: ++ emit_insn (gen_lasx_xvpermi_q_v8si (d->target, d->target, d->target, GEN_INT (0x1))); ++ break; ++ case E_V16HImode: ++ emit_insn (gen_lasx_xvpermi_q_v16hi (d->target, d->target, d->target, GEN_INT (0x1))); ++ break; ++ case E_V32QImode: ++ emit_insn (gen_lasx_xvpermi_q_v32qi (d->target, d->target, d->target, GEN_INT (0x1))); ++ break; ++ default: ++ break; ++ } ++ } ++ // extra insn required by odd/even extraction. Swapping the second and third ++ // 64bit in target vector register. ++ else if (extract_ev_od) ++ { ++ rtx converted = gen_rtx_SUBREG (E_V4DImode, d->target, 0); ++ emit_insn (gen_lasx_xvpermi_d_v4di (converted, converted, GEN_INT (0xD8))); ++ } ++ ++expand_perm_const_2_end: ++ return ok; + } + + /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */ +@@ -9043,13 +9312,19 @@ loongarch_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, + if (!d.one_vector_p) + d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); + ++ ok = loongarch_expand_vec_perm_const_2 (&d); ++ if (ok) ++ return ok; ++ + start_sequence (); + ok = loongarch_expand_vec_perm_const_1 (&d); + end_sequence (); + return ok; + } + +- ok = loongarch_expand_vec_perm_const_1 (&d); ++ ok = loongarch_expand_vec_perm_const_2 (&d); ++ if (!ok) ++ ok = loongarch_expand_vec_perm_const_1 (&d); + + /* If we were given a two-vector permutation which just happened to + have both input vectors equal, we folded this into a one-vector +@@ -9070,16 +9345,18 @@ loongarch_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, + return ok; + } + +-/* Implement TARGET_SCHED_REASSOCIATION_WIDTH. */ +- + static int +-loongarch_sched_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED, +- machine_mode mode) ++loongarch_cpu_sched_reassociation_width (struct loongarch_target *target, ++ unsigned int opc, machine_mode mode) + { +- switch (loongarch_tune) ++ /* unreferenced argument */ ++ (void) opc; ++ ++ switch (target->cpu_tune) + { +- case PROCESSOR_LOONGARCH64: +- case PROCESSOR_LA464: ++ case CPU_LOONGARCH64: ++ case CPU_LA464: ++ case CPU_LA664: + /* Vector part. */ + if (LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode)) + { +@@ -9094,10 +9371,164 @@ loongarch_sched_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED, + else if (FLOAT_MODE_P (mode)) + return 4; + break; ++ } ++ ++ /* default is 1 */ ++ return 1; ++} ++ ++/* Implement TARGET_SCHED_REASSOCIATION_WIDTH. 
*/ ++ ++static int ++loongarch_sched_reassociation_width (unsigned int opc, machine_mode mode) ++{ ++ return loongarch_cpu_sched_reassociation_width (&la_target, opc, mode); ++} ++ ++/* Implement extract a scalar element from vecotr register */ ++ ++void ++loongarch_expand_vector_extract (rtx target, rtx vec, int elt) ++{ ++ machine_mode mode = GET_MODE (vec); ++ machine_mode inner_mode = GET_MODE_INNER (mode); ++ rtx tmp; ++ ++ switch (mode) ++ { ++ case E_V8HImode: ++ case E_V16QImode: ++ break; ++ ++ case E_V32QImode: ++ if (ISA_HAS_LASX) ++ { ++ if (elt >= 16) ++ { ++ tmp = gen_reg_rtx (V32QImode); ++ emit_insn (gen_lasx_xvpermi_d_v32qi (tmp, vec, GEN_INT (0xe))); ++ loongarch_expand_vector_extract (target, gen_lowpart (V16QImode, tmp), elt & 15); ++ } ++ else ++ loongarch_expand_vector_extract (target, gen_lowpart (V16QImode, vec), elt & 15); ++ return; ++ } ++ break; ++ ++ case E_V16HImode: ++ if (ISA_HAS_LASX) ++ { ++ if (elt >= 8) ++ { ++ tmp = gen_reg_rtx (V16HImode); ++ emit_insn (gen_lasx_xvpermi_d_v16hi (tmp, vec, GEN_INT (0xe))); ++ loongarch_expand_vector_extract (target, gen_lowpart (V8HImode, tmp), elt & 7); ++ } ++ else ++ loongarch_expand_vector_extract (target, gen_lowpart (V8HImode, vec), elt & 7); ++ return; ++ } ++ break; ++ + default: + break; + } +- return 1; ++ ++ tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt))); ++ tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp); ++ ++ /* Let the rtl optimizers know about the zero extension performed. */ ++ if (inner_mode == QImode || inner_mode == HImode) ++ { ++ tmp = gen_rtx_ZERO_EXTEND (SImode, tmp); ++ target = gen_lowpart (SImode, target); ++ } ++ if (inner_mode == SImode || inner_mode == DImode) ++ { ++ tmp = gen_rtx_SIGN_EXTEND (inner_mode, tmp); ++ } ++ ++ emit_insn (gen_rtx_SET (target, tmp)); ++} ++ ++/* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC ++ to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode. ++ The upper bits of DEST are undefined, though they shouldn't cause ++ exceptions (some bits from src or all zeros are ok). */ ++ ++static void ++emit_reduc_half (rtx dest, rtx src, int i) ++{ ++ rtx tem, d = dest; ++ switch (GET_MODE (src)) ++ { ++ case E_V4SFmode: ++ tem = gen_lsx_vbsrl_w_f (dest, src, GEN_INT (i == 128 ? 8 : 4)); ++ break; ++ case E_V2DFmode: ++ tem = gen_lsx_vbsrl_d_f (dest, src, GEN_INT (8)); ++ break; ++ case E_V8SFmode: ++ if (i == 256) ++ tem = gen_lasx_xvpermi_d_v8sf (dest, src, GEN_INT (0xe)); ++ else ++ tem = gen_lasx_xvshuf4i_w_f (dest, src, ++ GEN_INT (i == 128 ? 2 + (3 << 2) : 1)); ++ break; ++ case E_V4DFmode: ++ if (i == 256) ++ tem = gen_lasx_xvpermi_d_v4df (dest, src, GEN_INT (0xe)); ++ else ++ tem = gen_lasx_xvpermi_d_v4df (dest, src, const1_rtx); ++ break; ++ case E_V32QImode: ++ case E_V16HImode: ++ case E_V8SImode: ++ case E_V4DImode: ++ d = gen_reg_rtx (V4DImode); ++ if (i == 256) ++ tem = gen_lasx_xvpermi_d_v4di (d, gen_lowpart (V4DImode, src), GEN_INT (0xe)); ++ else ++ tem = gen_lasx_xvbsrl_d (d, gen_lowpart (V4DImode, src), GEN_INT (i/16)); ++ break; ++ case E_V16QImode: ++ case E_V8HImode: ++ case E_V4SImode: ++ case E_V2DImode: ++ d = gen_reg_rtx (V2DImode); ++ tem = gen_lsx_vbsrl_d (d, gen_lowpart (V2DImode, src), GEN_INT (i/16)); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ emit_insn (tem); ++ if (d != dest) ++ emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d)); ++} ++ ++/* Expand a vector reduction. FN is the binary pattern to reduce; ++ DEST is the destination; IN is the input vector. 
*/ ++ ++void ++loongarch_expand_vector_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in) ++{ ++ rtx half, dst, vec = in; ++ machine_mode mode = GET_MODE (in); ++ int i; ++ ++ for (i = GET_MODE_BITSIZE (mode); ++ i > GET_MODE_UNIT_BITSIZE (mode); ++ i >>= 1) ++ { ++ half = gen_reg_rtx (mode); ++ emit_reduc_half (half, vec, i); ++ if (i == GET_MODE_UNIT_BITSIZE (mode) * 2) ++ dst = dest; ++ else ++ dst = gen_reg_rtx (mode); ++ emit_insn (fn (dst, half, vec)); ++ vec = dst; ++ } + } + + /* Expand an integral vector unpack operation. */ +@@ -9110,14 +9541,14 @@ loongarch_expand_vec_unpack (rtx operands[2], bool unsigned_p, bool high_p) + rtx (*extend) (rtx, rtx); + rtx (*cmpFunc) (rtx, rtx, rtx); + rtx (*swap_hi_lo) (rtx, rtx, rtx, rtx); +- rtx tmp, dest, zero; +- machine_mode halfmode = BLKmode; ++ rtx tmp, dest /*, zero */; ++ /* machine_mode halfmode = BLKmode; */ + + if (ISA_HAS_LASX && GET_MODE_SIZE (imode) == 32) + { + switch (imode) + { +- ++ + case E_V8SImode: + if (unsigned_p) + extend = gen_lasx_vext2xv_du_wu; +@@ -9125,7 +9556,7 @@ loongarch_expand_vec_unpack (rtx operands[2], bool unsigned_p, bool high_p) + extend = gen_lasx_vext2xv_d_w; + swap_hi_lo = gen_lasx_xvpermi_q_v8si; + break; +- ++ + case E_V16HImode: + if (unsigned_p) + extend = gen_lasx_vext2xv_wu_hu; +@@ -9133,7 +9564,7 @@ loongarch_expand_vec_unpack (rtx operands[2], bool unsigned_p, bool high_p) + extend = gen_lasx_vext2xv_w_h; + swap_hi_lo = gen_lasx_xvpermi_q_v16hi; + break; +- ++ + case E_V32QImode: + if (unsigned_p) + extend = gen_lasx_vext2xv_hu_bu; +@@ -9141,7 +9572,7 @@ loongarch_expand_vec_unpack (rtx operands[2], bool unsigned_p, bool high_p) + extend = gen_lasx_vext2xv_h_b; + swap_hi_lo = gen_lasx_xvpermi_q_v32qi; + break; +- ++ + default: + gcc_unreachable (); + break; +@@ -9268,7 +9699,7 @@ loongarch_expand_vector_init (rtx target, rtx vals) + machine_mode vmode = GET_MODE (target); + machine_mode imode = GET_MODE_INNER (vmode); + unsigned i, nelt = GET_MODE_NUNITS (vmode); +- unsigned nvar = 0, one_var = -1u; ++ unsigned nvar = 0 /*, one_var = -1u*/ ; + bool all_same = true; + rtx x; + +@@ -9276,7 +9707,7 @@ loongarch_expand_vector_init (rtx target, rtx vals) + { + x = XVECEXP (vals, 0, i); + if (!loongarch_constant_elt_p (x)) +- nvar++, one_var = i; ++ nvar++ /*, one_var = i */ ; + if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) + all_same = false; + } +@@ -9311,7 +9742,7 @@ loongarch_expand_vector_init (rtx target, rtx vals) + temp2 = same; + else if (GET_MODE_SIZE (imode) >= UNITS_PER_WORD) + { +- if(GET_CODE (same) == MEM) ++ if (GET_CODE (same) == MEM) + { + rtx reg_tmp = gen_reg_rtx (GET_MODE (same)); + loongarch_emit_move (reg_tmp, same); +@@ -9322,7 +9753,7 @@ loongarch_expand_vector_init (rtx target, rtx vals) + } + else + { +- if(GET_CODE (same) == MEM) ++ if (GET_CODE (same) == MEM) + { + rtx reg_tmp = gen_reg_rtx (GET_MODE (same)); + loongarch_emit_move (reg_tmp, same); +@@ -9505,7 +9936,7 @@ loongarch_expand_vector_init (rtx target, rtx vals) + temp2 = same; + else if (GET_MODE_SIZE (imode) >= UNITS_PER_WORD) + { +- if(GET_CODE (same) == MEM) ++ if (GET_CODE (same) == MEM) + { + rtx reg_tmp = gen_reg_rtx (GET_MODE (same)); + loongarch_emit_move (reg_tmp, same); +@@ -9516,7 +9947,7 @@ loongarch_expand_vector_init (rtx target, rtx vals) + } + else + { +- if(GET_CODE (same) == MEM) ++ if (GET_CODE (same) == MEM) + { + rtx reg_tmp = gen_reg_rtx (GET_MODE (same)); + loongarch_emit_move (reg_tmp, same); +@@ -9614,9 +10045,8 @@ loongarch_expand_vector_init (rtx target, rtx vals) + /* 
Implement HARD_REGNO_CALLER_SAVE_MODE. */ + + machine_mode +-loongarch_hard_regno_caller_save_mode (unsigned int regno, +- unsigned int nregs, +- machine_mode mode) ++loongarch_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs, ++ machine_mode mode) + { + /* For performance, avoid saving/restoring upper parts of a register + by returning MODE as save mode when the mode is known. */ +@@ -9785,7 +10215,8 @@ loongarch_expand_vec_cond_expr (machine_mode mode, machine_mode vimode, + if (mode != vimode) + { + xop1 = gen_reg_rtx (vimode); +- emit_move_insn (xop1, gen_rtx_SUBREG (vimode, operands[1], 0)); ++ emit_move_insn (xop1, simplify_gen_subreg (vimode, operands[1], ++ GET_MODE (operands[1]), 0)); + } + emit_move_insn (src1, xop1); + } +@@ -9802,7 +10233,8 @@ loongarch_expand_vec_cond_expr (machine_mode mode, machine_mode vimode, + if (mode != vimode) + { + xop2 = gen_reg_rtx (vimode); +- emit_move_insn (xop2, gen_rtx_SUBREG (vimode, operands[2], 0)); ++ emit_move_insn (xop2, simplify_gen_subreg (vimode, operands[2], ++ GET_MODE (operands[2]), 0)); + } + emit_move_insn (src2, xop2); + } +@@ -9821,13 +10253,14 @@ loongarch_expand_vec_cond_expr (machine_mode mode, machine_mode vimode, + gen_rtx_AND (vimode, mask, src1)); + /* The result is placed back to a register with the mask. */ + emit_insn (gen_rtx_SET (mask, bsel)); +- emit_move_insn (operands[0], gen_rtx_SUBREG (mode, mask, 0)); ++ emit_move_insn (operands[0], simplify_gen_subreg (mode, mask, ++ GET_MODE (mask), 0)); + } + } + + /* Expand integer vector comparison */ + bool +-loongarch_expand_int_vec_cmp(rtx operands[]) ++loongarch_expand_int_vec_cmp (rtx operands[]) + { + + rtx_code code = GET_CODE (operands[1]); +@@ -9837,7 +10270,7 @@ loongarch_expand_int_vec_cmp(rtx operands[]) + + /* Expand integer vector comparison */ + bool +-loongarch_expand_fp_vec_cmp(rtx operands[]) ++loongarch_expand_fp_vec_cmp (rtx operands[]) + { + rtx_code code = GET_CODE (operands[1]); + loongarch_expand_lsx_cmp (operands[0], code, operands[2], operands[3]); +@@ -9845,61 +10278,16 @@ loongarch_expand_fp_vec_cmp(rtx operands[]) + } + + +-/* Implement TARGET_CASE_VALUES_THRESHOLD. */ +- +-unsigned int +-loongarch_case_values_threshold (void) +-{ +- return default_case_values_threshold (); +-} +- +- + /* Implement TARGET_SPILL_CLASS. */ + + static reg_class_t + loongarch_spill_class (reg_class_t rclass ATTRIBUTE_UNUSED, +- machine_mode mode ATTRIBUTE_UNUSED) ++ machine_mode mode ATTRIBUTE_UNUSED) + { + return NO_REGS; + } + +-/* Implement TARGET_LRA_P. */ +- +-static bool +-loongarch_lra_p (void) +-{ +- return loongarch_lra_flag; +-} +- +-/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS. */ +- +-static reg_class_t +-loongarch_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class, +- reg_class_t best_class ATTRIBUTE_UNUSED) +-{ +- /* LRA will allocate an FPR for an integer mode pseudo instead of spilling +- to memory if an FPR is present in the allocno class. It is rare that +- we actually need to place an integer mode value in an FPR so where +- possible limit the allocation to GR_REGS. This will slightly pessimize +- code that involves integer to/from float conversions as these will have +- to reload into FPRs in LRA. Such reloads are sometimes eliminated and +- sometimes only partially eliminated. We choose to take this penalty +- in order to eliminate usage of FPRs in code that does not use floating +- point data. 
+- +- This change has a similar effect to increasing the cost of FPR->GPR +- register moves for integer modes so that they are higher than the cost +- of memory but changing the allocno class is more reliable. +- +- This is also similar to forbidding integer mode values in FPRs entirely +- but this would lead to an inconsistency in the integer to/from float +- instructions that say integer mode values must be placed in FPRs. */ +- if (INTEGRAL_MODE_P (PSEUDO_REGNO_MODE (regno)) && allocno_class == ALL_REGS) +- return GR_REGS; +- return allocno_class; +-} +- +-/* Implement TARGET_PROMOTE_FUNCTION_MODE */ ++/* Implement TARGET_PROMOTE_FUNCTION_MODE. */ + + /* This function is equivalent to default_promote_function_mode_always_promote + except that it returns a promoted mode even if type is NULL_TREE. This is +@@ -9909,10 +10297,10 @@ loongarch_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class, + + static machine_mode + loongarch_promote_function_mode (const_tree type ATTRIBUTE_UNUSED, +- machine_mode mode, +- int *punsignedp ATTRIBUTE_UNUSED, +- const_tree fntype ATTRIBUTE_UNUSED, +- int for_return ATTRIBUTE_UNUSED) ++ machine_mode mode, ++ int *punsignedp ATTRIBUTE_UNUSED, ++ const_tree fntype ATTRIBUTE_UNUSED, ++ int for_return ATTRIBUTE_UNUSED) + { + int unsignedp; + +@@ -9933,16 +10321,6 @@ loongarch_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec) + return !TARGET_64BIT || inprec <= 32 || outprec > 32; + } + +-/* Implement TARGET_CONSTANT_ALIGNMENT. */ +- +-static HOST_WIDE_INT +-loongarch_constant_alignment (const_tree exp, HOST_WIDE_INT align) +-{ +- if (TREE_CODE (exp) == STRING_CST || TREE_CODE (exp) == CONSTRUCTOR) +- return MAX (align, BITS_PER_WORD); +- return align; +-} +- + /* Implement TARGET_STARTING_FRAME_OFFSET. See loongarch_compute_frame_info + for details about the frame layout. */ + +@@ -9963,8 +10341,10 @@ loongarch_la464_128_store_p (rtx operands[]) + int offset1; + rtx dst0 = operands[0]; + rtx dst1 = operands[2]; ++ /* + rtx src0 = operands[1]; + rtx src1 = operands[3]; ++ */ + int base_reg0; + int base_reg1; + +@@ -10030,13 +10410,15 @@ loongarch_la464_128_load_p (rtx operands[]) + int offset0; + int offset1; + rtx dst0 = operands[0]; ++ /* + rtx dst1 = operands[2]; ++ */ + rtx src0 = operands[1]; + rtx src1 = operands[3]; + int base_reg0; + int base_reg1; + int dst_reg0; +- ++ + dst_reg0 = REGNO (dst0); + + if (GET_CODE (XEXP (src0, 0)) == PLUS) +@@ -10209,6 +10591,138 @@ loongarch_build_signbit_mask (machine_mode mode, bool vect, bool invert) + return force_reg (vec_mode, v); + } + ++/* Use rsqrte instruction and Newton-Rhapson to compute the approximation of ++ a single precision floating point [reciprocal] square root. */ ++ ++void loongarch_emit_swrsqrtsf (rtx res, rtx a, machine_mode mode, bool recip) ++{ ++ rtx x0, e0, e1, e2, mhalf, monehalf; ++ REAL_VALUE_TYPE r; ++ machine_mode imode; ++ int unspec; ++ ++ x0 = gen_reg_rtx (mode); ++ e0 = gen_reg_rtx (mode); ++ e1 = gen_reg_rtx (mode); ++ e2 = gen_reg_rtx (mode); ++ ++ real_arithmetic (&r, ABS_EXPR, &dconsthalf, NULL); ++ mhalf = const_double_from_real_value (r, SFmode); ++ ++ real_arithmetic (&r, PLUS_EXPR, &dconsthalf, &dconst1); ++ monehalf = const_double_from_real_value (r, SFmode); ++ unspec = UNSPEC_RSQRTE; ++ ++ if (VECTOR_MODE_P (mode)) ++ { ++ mhalf = loongarch_build_const_vector (mode, true, mhalf); ++ monehalf = loongarch_build_const_vector (mode, true, monehalf); ++ if (GET_MODE_SIZE (mode) == 32) ++ imode = mode == V4DFmode ? 
V4DImode : V8SImode; ++ if (GET_MODE_SIZE (mode) == 16) ++ imode = mode == V2DFmode ? V2DImode : V4SImode; ++ } ++ ++ /* rsqrt(a) = rsqrte(a) * (1.5 - 0.5 * a * rsqrte(a) * rsqrte(a)) ++ sqrt(a) = a * rsqrte(a) * (1.5 - 0.5 * a * rsqrte(a) * rsqrte(a))*/ ++ ++ a = force_reg (mode, a); ++ ++ /* x0 = rsqrt(a) estimate */ ++ emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a), ++ unspec))); ++ ++ /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */ ++ if (!recip) ++ { ++ rtx zero = force_reg (mode, CONST0_RTX(mode)); ++ ++ if (VECTOR_MODE_P (mode)) ++ { ++ rtx mask = gen_reg_rtx (imode); ++ emit_insn (gen_rtx_SET (mask, gen_rtx_NE (imode, a, zero))); ++ emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, gen_lowpart(mode, mask)))); ++ } ++ else ++ { ++ rtx target = emit_conditional_move (x0, GT, a, zero, mode, ++ x0, zero, mode, 0); ++ if (target != x0) ++ emit_move_insn (x0, target); ++ } ++ } ++ ++ /* e0 = x0 * a */ ++ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a))); ++ /* e1 = e0 * x0 */ ++ emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0))); ++ ++ /* e2 = 1.5 - e1 * 0.5 */ ++ mhalf = force_reg (mode, mhalf); ++ monehalf = force_reg (mode, monehalf); ++ emit_insn (gen_rtx_SET (e2, gen_rtx_FMA (mode, gen_rtx_NEG(mode, e1), mhalf, monehalf))); ++ ++ if (recip) ++ /* res = e2 * x0 */ ++ emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, x0, e2))); ++ else ++ /* res = e2 * e0 */ ++ emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e0))); ++} ++ ++/* Use recipe instruction and Newton-Rhapson to compute the approximation of ++ a single precision floating point divide. */ ++ ++void loongarch_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode) ++{ ++ rtx x0, x1, e0, mtwo; ++ REAL_VALUE_TYPE r; ++ x0 = gen_reg_rtx (mode); ++ e0 = gen_reg_rtx (mode); ++ x1 = gen_reg_rtx (mode); ++ ++ real_arithmetic (&r, ABS_EXPR, &dconst2, NULL); ++ mtwo = const_double_from_real_value (r, SFmode); ++ ++ if (VECTOR_MODE_P (mode)) ++ mtwo = loongarch_build_const_vector (mode, true, mtwo); ++ ++ mtwo = force_reg (mode, mtwo); ++ ++ /* a / b = a * recipe(b) * (2.0 - b * recipe(b)) */ ++ ++ /* x0 = 1./b estimate */ ++ emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b), ++ UNSPEC_RECIPE))); ++ /* 2.0 - b * x0; */ ++ emit_insn (gen_rtx_SET (e0, gen_rtx_FMA (mode,gen_rtx_NEG(mode, b), x0, mtwo))); ++ ++ /* x1 = x0 * e0 */ ++ emit_insn (gen_rtx_SET (x1, gen_rtx_MULT (mode, x0, e0))); ++ ++ /* res = a * x1 */ ++ emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1))); ++} ++ ++/* LoongArch only implements preld hint=0 (prefetch for load) and hint=8 ++ (prefetch for store), other hint just scale to hint = 0 and hint = 1. */ ++ ++rtx ++loongarch_prefetch_cookie (rtx write, rtx locality) ++{ ++ if (INTVAL (locality) == 1 && INTVAL (write) == 0) ++ return GEN_INT (INTVAL (write) + 2); ++ ++ /* store. */ ++ if (INTVAL (write) == 1) ++ return GEN_INT (INTVAL (write) + 7); ++ ++ /* load. */ ++ if (INTVAL (write) == 0) ++ return GEN_INT (INTVAL (write)); ++ ++ gcc_unreachable (); ++} + + + /* Initialize the GCC target structure. 
*/ +@@ -10225,10 +10739,6 @@ loongarch_build_signbit_mask (machine_mode mode, bool vect, bool invert) + #undef TARGET_LEGITIMIZE_ADDRESS + #define TARGET_LEGITIMIZE_ADDRESS loongarch_legitimize_address + +-#undef TARGET_ASM_FUNCTION_PROLOGUE +-#define TARGET_ASM_FUNCTION_PROLOGUE loongarch_output_function_prologue +-#undef TARGET_ASM_FUNCTION_EPILOGUE +-#define TARGET_ASM_FUNCTION_EPILOGUE loongarch_output_function_epilogue + #undef TARGET_ASM_SELECT_RTX_SECTION + #define TARGET_ASM_SELECT_RTX_SECTION loongarch_select_rtx_section + #undef TARGET_ASM_FUNCTION_RODATA_SECTION +@@ -10249,19 +10759,12 @@ loongarch_build_signbit_mask (machine_mode mode, bool vect, bool invert) + #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD + #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ + loongarch_multipass_dfa_lookahead +-#undef TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P +-#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P \ +- loongarch_small_register_classes_for_mode_p + + #undef TARGET_FUNCTION_OK_FOR_SIBCALL + #define TARGET_FUNCTION_OK_FOR_SIBCALL loongarch_function_ok_for_sibcall + +-#undef TARGET_INSERT_ATTRIBUTES +-#define TARGET_INSERT_ATTRIBUTES loongarch_insert_attributes +-#undef TARGET_MERGE_DECL_ATTRIBUTES +-#define TARGET_MERGE_DECL_ATTRIBUTES loongarch_merge_decl_attributes +-#undef TARGET_CAN_INLINE_P +-#define TARGET_CAN_INLINE_P loongarch_can_inline_p ++#undef TARGET_GET_DRAP_RTX ++#define TARGET_GET_DRAP_RTX loongarch_get_drap_rtx + + #undef TARGET_VALID_POINTER_MODE + #define TARGET_VALID_POINTER_MODE loongarch_valid_pointer_mode +@@ -10276,43 +10779,49 @@ loongarch_build_signbit_mask (machine_mode mode, bool vect, bool invert) + #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST + #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ + loongarch_builtin_vectorization_cost ++#undef TARGET_VECTORIZE_ADD_STMT_COST ++#define TARGET_VECTORIZE_ADD_STMT_COST loongarch_add_stmt_cost + ++#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT ++#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT loongarch_builtin_support_vector_misalignment ++#undef TARGET_MODE_REP_EXTENDED ++#define TARGET_MODE_REP_EXTENDED loongarch_mode_rep_extended + + #undef TARGET_IN_SMALL_DATA_P + #define TARGET_IN_SMALL_DATA_P loongarch_in_small_data_p + +-#undef TARGET_MACHINE_DEPENDENT_REORG +-#define TARGET_MACHINE_DEPENDENT_REORG loongarch_reorg +- +-#undef TARGET_PREFERRED_RELOAD_CLASS ++#undef TARGET_PREFERRED_RELOAD_CLASS + #define TARGET_PREFERRED_RELOAD_CLASS loongarch_preferred_reload_class + +-#undef TARGET_EXPAND_TO_RTL_HOOK +-#define TARGET_EXPAND_TO_RTL_HOOK loongarch_expand_to_rtl_hook +-#undef TARGET_ASM_FILE_START +-#define TARGET_ASM_FILE_START loongarch_file_start + #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE + #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true + + #undef TARGET_EXPAND_BUILTIN_VA_START + #define TARGET_EXPAND_BUILTIN_VA_START loongarch_va_start + +-#undef TARGET_PROMOTE_FUNCTION_MODE ++#undef TARGET_PROMOTE_FUNCTION_MODE + #define TARGET_PROMOTE_FUNCTION_MODE loongarch_promote_function_mode + #undef TARGET_RETURN_IN_MEMORY + #define TARGET_RETURN_IN_MEMORY loongarch_return_in_memory + ++#undef TARGET_FUNCTION_VALUE ++#define TARGET_FUNCTION_VALUE loongarch_function_value ++#undef TARGET_LIBCALL_VALUE ++#define TARGET_LIBCALL_VALUE loongarch_libcall_value ++ + #undef TARGET_ASM_OUTPUT_MI_THUNK + #define TARGET_ASM_OUTPUT_MI_THUNK loongarch_output_mi_thunk + #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +-#define TARGET_ASM_CAN_OUTPUT_MI_THUNK 
hook_bool_const_tree_hwi_hwi_const_tree_true ++#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \ ++ hook_bool_const_tree_hwi_hwi_const_tree_true + + #undef TARGET_PRINT_OPERAND + #define TARGET_PRINT_OPERAND loongarch_print_operand + #undef TARGET_PRINT_OPERAND_ADDRESS + #define TARGET_PRINT_OPERAND_ADDRESS loongarch_print_operand_address + #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P +-#define TARGET_PRINT_OPERAND_PUNCT_VALID_P loongarch_print_operand_punct_valid_p ++#define TARGET_PRINT_OPERAND_PUNCT_VALID_P \ ++ loongarch_print_operand_punct_valid_p + + #undef TARGET_SETUP_INCOMING_VARARGS + #define TARGET_SETUP_INCOMING_VARARGS loongarch_setup_incoming_varargs +@@ -10344,6 +10853,10 @@ loongarch_build_signbit_mask (machine_mode mode, bool vect, bool invert) + #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \ + loongarch_autovectorize_vector_sizes + ++#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ++#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \ ++ loongarch_builtin_vectorized_function ++ + #undef TARGET_INIT_BUILTINS + #define TARGET_INIT_BUILTINS loongarch_init_builtins + #undef TARGET_BUILTIN_DECL +@@ -10351,8 +10864,11 @@ loongarch_build_signbit_mask (machine_mode mode, bool vect, bool invert) + #undef TARGET_EXPAND_BUILTIN + #define TARGET_EXPAND_BUILTIN loongarch_expand_builtin + ++/* The generic ELF target does not always have TLS support. */ ++#ifdef HAVE_AS_TLS + #undef TARGET_HAVE_TLS + #define TARGET_HAVE_TLS HAVE_AS_TLS ++#endif + + #undef TARGET_CANNOT_FORCE_CONST_MEM + #define TARGET_CANNOT_FORCE_CONST_MEM loongarch_cannot_force_const_mem +@@ -10360,35 +10876,24 @@ loongarch_build_signbit_mask (machine_mode mode, bool vect, bool invert) + #undef TARGET_LEGITIMATE_CONSTANT_P + #define TARGET_LEGITIMATE_CONSTANT_P loongarch_legitimate_constant_p + +-#undef TARGET_ENCODE_SECTION_INFO +-#define TARGET_ENCODE_SECTION_INFO loongarch_encode_section_info +- +-#undef TARGET_ATTRIBUTE_TABLE +-#define TARGET_ATTRIBUTE_TABLE loongarch_attribute_table + /* All our function attributes are related to how out-of-line copies should + be compiled or called. They don't in themselves prevent inlining. 
*/ + #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P + #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true + + #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P +-#define TARGET_USE_BLOCKS_FOR_CONSTANT_P loongarch_use_blocks_for_constant_p +-#undef TARGET_USE_ANCHORS_FOR_SYMBOL_P +-#define TARGET_USE_ANCHORS_FOR_SYMBOL_P loongarch_use_anchors_for_symbol_p +- +-#undef TARGET_COMP_TYPE_ATTRIBUTES +-#define TARGET_COMP_TYPE_ATTRIBUTES loongarch_comp_type_attributes ++#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true + + #ifdef HAVE_AS_DTPRELWORD + #undef TARGET_ASM_OUTPUT_DWARF_DTPREL + #define TARGET_ASM_OUTPUT_DWARF_DTPREL loongarch_output_dwarf_dtprel + #endif +-#undef TARGET_DWARF_REGISTER_SPAN +-#define TARGET_DWARF_REGISTER_SPAN loongarch_dwarf_register_span +-#undef TARGET_DWARF_FRAME_REG_MODE +-#define TARGET_DWARF_FRAME_REG_MODE loongarch_dwarf_frame_reg_mode + + #undef TARGET_LEGITIMATE_ADDRESS_P +-#define TARGET_LEGITIMATE_ADDRESS_P loongarch_legitimate_address_p ++#define TARGET_LEGITIMATE_ADDRESS_P loongarch_legitimate_address_p ++ ++#undef TARGET_COMPUTE_FRAME_LAYOUT ++#define TARGET_COMPUTE_FRAME_LAYOUT loongarch_compute_frame_info + + #undef TARGET_FRAME_POINTER_REQUIRED + #define TARGET_FRAME_POINTER_REQUIRED loongarch_frame_pointer_required +@@ -10402,18 +10907,12 @@ loongarch_build_signbit_mask (machine_mode mode, bool vect, bool invert) + #undef TARGET_TRAMPOLINE_INIT + #define TARGET_TRAMPOLINE_INIT loongarch_trampoline_init + +-#undef TARGET_SHIFT_TRUNCATION_MASK +-#define TARGET_SHIFT_TRUNCATION_MASK loongarch_shift_truncation_mask +- + #undef TARGET_VECTORIZE_VEC_PERM_CONST + #define TARGET_VECTORIZE_VEC_PERM_CONST loongarch_vectorize_vec_perm_const + + #undef TARGET_SCHED_REASSOCIATION_WIDTH + #define TARGET_SCHED_REASSOCIATION_WIDTH loongarch_sched_reassociation_width + +-#undef TARGET_CASE_VALUES_THRESHOLD +-#define TARGET_CASE_VALUES_THRESHOLD loongarch_case_values_threshold +- + #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV + #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV loongarch_atomic_assign_expand_fenv + +@@ -10422,13 +10921,6 @@ loongarch_build_signbit_mask (machine_mode mode, bool vect, bool invert) + + #undef TARGET_SPILL_CLASS + #define TARGET_SPILL_CLASS loongarch_spill_class +-#undef TARGET_LRA_P +-#define TARGET_LRA_P loongarch_lra_p +-#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS +-#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS loongarch_ira_change_pseudo_allocno_class +- +-#undef TARGET_HARD_REGNO_SCRATCH_OK +-#define TARGET_HARD_REGNO_SCRATCH_OK loongarch_hard_regno_scratch_ok + + #undef TARGET_HARD_REGNO_NREGS + #define TARGET_HARD_REGNO_NREGS loongarch_hard_regno_nregs +@@ -10445,9 +10937,6 @@ loongarch_build_signbit_mask (machine_mode mode, bool vect, bool invert) + #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS + #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2 + +-#undef TARGET_SECONDARY_MEMORY_NEEDED +-#define TARGET_SECONDARY_MEMORY_NEEDED loongarch_secondary_memory_needed +- + #undef TARGET_CAN_CHANGE_MODE_CLASS + #define TARGET_CAN_CHANGE_MODE_CLASS loongarch_can_change_mode_class + +@@ -10460,6 +10949,9 @@ loongarch_build_signbit_mask (machine_mode mode, bool vect, bool invert) + #undef TARGET_STARTING_FRAME_OFFSET + #define TARGET_STARTING_FRAME_OFFSET loongarch_starting_frame_offset + ++#undef TARGET_SECONDARY_RELOAD ++#define TARGET_SECONDARY_RELOAD loongarch_secondary_reload ++ + struct gcc_target targetm = TARGET_INITIALIZER; +- ++ + #include "gt-loongarch.h" +diff --git a/gcc/config/loongarch/loongarch.h 
b/gcc/config/loongarch/loongarch.h +index 18d17afb8..1b26230cb 100644 +--- a/gcc/config/loongarch/loongarch.h ++++ b/gcc/config/loongarch/loongarch.h +@@ -1,9 +1,7 @@ +-/* Definitions of target machine for GNU compiler. LARCH version. +- Copyright (C) 1989-2018 Free Software Foundation, Inc. +- Contributed by A. Lichnewsky (lich@inria.inria.fr). +- Changed by Michael Meissner (meissner@osf.org). +- 64-bit r4000 support by Ian Lance Taylor (ian@cygnus.com) and +- Brendan Eich (brendan@microunity.com). ++/* Definitions of target machine for GNU compiler. LoongArch version. ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. ++ Contributed by Loongson Technology Co. Ltd. ++ Based on MIPS and RISC-V target for GNU compiler. + + This file is part of GCC. + +@@ -21,318 +19,36 @@ You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +- +-#include "config/vxworks-dummy.h" +- +-#ifdef GENERATOR_FILE +-/* This is used in some insn conditions, so needs to be declared, but +- does not need to be defined. */ +-extern int target_flags_explicit; +-#endif +- +-/* LARCH external variables defined in loongarch.c. */ +- +-/* Which ABI to use. ABILP32 (original 32, or o32), ABILPX32 (n32), +- ABILP64 (n64) are all defined by SGI. */ +- +-#define ABILP32 0 +-#define ABILPX32 1 +-#define ABILP64 2 +- +-/* Information about one recognized processor. Defined here for the +- benefit of TARGET_CPU_CPP_BUILTINS. */ +-struct loongarch_cpu_info { +- /* The 'canonical' name of the processor as far as GCC is concerned. +- It's typically a manufacturer's prefix followed by a numerical +- designation. It should be lowercase. */ +- const char *name; +- +- /* The internal processor number that most closely matches this +- entry. Several processors can have the same value, if there's no +- difference between them from GCC's point of view. */ +- enum processor cpu; +- +- /* The ISA level that the processor implements. */ +- int isa; +- +- /* A mask of PTF_* values. */ +- unsigned int tune_flags; +-}; ++/* LoongArch external variables defined in loongarch.c. */ + + #include "config/loongarch/loongarch-opts.h" + + /* Macros to silence warnings about numbers being signed in traditional + C and unsigned in ISO C when compiled on 32-bit hosts. */ + +-#define BITMASK_HIGH (((unsigned long)1) << 31) /* 0x80000000 */ +-#define BITMASK_UPPER16 ((unsigned long)0xffff << 16) /* 0xffff0000 */ +-#define BITMASK_LOWER16 ((unsigned long)0xffff) /* 0x0000ffff */ ++#define BITMASK_HIGH (((unsigned long) 1) << 31) /* 0x80000000 */ + +- + /* Run-time compilation parameters selecting different hardware subsets. */ + +-/* True if we are generating position-independent VxWorks RTP code. */ +-#define TARGET_RTP_PIC (TARGET_VXWORKS_RTP && flag_pic) +- +-/* True if we can optimize sibling calls. For simplicity, we only +- handle cases in which call_insn_operand will reject invalid +- sibcall addresses. There are two cases in which this isn't true: +- +- - TARGET_USE_GOT && !TARGET_EXPLICIT_RELOCS. call_insn_operand +- accepts global constants, but all sibcalls must be indirect. */ +-#define TARGET_SIBCALLS (1) +- +-/* True if we can use the J and JAL instructions. */ +-#define TARGET_ABSOLUTE_JUMPS (!flag_pic) +- +-/* True if the output must have a writable .eh_frame. +- See ASM_PREFERRED_EH_DATA_FORMAT for details. 
*/ +-#ifdef HAVE_LD_PERSONALITY_RELAXATION +-#define TARGET_WRITABLE_EH_FRAME 0 +-#else +-#define TARGET_WRITABLE_EH_FRAME (flag_pic && TARGET_SHARED) +-#endif +- +- +-/* ISA has LSA available. */ +-#define ISA_HAS_LSA (1) +- +-/* ISA has DLSA available. */ +-#define ISA_HAS_DLSA (TARGET_64BIT) +- +-/* Architecture target defines. */ +-#define TARGET_LOONGARCH64 (loongarch_arch == PROCESSOR_LOONGARCH64) +-#define TUNE_LOONGARCH64 (loongarch_tune == PROCESSOR_LOONGARCH64) +-#define TARGET_LA464 (loongarch_arch == PROCESSOR_LA464) +-#define TUNE_LA464 (loongarch_tune == PROCESSOR_LA464) +-/* True if the pre-reload scheduler should try to create chains of +- multiply-add or multiply-subtract instructions. For example, +- suppose we have: +- +- t1 = a * b +- t2 = t1 + c * d +- t3 = e * f +- t4 = t3 - g * h +- +- t1 will have a higher priority than t2 and t3 will have a higher +- priority than t4. However, before reload, there is no dependence +- between t1 and t3, and they can often have similar priorities. +- The scheduler will then tend to prefer: +- +- t1 = a * b +- t3 = e * f +- t2 = t1 + c * d +- t4 = t3 - g * h +- +- which stops us from making full use of macc/madd-style instructions. +- This sort of situation occurs frequently in Fourier transforms and +- in unrolled loops. +- +- To counter this, the TUNE_MACC_CHAINS code will reorder the ready +- queue so that chained multiply-add and multiply-subtract instructions +- appear ahead of any other instruction that is likely to clobber lo. +- In the example above, if t2 and t3 become ready at the same time, +- the code ensures that t2 is scheduled first. +- +- Multiply-accumulate instructions are a bigger win for some targets +- than others, so this macro is defined on an opt-in basis. */ +-#define TUNE_MACC_CHAINS 0 +- +-#define TARGET_OLDABI (loongarch_abi == ABILP32) +-#define TARGET_NEWABI (loongarch_abi == ABILPX32 || loongarch_abi == ABILP64) +- +-/* TARGET_HARD_FLOAT and TARGET_SOFT_FLOAT reflect whether the FPU is +- directly accessible, while the command-line options select +- TARGET_HARD_FLOAT_ABI and TARGET_SOFT_FLOAT_ABI to reflect the ABI +- in use. */ +-#define TARGET_HARD_FLOAT (TARGET_HARD_FLOAT_ABI) +-#define TARGET_SOFT_FLOAT (TARGET_SOFT_FLOAT_ABI) +- +-/* False if SC acts as a memory barrier with respect to itself, +- otherwise a SYNC will be emitted after SC for atomic operations +- that require ordering between the SC and following loads and +- stores. It does not tell anything about ordering of loads and +- stores prior to and following the SC, only about the SC itself and +- those loads and stores follow it. */ +-#define TARGET_SYNC_AFTER_SC (1) +- +-/* Define preprocessor macros for the -march and -mtune options. +- PREFIX is either _LARCH_ARCH or _LARCH_TUNE, INFO is the selected +- processor. If INFO's canonical name is "foo", define PREFIX to +- be "foo", and define an additional macro PREFIX_FOO. */ +-#define LARCH_CPP_SET_PROCESSOR(PREFIX, INFO) \ +- do \ +- { \ +- char *macro, *p; \ +- \ +- macro = concat ((PREFIX), "_", (INFO)->name, NULL); \ +- for (p = macro; *p != 0; p++) \ +- if (*p == '+') \ +- *p = 'P'; \ +- else \ +- *p = TOUPPER (*p); \ +- \ +- builtin_define (macro); \ +- builtin_define_with_value ((PREFIX), (INFO)->name, 1); \ +- free (macro); \ +- } \ +- while (0) +- + /* Target CPU builtins. */ +-#define TARGET_CPU_CPP_BUILTINS() loongarch_cpu_cpp_builtins (pfile) +- +-/* Target CPU versions for D. 
*/ +-#define TARGET_D_CPU_VERSIONS loongarch_d_target_versions ++#define TARGET_CPU_CPP_BUILTINS() loongarch_cpu_cpp_builtins (pfile) + +-/* Default target_flags if no switches are specified */ +- +-#ifndef TARGET_DEFAULT +-#define TARGET_DEFAULT 0 +-#endif +- +-#ifndef TARGET_CPU_DEFAULT +-#define TARGET_CPU_DEFAULT 0 +-#endif ++/* Default target_flags if no switches are specified. */ + + #ifdef IN_LIBGCC2 + #undef TARGET_64BIT +-/* Make this compile time constant for libgcc2 */ ++/* Make this compile time constant for libgcc2. */ + #ifdef __loongarch64 +-#define TARGET_64BIT 1 ++#define TARGET_64BIT 1 + #else +-#define TARGET_64BIT 0 ++#define TARGET_64BIT 0 + #endif +-#endif /* IN_LIBGCC2 */ ++#endif /* IN_LIBGCC2 */ + + #define TARGET_LIBGCC_SDATA_SECTION ".sdata" + +-#ifndef MULTILIB_ISA_DEFAULT +-#if LARCH_ISA_DEFAULT == 0 +-#define MULTILIB_ISA_DEFAULT "loongarch64" +-#endif +-#endif +- +-#ifndef LARCH_ABI_DEFAULT +-#define LARCH_ABI_DEFAULT ABILP32 +-#endif +- +-/* Use the most portable ABI flag for the ASM specs. */ +- +-#if LARCH_ABI_DEFAULT == ABILP32 +-#define MULTILIB_ABI_DEFAULT "mabi=lp32" +-#elif LARCH_ABI_DEFAULT == ABILP64 +-#define MULTILIB_ABI_DEFAULT "mabi=lp64" +-#endif +- +-#ifndef MULTILIB_DEFAULTS +-#define MULTILIB_DEFAULTS \ +- {MULTILIB_ISA_DEFAULT, MULTILIB_ABI_DEFAULT } +-#endif +- +-/* A spec condition that matches all -loongarch arguments. */ +- +-#define LARCH_ISA_LEVEL_OPTION_SPEC \ +- "loongarch" +- +-/* A spec condition that matches all architecture arguments. */ +- +-#define LARCH_ARCH_OPTION_SPEC \ +- LARCH_ISA_LEVEL_OPTION_SPEC "|march=*" +- +-/* A spec that infers a -loongarch argument from an -march argument. */ +- +-#define LARCH_ISA_LEVEL_SPEC \ +- "%{" LARCH_ISA_LEVEL_OPTION_SPEC ":;:}" +- +-/* A spec that injects the default multilib ISA if no architecture is +- specified. */ +- +-#define LARCH_DEFAULT_ISA_LEVEL_SPEC \ +- "%{" LARCH_ISA_LEVEL_OPTION_SPEC ":;: \ +- %{!march=*: -" MULTILIB_ISA_DEFAULT "}}" +- +-/* A spec that infers a -mhard-float or -msoft-float setting from an +- -march argument. Note that soft-float and hard-float code are not +- link-compatible. */ +- +-#define LARCH_ARCH_FLOAT_SPEC \ +- "%{mhard-float|msoft-float|mno-float|march=loongarch*:; \ +- march=vr41*|march=m4k|march=4k*|march=24kc|march=24kec \ +- |march=34kc|march=34kn|march=74kc|march=1004kc|march=5kc \ +- |march=m14k*|march=m5101|march=octeon|march=xlr: -msoft-float; \ +- march=*: -mhard-float}" +- +-/* A spec condition that matches 32-bit options. It only works if +- LARCH_ISA_LEVEL_SPEC has been applied. */ +- +-#define LARCH_32BIT_OPTION_SPEC \ +- "loongarch1|loongarch2|loongarch32*|mgp32" +- +-#if (LARCH_ABI_DEFAULT == ABILPX32 \ +- || LARCH_ABI_DEFAULT == ABILP64) +-#define OPT_ARCH64 "mabi=32|mgp32:;" +-#define OPT_ARCH32 "mabi=32|mgp32" +-#else +-#define OPT_ARCH64 "mabi=o64|mabi=n32|mabi=64|mgp64" +-#define OPT_ARCH32 "mabi=o64|mabi=n32|mabi=64|mgp64:;" +-#endif +- +-/* Support for a compile-time default CPU, et cetera. The rules are: +- --with-arch is ignored if -march is specified or a -loongarch is specified +- ; likewise --with-arch-32 and --with-arch-64. +- --with-tune is ignored if -mtune is specified; likewise +- --with-tune-32 and --with-tune-64. +- --with-abi is ignored if -mabi is specified. +- --with-float is ignored if -mhard-float or -msoft-float are +- specified. +- --with-fpu is ignored if -msoft-float, -msingle-float or -mdouble-float are +- specified. 
+- --with-fp-32 is ignored if -msoft-float, -msingle-float, -mlsx or -mfp are +- specified. +- --with-divide is ignored if -mdivide-traps or -mdivide-breaks are +- specified. */ +-#define OPTION_DEFAULT_SPECS \ +- {"arch", "%{" LARCH_ARCH_OPTION_SPEC ":;: -march=%(VALUE)}" }, \ +- {"arch_32", "%{" OPT_ARCH32 ":%{" LARCH_ARCH_OPTION_SPEC ":;: -march=%(VALUE)}}" }, \ +- {"arch_64", "%{" OPT_ARCH64 ":%{" LARCH_ARCH_OPTION_SPEC ":;: -march=%(VALUE)}}" }, \ +- {"tune", "%{!mtune=*:-mtune=%(VALUE)}" }, \ +- {"tune_32", "%{" OPT_ARCH32 ":%{!mtune=*:-mtune=%(VALUE)}}" }, \ +- {"tune_64", "%{" OPT_ARCH64 ":%{!mtune=*:-mtune=%(VALUE)}}" }, \ +- {"abi", "%{!mabi=*:-mabi=%(VALUE)}" }, \ +- {"float", "%{!msoft-float:%{!mhard-float:-m%(VALUE)-float}}" }, \ +- {"fpu", "%{!msoft-float:%{!msingle-float:%{!mdouble-float:-m%(VALUE)-float}}}" }, \ +- {"fp_32", "%{" OPT_ARCH32 \ +- ":%{!msoft-float:%{!msingle-float:%{!mfp*:%{!mlsx:%{!mloongson-asx:-mfp%(VALUE)}}}}}" }, \ +- {"divide", "%{!mdivide-traps:%{!mdivide-breaks:-mdivide-%(VALUE)}}" } +- +-/* A spec that infers the: +- -mlsx setting from a -march=la464 argument. +- -mlasx setting from a -march=la464 argument. */ +-#define BASE_DRIVER_SELF_SPECS \ +- LARCH_ASE_LSX_SPEC \ +- LARCH_ASE_LASX_SPEC +- +-#define LARCH_ASE_LSX_SPEC \ +- "%{!mno-lsx: \ +- %{march=la464: -mlsx}}" +- +-#define LARCH_ASE_LASX_SPEC \ +- "%{!mno-lasx: \ +- %{march=la464: -mlasx}}" +- +-#define DRIVER_SELF_SPECS \ +- BASE_DRIVER_SELF_SPECS +- +-/* from N_LARCH */ +-#define ABI_SPEC \ +- "%{mabi=lp32:32}" \ +- "%{mabi=lp64:64}" \ +- +-#define STARTFILE_PREFIX_SPEC \ +- "/lib" ABI_SPEC "/ " \ +- "/usr/lib" ABI_SPEC "/ " \ +- "/lib/ " \ +- "/usr/lib/ " ++/* Driver native functions for SPEC processing in the GCC driver. */ ++#include "loongarch-driver.h" + + /* This definition replaces the formerly used 'm' constraint with a + different constraint letter in order to avoid changing semantics of +@@ -341,71 +57,11 @@ struct loongarch_cpu_info { + must not be used in insn definitions or inline assemblies. */ + #define TARGET_MEM_CONSTRAINT 'w' + +-/* True if the file format uses 64-bit symbols. At present, this is +- only true for n64, which uses 64-bit ELF. */ +-#define FILE_HAS_64BIT_SYMBOLS (loongarch_abi == ABILP64) +- +-/* True if symbols are 64 bits wide. This is usually determined by +- the ABI's file format, but it can be overridden by -msym32. Note that +- overriding the size with -msym32 changes the ABI of relocatable objects, +- although it doesn't change the ABI of a fully-linked object. */ +-#define ABI_HAS_64BIT_SYMBOLS (FILE_HAS_64BIT_SYMBOLS \ +- && Pmode == DImode) +- +-/* ISA supports instructions DMUL, DMULU, DMUH, DMUHU. */ +-#define ISA_HAS_DMUL (TARGET_64BIT) +- +-/* ISA has floating-point RECIP.fmt and RSQRT.fmt instructions. The +- LARCH64 rev. 1 ISA says that RECIP.D and RSQRT.D are unpredictable when +- doubles are stored in pairs of FPRs, so for safety's sake, we apply +- this restriction to the LARCH IV ISA too. */ +-#define ISA_HAS_FP_RECIP_RSQRT(MODE) \ +- ((MODE) == SFmode \ +- || (TARGET_FLOAT64 \ +- && (MODE) == DFmode)) +- +-/* The LSX ASE is available. */ +-#define ISA_HAS_LSX (TARGET_LSX) +- +-/* The LASX ASE is available. */ +-#define ISA_HAS_LASX (TARGET_LASX) +- + /* Tell collect what flags to pass to nm. */ + #ifndef NM_FLAGS + #define NM_FLAGS "-Bn" + #endif + +- +-/* SUBTARGET_ASM_DEBUGGING_SPEC handles passing debugging options to +- the assembler. It may be overridden by subtargets. 
+- +- Beginning with gas 2.13, -mdebug must be passed to correctly handle +- COFF debugging info. */ +- +-#ifndef SUBTARGET_ASM_DEBUGGING_SPEC +-#define SUBTARGET_ASM_DEBUGGING_SPEC "\ +-%{g} %{g0} %{g1} %{g2} %{g3} \ +-%{ggdb:-g} %{ggdb0:-g0} %{ggdb1:-g1} %{ggdb2:-g2} %{ggdb3:-g3} \ +-%{gstabs:-g} %{gstabs0:-g0} %{gstabs1:-g1} %{gstabs2:-g2} %{gstabs3:-g3} \ +-%{gstabs+:-g} %{gstabs+0:-g0} %{gstabs+1:-g1} %{gstabs+2:-g2} %{gstabs+3:-g3}" +-#endif +- +-/* FP_ASM_SPEC represents the floating-point options that must be passed +- to the assembler when FPXX support exists. Prior to that point the +- assembler could accept the options but were not required for +- correctness. We only add the options when absolutely necessary +- because passing -msoft-float to the assembler will cause it to reject +- all hard-float instructions which may require some user code to be +- updated. */ +- +-#ifdef HAVE_AS_DOT_MODULE +-#define FP_ASM_SPEC "\ +-%{mhard-float} %{msoft-float} \ +-%{msingle-float} %{mdouble-float}" +-#else +-#define FP_ASM_SPEC +-#endif +- + /* SUBTARGET_ASM_SPEC is always passed to the assembler. It may be + overridden by subtargets. */ + +@@ -414,29 +70,21 @@ struct loongarch_cpu_info { + #endif + + #undef ASM_SPEC +-#define ASM_SPEC "\ +-%{mabi=*} %{!mabi=*: %(asm_abi_default_spec)} \ +-" ++#define ASM_SPEC "%{mabi=lp64d:-mabi=lp64} %{subtarget_asm_spec}" ++ + /* Extra switches sometimes passed to the linker. */ + + #ifndef LINK_SPEC + #define LINK_SPEC "" +-#endif /* LINK_SPEC defined */ +- ++#endif /* LINK_SPEC defined */ + +-/* Specs for the compiler proper */ +- +-/* SUBTARGET_CC1_SPEC is passed to the compiler proper. It may be +- overridden by subtargets. */ +-#ifndef SUBTARGET_CC1_SPEC +-#define SUBTARGET_CC1_SPEC "" +-#endif ++/* Specs for the compiler proper. */ + + /* CC1_SPEC is the set of arguments to pass to the compiler proper. */ + + #undef CC1_SPEC + #define CC1_SPEC "\ +-%{G*} %{EB:-meb} %{EL:-mel} %{EB:%{EL:%emay not use both -EB and -EL}} \ ++%{G*} \ + %(subtarget_cc1_spec)" + + /* Preprocessor specs. */ +@@ -459,63 +107,38 @@ struct loongarch_cpu_info { + + Do not define this macro if it does not need to do anything. */ + +-#define EXTRA_SPECS \ +- { "subtarget_cc1_spec", SUBTARGET_CC1_SPEC }, \ +- { "subtarget_cpp_spec", SUBTARGET_CPP_SPEC }, \ +- { "subtarget_asm_debugging_spec", SUBTARGET_ASM_DEBUGGING_SPEC }, \ +- { "subtarget_asm_spec", SUBTARGET_ASM_SPEC }, \ +- { "asm_abi_default_spec", "-" MULTILIB_ABI_DEFAULT }, \ +- SUBTARGET_EXTRA_SPECS +- +-#ifndef SUBTARGET_EXTRA_SPECS +-#define SUBTARGET_EXTRA_SPECS +-#endif +- +-#define DBX_DEBUGGING_INFO 1 /* generate stabs (OSF/rose) */ +-#define DWARF2_DEBUGGING_INFO 1 /* dwarf2 debugging info */ +- +-#ifndef PREFERRED_DEBUGGING_TYPE +-#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG +-#endif +- +-/* The size of DWARF addresses should be the same as the size of symbols +- in the target file format. They shouldn't depend on things like -msym32, +- because many DWARF consumers do not allow the mixture of address sizes +- that one would then get from linking -msym32 code with -msym64 code. +-*/ +-#define DWARF2_ADDR_SIZE (FILE_HAS_64BIT_SYMBOLS ? 8 : 4) +- +-/* By default, turn on GDB extensions. */ +-#define DEFAULT_GDB_EXTENSIONS 1 ++#define EXTRA_SPECS \ ++ {"subtarget_cc1_spec", SUBTARGET_CC1_SPEC}, \ ++ {"subtarget_cpp_spec", SUBTARGET_CPP_SPEC}, \ ++ {"subtarget_asm_spec", SUBTARGET_ASM_SPEC}, + + /* Registers may have a prefix which can be ignored when matching + user asm and register definitions. 
*/ + #ifndef REGISTER_PREFIX +-#define REGISTER_PREFIX "$" ++#define REGISTER_PREFIX "$" + #endif + + /* Local compiler-generated symbols must have a prefix that the assembler +- understands. By default, this is $, although some targets (e.g., +- NetBSD-ELF) need to override this. */ ++ understands. */ + +-#ifndef LOCAL_LABEL_PREFIX +-#define LOCAL_LABEL_PREFIX "$" +-#endif ++#define LOCAL_LABEL_PREFIX "." + + /* By default on the loongarch, external symbols do not have an underscore +- prepended, but some targets (e.g., NetBSD) require this. */ ++ prepended. */ + +-#ifndef USER_LABEL_PREFIX +-#define USER_LABEL_PREFIX "" ++#define USER_LABEL_PREFIX "" ++ ++#ifndef PREFERRED_DEBUGGING_TYPE ++#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG + #endif + +-/* On Sun 4, this limit is 2048. We use 1500 to be safe, +- since the length can run past this up to a continuation point. */ +-#undef DBX_CONTIN_LENGTH +-#define DBX_CONTIN_LENGTH 1500 ++/* The size of DWARF addresses should be the same as the size of symbols ++ in the target file format. */ ++#define DWARF2_ADDR_SIZE (TARGET_64BIT ? 8 : 4) + +-/* How to renumber registers for dbx and gdb. */ +-#define DBX_REGISTER_NUMBER(REGNO) loongarch_dbx_regno[REGNO] ++/* By default, produce dwarf version 2 format debugging output in response ++ to the ‘-g’ option. */ ++#define DWARF2_DEBUGGING_INFO 1 + + /* The mapping from gcc register number to DWARF 2 CFA column number. */ + #define DWARF_FRAME_REGNUM(REGNO) loongarch_dwarf_regno[REGNO] +@@ -530,7 +153,7 @@ struct loongarch_cpu_info { + #define EH_RETURN_DATA_REGNO(N) \ + ((N) < (4) ? (N) + GP_ARG_FIRST : INVALID_REGNUM) + +-#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, GP_ARG_FIRST + 4) ++#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, GP_ARG_FIRST + 4) + + #define EH_USES(N) loongarch_eh_uses (N) + +@@ -539,19 +162,7 @@ struct loongarch_cpu_info { + SFmode register saves. */ + #define DWARF_CIE_DATA_ALIGNMENT -4 + +-/* Correct the offset of automatic variables and arguments. Note that +- the LARCH debug format wants all automatic variables and arguments +- to be in terms of the virtual frame pointer (stack pointer before +- any adjustment in the function), while the LARCH 3.0 linker wants +- the frame pointer to be the stack pointer after the initial +- adjustment. */ +- +-#define DEBUGGER_AUTO_OFFSET(X) \ +- loongarch_debugger_offset (X, (HOST_WIDE_INT) 0) +-#define DEBUGGER_ARG_OFFSET(OFFSET, X) \ +- loongarch_debugger_offset (X, (HOST_WIDE_INT) OFFSET) +- +-/* Target machine storage layout */ ++/* Target machine storage layout. */ + + #define BITS_BIG_ENDIAN 0 + #define BYTES_BIG_ENDIAN 0 +@@ -576,27 +187,19 @@ struct loongarch_cpu_info { + #define BITS_PER_LASX_REG (UNITS_PER_LASX_REG * BITS_PER_UNIT) + + /* For LARCH, width of a floating point register. */ +-#define UNITS_PER_FPREG (TARGET_FLOAT64 ? 8 : 4) +- +-/* The number of consecutive floating-point registers needed to store the +- largest format supported by the FPU. */ +-#define MAX_FPRS_PER_FMT (TARGET_FLOAT64 || TARGET_SINGLE_FLOAT ? 1 : 2) +- +-/* The number of consecutive floating-point registers needed to store the +- smallest format supported by the FPU. */ +-#define MIN_FPRS_PER_FMT 1 ++#define UNITS_PER_FPREG (TARGET_DOUBLE_FLOAT ? 8 : 4) + + /* The largest size of value that can be held in floating-point + registers and moved with a single instruction. */ + #define UNITS_PER_HWFPVALUE \ +- (TARGET_SOFT_FLOAT_ABI ? 0 : MAX_FPRS_PER_FMT * UNITS_PER_FPREG) ++ (TARGET_SOFT_FLOAT ? 
0 : UNITS_PER_FPREG) + + /* The largest size of value that can be held in floating-point + registers. */ +-#define UNITS_PER_FPVALUE \ +- (TARGET_SOFT_FLOAT_ABI ? 0 \ +- : TARGET_SINGLE_FLOAT ? UNITS_PER_FPREG \ +- : LONG_DOUBLE_TYPE_SIZE / BITS_PER_UNIT) ++#define UNITS_PER_FPVALUE \ ++ (TARGET_SOFT_FLOAT ? 0 \ ++ : TARGET_SINGLE_FLOAT ? UNITS_PER_FPREG \ ++ : LONG_DOUBLE_TYPE_SIZE / BITS_PER_UNIT) + + /* The number of bytes in a double. */ + #define UNITS_PER_DOUBLE (TYPE_PRECISION (double_type_node) / BITS_PER_UNIT) +@@ -609,7 +212,7 @@ struct loongarch_cpu_info { + + #define FLOAT_TYPE_SIZE 32 + #define DOUBLE_TYPE_SIZE 64 +-#define LONG_DOUBLE_TYPE_SIZE (TARGET_NEWABI ? 128 : 64) ++#define LONG_DOUBLE_TYPE_SIZE (TARGET_64BIT ? 128 : 64) + + /* Define the sizes of fixed-point types. */ + #define SHORT_FRACT_TYPE_SIZE 8 +@@ -620,8 +223,6 @@ struct loongarch_cpu_info { + #define SHORT_ACCUM_TYPE_SIZE 16 + #define ACCUM_TYPE_SIZE 32 + #define LONG_ACCUM_TYPE_SIZE 64 +-/* FIXME. LONG_LONG_ACCUM_TYPE_SIZE should be 128 bits, but GCC +- doesn't support 128-bit integers for LARCH32 currently. */ + #define LONG_LONG_ACCUM_TYPE_SIZE (TARGET_64BIT ? 128 : 64) + + /* long double is not a fixed mode, but the idea is that, if we +@@ -630,7 +231,7 @@ struct loongarch_cpu_info { + + /* Width in bits of a pointer. */ + #ifndef POINTER_SIZE +-#define POINTER_SIZE ((TARGET_64BIT) ? 64 : 32) ++#define POINTER_SIZE (TARGET_64BIT ? 64 : 32) + #endif + + /* Allocation boundary (in *bits*) for storing arguments in argument list. */ +@@ -642,8 +243,8 @@ struct loongarch_cpu_info { + /* Alignment of field after `int : 0' in a structure. */ + #define EMPTY_FIELD_BOUNDARY 32 + +-/* Every structure's size must be a multiple of this. */ +-/* 8 is observed right on a DECstation and on riscos 4.02. */ ++/* Number of bits which any structure or union's size must be a multiple of. ++ Each structure or union's size is rounded up to a multiple of this. */ + #define STRUCTURE_SIZE_BOUNDARY 8 + + /* There is no point aligning anything to a rounder boundary than +@@ -655,6 +256,9 @@ struct loongarch_cpu_info { + /* All accesses must be aligned. */ + #define STRICT_ALIGNMENT (TARGET_STRICT_ALIGN) + ++/* Glibc align malloc to 128 from glibc/sysdeps/generic/malloc-alignment.h. */ ++#define MALLOC_ABI_ALIGNMENT 128 ++ + /* Define this if you wish to imitate the way many other C compilers + handle alignment of bitfields and the structures that contain + them. +@@ -699,22 +303,17 @@ struct loongarch_cpu_info { + /* We need this for the same reason as DATA_ALIGNMENT, namely to cause + character arrays to be word-aligned so that `strcpy' calls that copy + constants to character arrays can be done inline, and 'strcmp' can be +- optimised to use word loads. */ +-#define LOCAL_ALIGNMENT(TYPE, ALIGN) \ +- DATA_ALIGNMENT (TYPE, ALIGN) +- +-#define PAD_VARARGS_DOWN \ +- (targetm.calls.function_arg_padding (TYPE_MODE (type), type) == PAD_DOWNWARD) ++ optimised to use word loads. */ ++#define LOCAL_ALIGNMENT(TYPE, ALIGN) DATA_ALIGNMENT (TYPE, ALIGN) + + /* Define if operations between registers always perform the operation + on the full register even if a narrower mode is specified. */ + #define WORD_REGISTER_OPERATIONS 1 + +-/* When in 64-bit mode, move insns will sign extend SImode and CCmode ++/* When in 64-bit mode, move insns will sign extend SImode and FCCmode + moves. All other references are zero extended. */ + #define LOAD_EXTEND_OP(MODE) \ +- (TARGET_64BIT && ((MODE) == SImode || (MODE) == CCmode) \ +- ? 
SIGN_EXTEND : ZERO_EXTEND) ++ ((TARGET_64BIT && (MODE) == SImode) ? SIGN_EXTEND : UNKNOWN) + + /* Define this macro if it is advisable to hold scalars in registers + in a wider mode than that declared by the program. In such cases, +@@ -722,13 +321,13 @@ struct loongarch_cpu_info { + type, but kept valid in the wider mode. The signedness of the + extension may differ from that of the type. */ + +-#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \ +- if (GET_MODE_CLASS (MODE) == MODE_INT \ ++#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \ ++ if (GET_MODE_CLASS (MODE) == MODE_INT \ + && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \ +- { \ +- if ((MODE) == SImode) \ +- (UNSIGNEDP) = 0; \ +- (MODE) = Pmode; \ ++ { \ ++ if ((MODE) == SImode) \ ++ (UNSIGNEDP) = 0; \ ++ (MODE) = Pmode; \ + } + + /* Pmode is always the same as ptr_mode, but not always the same as word_mode. +@@ -738,11 +337,11 @@ struct loongarch_cpu_info { + /* Define if loading short immediate values into registers sign extends. */ + #define SHORT_IMMEDIATES_SIGN_EXTEND 1 + +-/* The [d]clz instructions have the natural values at 0. */ ++/* The clz.{w/d} instructions have the natural values at 0. */ + + #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ + ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2) +- ++ + /* Standard register usage. */ + + /* Number of hardware registers. We have: +@@ -757,57 +356,39 @@ struct loongarch_cpu_info { + + #define FIRST_PSEUDO_REGISTER 74 + +-/* By default, fix the kernel registers ($26 and $27), the global +- pointer ($28) and the stack pointer ($29). This can change +- depending on the command-line options. +- +- Regarding coprocessor registers: without evidence to the contrary, +- it's best to assume that each coprocessor register has a unique +- use. This can be overridden, in, e.g., loongarch_option_override or +- TARGET_CONDITIONAL_REGISTER_USAGE should the assumption be +- inappropriate for a particular target. */ +- ++/* zero, tp, sp and x are fixed. */ + #define FIXED_REGISTERS \ +-{ \ ++{ /* General-purpose registers. */ \ + 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ ++ /* Floating-point registers. */ \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ ++ /* Others. */ \ + 0, 0, 0, 0, 0, 0, 0, 1, 1, 1} + +- +-/* Set up this array for o32 by default. +- +- Note that we don't mark $31 as a call-clobbered register. The idea is +- that it's really the call instructions themselves which clobber $31. +- We don't care what the called function does with it afterwards. +- +- This approach makes it easier to implement sibcalls. Unlike normal +- calls, sibcalls don't clobber $31, so the register reaches the +- called function in tact. EPILOGUE_USES says that $31 is useful +- to the called function. */ +- ++/* The call RTLs themselves clobber ra. */ + #define CALL_USED_REGISTERS \ +-{ \ ++{ /* General registers. */ \ + 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ ++ /* Floating-point registers. */ \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, \ ++ /* Others. */ \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1} + + /* Internal macros to classify a register number as to whether it's a +- general purpose register, a floating point register, a +- multiply/divide register, or a status register. */ ++ general purpose register, a floating point register, or a status ++ register. 
*/ + + #define GP_REG_FIRST 0 +-#define GP_REG_LAST 31 +-#define GP_REG_NUM (GP_REG_LAST - GP_REG_FIRST + 1) +-#define GP_DBX_FIRST 0 ++#define GP_REG_LAST 31 ++#define GP_REG_NUM (GP_REG_LAST - GP_REG_FIRST + 1) + + #define FP_REG_FIRST 32 +-#define FP_REG_LAST 63 +-#define FP_REG_NUM (FP_REG_LAST - FP_REG_FIRST + 1) +-#define FP_DBX_FIRST ((write_symbols == DBX_DEBUG) ? 38 : 32) ++#define FP_REG_LAST 63 ++#define FP_REG_NUM (FP_REG_LAST - FP_REG_FIRST + 1) + + #define LSX_REG_FIRST FP_REG_FIRST + #define LSX_REG_LAST FP_REG_LAST +@@ -823,20 +404,16 @@ struct loongarch_cpu_info { + would need to be handled by the DWARF unwinder. */ + #define DWARF_ALT_FRAME_RETURN_COLUMN 72 + +-#define ST_REG_FIRST 64 +-#define ST_REG_LAST 71 +-#define ST_REG_NUM (ST_REG_LAST - ST_REG_FIRST + 1) ++#define FCC_REG_FIRST 64 ++#define FCC_REG_LAST 71 ++#define FCC_REG_NUM (FCC_REG_LAST - FCC_REG_FIRST + 1) + +-#define GP_REG_P(REGNO) \ ++#define GP_REG_P(REGNO) \ + ((unsigned int) ((int) (REGNO) - GP_REG_FIRST) < GP_REG_NUM) +-#define M16_REG_P(REGNO) \ +- (((REGNO) >= 2 && (REGNO) <= 7) || (REGNO) == 16 || (REGNO) == 17) +-#define M16STORE_REG_P(REGNO) \ +- (((REGNO) >= 2 && (REGNO) <= 7) || (REGNO) == 0 || (REGNO) == 17) +-#define FP_REG_P(REGNO) \ ++#define FP_REG_P(REGNO) \ + ((unsigned int) ((int) (REGNO) - FP_REG_FIRST) < FP_REG_NUM) +-#define ST_REG_P(REGNO) \ +- ((unsigned int) ((int) (REGNO) - ST_REG_FIRST) < ST_REG_NUM) ++#define FCC_REG_P(REGNO) \ ++ ((unsigned int) ((int) (REGNO) - FCC_REG_FIRST) < FCC_REG_NUM) + #define LSX_REG_P(REGNO) \ + ((unsigned int) ((int) (REGNO) - LSX_REG_FIRST) < LSX_REG_NUM) + #define LASX_REG_P(REGNO) \ +@@ -846,10 +423,6 @@ struct loongarch_cpu_info { + #define LSX_REG_RTX_P(X) (REG_P (X) && LSX_REG_P (REGNO (X))) + #define LASX_REG_RTX_P(X) (REG_P (X) && LASX_REG_P (REGNO (X))) + +- +-#define HARD_REGNO_RENAME_OK(OLD_REG, NEW_REG) \ +- loongarch_hard_regno_rename_ok (OLD_REG, NEW_REG) +- + /* Select a register mode required for caller save of hard regno REGNO. */ + #define HARD_REGNO_CALLER_SAVE_MODE(REGNO, NREGS, MODE) \ + loongarch_hard_regno_caller_save_mode (REGNO, NREGS, MODE) +@@ -862,35 +435,34 @@ struct loongarch_cpu_info { + #define ARG_POINTER_REGNUM 72 + #define FRAME_POINTER_REGNUM 73 + +-#define HARD_FRAME_POINTER_REGNUM \ +- (GP_REG_FIRST + 22) +- +-/* FIXME: */ +-/* #define HARD_FRAME_POINTER_IS_FRAME_POINTER (HARD_FRAME_POINTER_REGNUM == FRAME_POINTER_REGNUM) */ +-/* #define HARD_FRAME_POINTER_IS_ARG_POINTER (HARD_FRAME_POINTER_REGNUM == ARG_POINTER_REGNUM) */ ++#define HARD_FRAME_POINTER_REGNUM (GP_REG_FIRST + 22) + + #define HARD_FRAME_POINTER_IS_FRAME_POINTER 0 + #define HARD_FRAME_POINTER_IS_ARG_POINTER 0 + +-/* FIXME: */ + /* Register in which static-chain is passed to a function. */ +-#define STATIC_CHAIN_REGNUM (GP_REG_FIRST + 20) /* $t8 */ +- +-#define LARCH_PROLOGUE_TEMP_REGNUM \ +- (GP_REG_FIRST + 13) +-#define LARCH_PROLOGUE_TEMP2_REGNUM \ +- (GP_REG_FIRST + 12) +-#define LARCH_PROLOGUE_TEMP3_REGNUM \ +- (GP_REG_FIRST + 14) +-#define LARCH_EPILOGUE_TEMP_REGNUM \ +- (GP_REG_FIRST + (12)) +- +-#define LARCH_PROLOGUE_TEMP(MODE) gen_rtx_REG (MODE, LARCH_PROLOGUE_TEMP_REGNUM) ++#define STATIC_CHAIN_REGNUM (GP_REG_FIRST + 20) /* $t8 */ ++ ++/* DRAP register if static-chain register is unavailable. 
*/ ++#define DRAP_REGNUM (GP_REG_FIRST + 15) /* $t3 */ ++ ++#define GP_TEMP_FIRST (GP_REG_FIRST + 12) ++#define LARCH_PROLOGUE_TEMP_REGNUM (GP_TEMP_FIRST + 1) ++#define LARCH_PROLOGUE_TEMP2_REGNUM (GP_TEMP_FIRST) ++#define LARCH_PROLOGUE_TEMP3_REGNUM (GP_TEMP_FIRST + 2) ++#define LARCH_EPILOGUE_TEMP_REGNUM (GP_TEMP_FIRST) ++ ++#define CALLEE_SAVED_REG_NUMBER(REGNO) \ ++ ((REGNO) >= 22 && (REGNO) <= 31 ? (REGNO) - 22 : -1) ++ ++#define LARCH_PROLOGUE_TEMP(MODE) \ ++ gen_rtx_REG (MODE, LARCH_PROLOGUE_TEMP_REGNUM) + #define LARCH_PROLOGUE_TEMP2(MODE) \ + gen_rtx_REG (MODE, LARCH_PROLOGUE_TEMP2_REGNUM) + #define LARCH_PROLOGUE_TEMP3(MODE) \ + gen_rtx_REG (MODE, LARCH_PROLOGUE_TEMP3_REGNUM) +-#define LARCH_EPILOGUE_TEMP(MODE) gen_rtx_REG (MODE, LARCH_EPILOGUE_TEMP_REGNUM) ++#define LARCH_EPILOGUE_TEMP(MODE) \ ++ gen_rtx_REG (MODE, LARCH_EPILOGUE_TEMP_REGNUM) + + /* Define this macro if it is as good or better to call a constant + function address than to call an address kept in a register. */ +@@ -898,7 +470,6 @@ struct loongarch_cpu_info { + + #define THREAD_POINTER_REGNUM (GP_REG_FIRST + 2) + +- + /* Define the classes of registers for register constraints in the + machine description. Also define ranges of constants. + +@@ -908,7 +479,7 @@ struct loongarch_cpu_info { + + The name GENERAL_REGS must be the name of a class (or an alias for + another name such as ALL_REGS). This is the class of registers +- that is allowed by "g" or "r" in a register constraint. ++ that is allowed by "r" in a register constraint. + Also, registers outside this class are allocated only when + instructions express preferences for them. + +@@ -921,16 +492,16 @@ struct loongarch_cpu_info { + + enum reg_class + { +- NO_REGS, /* no registers in set */ +- SIBCALL_REGS, /* SIBCALL_REGS */ +- JALR_REGS, /* JALR_REGS */ +- GR_REGS, /* integer registers */ +- CSR_REGS, /* integer registers except for $r0 and $r1 for csr. */ +- FP_REGS, /* floating point registers */ +- ST_REGS, /* status registers (fp status) */ +- FRAME_REGS, /* arg pointer and frame pointer */ +- ALL_REGS, /* all registers */ +- LIM_REG_CLASSES /* max value + 1 */ ++ NO_REGS, /* no registers in set */ ++ SIBCALL_REGS, /* registers used by indirect sibcalls */ ++ JIRL_REGS, /* registers used by indirect calls */ ++ CSR_REGS, /* integer registers except for $r0 and $r1 for lcsr. 
*/ ++ GR_REGS, /* integer registers */ ++ FP_REGS, /* floating point registers */ ++ FCC_REGS, /* status registers (fp status) */ ++ FRAME_REGS, /* arg pointer and frame pointer */ ++ ALL_REGS, /* all registers */ ++ LIM_REG_CLASSES /* max value + 1 */ + }; + + #define N_REG_CLASSES (int) LIM_REG_CLASSES +@@ -945,11 +516,11 @@ enum reg_class + { \ + "NO_REGS", \ + "SIBCALL_REGS", \ +- "JALR_REGS", \ +- "GR_REGS", \ ++ "JIRL_REGS", \ + "CSR_REGS", \ ++ "GR_REGS", \ + "FP_REGS", \ +- "ST_REGS", \ ++ "FCC_REGS", \ + "FRAME_REGS", \ + "ALL_REGS" \ + } +@@ -968,29 +539,28 @@ enum reg_class + #define REG_CLASS_CONTENTS \ + { \ + { 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \ +- { 0x001ff000, 0x00000000, 0x00000000 }, /* SIBCALL_REGS */ \ +- { 0xff9ffff0, 0x00000000, 0x00000000 }, /* JALR_REGS */ \ +- { 0xffffffff, 0x00000000, 0x00000000 }, /* GR_REGS */ \ ++ { 0x001fd000, 0x00000000, 0x00000000 }, /* SIBCALL_REGS */ \ ++ { 0xff9ffff0, 0x00000000, 0x00000000 }, /* JIRL_REGS */ \ + { 0xfffffffc, 0x00000000, 0x00000000 }, /* CSR_REGS */ \ ++ { 0xffffffff, 0x00000000, 0x00000000 }, /* GR_REGS */ \ + { 0x00000000, 0xffffffff, 0x00000000 }, /* FP_REGS */ \ +- { 0x00000000, 0x00000000, 0x000000ff }, /* ST_REGS */ \ ++ { 0x00000000, 0x00000000, 0x000000ff }, /* FCC_REGS */ \ + { 0x00000000, 0x00000000, 0x00000300 }, /* FRAME_REGS */ \ + { 0xffffffff, 0xffffffff, 0x000003ff } /* ALL_REGS */ \ + } + +- + /* A C expression whose value is a register class containing hard + register REGNO. In general there is more that one such class; + choose a class which is "minimal", meaning that no smaller class + also contains the register. */ + +-#define REGNO_REG_CLASS(REGNO) loongarch_regno_to_class[ (REGNO) ] ++#define REGNO_REG_CLASS(REGNO) loongarch_regno_to_class[(REGNO)] + + /* A macro whose definition is the name of the class to which a + valid base register must belong. A base register is one used in + an address which is the register value plus a displacement. */ + +-#define BASE_REG_CLASS (GR_REGS) ++#define BASE_REG_CLASS (GR_REGS) + + /* A macro whose definition is the name of the class to which a + valid index register must belong. An index register is one used +@@ -998,7 +568,7 @@ enum reg_class + factor or added to another register (as well as added to a + displacement). */ + +-#define INDEX_REG_CLASS NO_REGS ++#define INDEX_REG_CLASS GR_REGS + + /* We generally want to put call-clobbered registers ahead of + call-saved ones. (IRA expects this.) */ +@@ -1006,10 +576,6 @@ enum reg_class + #define REG_ALLOC_ORDER \ + { /* Call-clobbered GPRs. */ \ + 12, 13, 14, 15, 16, 17, 18, 19, 20, 4, 5, 6, 7, 8, 9, 10, 11, 1, \ +- /* The global pointer. This is call-clobbered for o32 and o64 \ +- abicalls, call-saved for n32 and n64 abicalls, and a program \ +- invariant otherwise. Putting it between the call-clobbered \ +- and call-saved registers should cope with all eventualities. */ \ + /* Call-saved GPRs. */ \ + 23, 24, 25, 26, 27, 28, 29, 30, 31, \ + /* GPRs that can never be exposed to the register allocator. */ \ +@@ -1017,31 +583,27 @@ enum reg_class + /* Call-clobbered FPRs. */ \ + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, \ + 48, 49, 50, 51,52, 53, 54, 55, \ +- /* FPRs that are usually call-saved. The odd ones are actually \ +- call-clobbered for n32, but listing them ahead of the even \ +- registers might encourage the register allocator to fragment \ +- the available FPR pairs. 
We need paired FPRs to store long \ +- doubles, so it isn't clear that using a different order \ +- for n32 would be a win. */ \ + 56, 57, 58, 59, 60, 61, 62, 63, \ + /* None of the remaining classes have defined call-saved \ + registers. */ \ + 64, 65, 66, 67, 68, 69, 70, 71, 72, 73} + ++#define IMM_BITS 12 ++#define IMM_REACH (HOST_WIDE_INT_1 << IMM_BITS) ++#define HWIT_1U HOST_WIDE_INT_1U ++ + /* True if VALUE is an unsigned 6-bit number. */ + +-#define UIMM6_OPERAND(VALUE) \ +- (((VALUE) & ~(unsigned HOST_WIDE_INT) 0x3f) == 0) ++#define UIMM6_OPERAND(VALUE) (((VALUE) & ~(unsigned HOST_WIDE_INT) 0x3f) == 0) + + /* True if VALUE is a signed 10-bit number. */ + +-#define IMM10_OPERAND(VALUE) \ +- ((unsigned HOST_WIDE_INT) (VALUE) + 0x200 < 0x400) ++#define IMM10_OPERAND(VALUE) ((unsigned HOST_WIDE_INT) (VALUE) + 0x200 < 0x400) + + /* True if VALUE is a signed 12-bit number. */ + + #define IMM12_OPERAND(VALUE) \ +- ((unsigned HOST_WIDE_INT) (VALUE) + 0x800 < 0x1000) ++ ((unsigned HOST_WIDE_INT) (VALUE) + IMM_REACH / 2 < IMM_REACH) + + /* True if VALUE is a signed 13-bit number. */ + +@@ -1053,67 +615,51 @@ enum reg_class + #define IMM16_OPERAND(VALUE) \ + ((unsigned HOST_WIDE_INT) (VALUE) + 0x8000 < 0x10000) + +- +-/* True if VALUE is a signed 12-bit number. */ +- +-#define SMALL_OPERAND(VALUE) \ +- ((unsigned HOST_WIDE_INT) (VALUE) + 0x800 < 0x1000) +- + /* True if VALUE is an unsigned 12-bit number. */ + +-#define SMALL_OPERAND_UNSIGNED(VALUE) \ +- (((VALUE) & ~(unsigned HOST_WIDE_INT) 0xfff) == 0) ++#define IMM12_OPERAND_UNSIGNED(VALUE) \ ++ (((VALUE) & ~(unsigned HOST_WIDE_INT) (IMM_REACH - 1)) == 0) + +-/* True if VALUE can be loaded into a register using LUI. */ ++/* True if VALUE can be loaded into a register using LU12I. */ + +-#define LUI_OPERAND(VALUE) \ +- (((VALUE) | 0x7ffff000) == 0x7ffff000 \ +- || ((VALUE) | 0x7ffff000) + 0x1000 == 0) ++#define LU12I_OPERAND(VALUE) \ ++ (((VALUE) | ((HWIT_1U << 31) - IMM_REACH)) == ((HWIT_1U << 31) - IMM_REACH) \ ++ || ((VALUE) | ((HWIT_1U << 31) - IMM_REACH)) + IMM_REACH == 0) + +-/* True if VALUE can be loaded into a register using LUI. */ ++/* True if VALUE can be loaded into a register using LU32I. */ + +-#define LU32I_OPERAND(VALUE) \ +- ((((VALUE) | 0x7ffff00000000) == 0x7ffff00000000) \ +- || ((VALUE) | 0x7ffff00000000) + 0x100000000 == 0) ++#define LU32I_OPERAND(VALUE) \ ++ (((VALUE) | (((HWIT_1U << 19) - 1) << 32)) == (((HWIT_1U << 19) - 1) << 32) \ ++ || ((VALUE) | (((HWIT_1U << 19) - 1) << 32)) + (HWIT_1U << 32) == 0) + +-/* True if VALUE can be loaded into a register using LUI. */ ++/* True if VALUE can be loaded into a register using LU52I. */ + +-#define LU52I_OPERAND(VALUE) \ +- ((((VALUE) | 0xfff0000000000000) == 0xfff0000000000000)) ++#define HWIT_UC_0xFFF HOST_WIDE_INT_UC(0xfff) ++#define LU52I_OPERAND(VALUE) \ ++ (((VALUE) | (HWIT_UC_0xFFF << 52)) == (HWIT_UC_0xFFF << 52)) + + /* Return a value X with the low 12 bits clear, and such that + VALUE - X is a signed 12-bit value. 
*/ + +-#define CONST_HIGH_PART(VALUE) \ +- (((VALUE) + 0x800) & ~(unsigned HOST_WIDE_INT) 0xfff) ++#define CONST_HIGH_PART(VALUE) (((VALUE) + (IMM_REACH / 2)) & ~(IMM_REACH - 1)) + +-#define CONST_LOW_PART(VALUE) \ +- ((VALUE) - CONST_HIGH_PART (VALUE)) ++#define CONST_LOW_PART(VALUE) ((VALUE) - CONST_HIGH_PART (VALUE)) + +-#define SMALL_INT(X) SMALL_OPERAND (INTVAL (X)) +-#define SMALL_INT_UNSIGNED(X) SMALL_OPERAND_UNSIGNED (INTVAL (X)) +-#define LUI_INT(X) LUI_OPERAND (INTVAL (X)) ++#define IMM12_INT(X) IMM12_OPERAND (INTVAL (X)) ++#define IMM12_INT_UNSIGNED(X) IMM12_OPERAND_UNSIGNED (INTVAL (X)) ++#define LU12I_INT(X) LU12I_OPERAND (INTVAL (X)) + #define LU32I_INT(X) LU32I_OPERAND (INTVAL (X)) + #define LU52I_INT(X) LU52I_OPERAND (INTVAL (X)) +-#define ULARCH_12BIT_OFFSET_P(OFFSET) (IN_RANGE (OFFSET, -2048, 2047)) ++#define LARCH_U12BIT_OFFSET_P(OFFSET) (IN_RANGE (OFFSET, -2048, 2047)) + #define LARCH_9BIT_OFFSET_P(OFFSET) (IN_RANGE (OFFSET, -256, 255)) +-#define LISA_16BIT_OFFSET_P(OFFSET) (IN_RANGE (OFFSET, -32768, 32767)) +-#define LISA_SHIFT_2_OFFSET_P(OFFSET) (((OFFSET) & 0x3) == 0) +- +-/* The HI and LO registers can only be reloaded via the general +- registers. Condition code registers can only be loaded to the +- general registers, and from the floating point registers. */ +- +-#define SECONDARY_INPUT_RELOAD_CLASS(CLASS, MODE, X) \ +- loongarch_secondary_reload_class (CLASS, MODE, X, true) +-#define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, X) \ +- loongarch_secondary_reload_class (CLASS, MODE, X, false) ++#define LARCH_16BIT_OFFSET_P(OFFSET) (IN_RANGE (OFFSET, -32768, 32767)) ++#define LARCH_SHIFT_2_OFFSET_P(OFFSET) (((OFFSET) & 0x3) == 0) + + /* Return the maximum number of consecutive registers + needed to represent mode MODE in a register of class CLASS. */ + + #define CLASS_MAX_NREGS(CLASS, MODE) loongarch_class_max_nregs (CLASS, MODE) +- ++ + /* Stack layout; function entry, exit and calling. */ + + #define STACK_GROWS_DOWNWARD 1 +@@ -1127,11 +673,13 @@ enum reg_class + + #define TARGET_PTRMEMFUNC_VBIT_LOCATION ptrmemfunc_vbit_in_delta + +-#define ELIMINABLE_REGS \ +-{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ +- { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \ +- { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ +- { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM},} ++#define ELIMINABLE_REGS \ ++ { \ ++ {ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ ++ {ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \ ++ {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ ++ {FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \ ++ } + + #define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + (OFFSET) = loongarch_initial_elimination_offset ((FROM), (TO)) +@@ -1142,11 +690,7 @@ enum reg_class + /* The argument pointer always points to the first argument. */ + #define FIRST_PARM_OFFSET(FNDECL) 0 + +-/* o32 and o64 reserve stack space for all argument registers. */ +-#define REG_PARM_STACK_SPACE(FNDECL) \ +- (TARGET_OLDABI \ +- ? (MAX_ARGS_IN_REGISTERS * UNITS_PER_WORD) \ +- : 0) ++#define REG_PARM_STACK_SPACE(FNDECL) 0 + + /* Define this if it is the responsibility of the caller to + allocate the area reserved for arguments passed in registers. +@@ -1155,22 +699,25 @@ enum reg_class + `crtl->outgoing_args_size'. */ + #define OUTGOING_REG_PARM_STACK_SPACE(FNTYPE) 1 + +-#define STACK_BOUNDARY (TARGET_NEWABI ? 128 : 64) +- ++#define STACK_BOUNDARY (TARGET_ABI_LP64 ? 128 : 64) ++ ++/* Maximum stack alignment. */ ++#define MAX_STACK_ALIGNMENT (loongarch_stack_realign ? 
MAX_OFILE_ALIGNMENT : STACK_BOUNDARY) ++ + /* Symbolic macros for the registers used to return integer and floating + point values. */ + + #define GP_RETURN (GP_REG_FIRST + 4) + #define FP_RETURN ((TARGET_SOFT_FLOAT) ? GP_RETURN : (FP_REG_FIRST + 0)) + +-#define MAX_ARGS_IN_REGISTERS (TARGET_OLDABI ? 4 : 8) ++#define MAX_ARGS_IN_REGISTERS 8 + + /* Symbolic macros for the first/last argument registers. */ + + #define GP_ARG_FIRST (GP_REG_FIRST + 4) +-#define GP_ARG_LAST (GP_ARG_FIRST + MAX_ARGS_IN_REGISTERS - 1) ++#define GP_ARG_LAST (GP_ARG_FIRST + MAX_ARGS_IN_REGISTERS - 1) + #define FP_ARG_FIRST (FP_REG_FIRST + 0) +-#define FP_ARG_LAST (FP_ARG_FIRST + MAX_ARGS_IN_REGISTERS - 1) ++#define FP_ARG_LAST (FP_ARG_FIRST + MAX_ARGS_IN_REGISTERS - 1) + + /* True if MODE is vector and supported in a LSX vector register. */ + #define LSX_SUPPORTED_MODE_P(MODE) \ +@@ -1188,60 +735,39 @@ enum reg_class + && (GET_MODE_CLASS (MODE) == MODE_VECTOR_INT \ + || GET_MODE_CLASS (MODE) == MODE_VECTOR_FLOAT)) + ++#define RECIP_MASK_NONE 0x00 ++#define RECIP_MASK_DIV 0x01 ++#define RECIP_MASK_SQRT 0x02 ++#define RECIP_MASK_RSQRT 0x04 ++#define RECIP_MASK_VEC_DIV 0x08 ++#define RECIP_MASK_VEC_SQRT 0x10 ++#define RECIP_MASK_VEC_RSQRT 0x20 ++#define RECIP_MASK_ALL (RECIP_MASK_DIV | RECIP_MASK_SQRT \ ++ | RECIP_MASK_RSQRT | RECIP_MASK_VEC_SQRT \ ++ | RECIP_MASK_VEC_DIV | RECIP_MASK_VEC_RSQRT) ++ ++#define TARGET_RECIP_DIV ((recip_mask & RECIP_MASK_DIV) != 0 || TARGET_uARCH_LA664) ++#define TARGET_RECIP_SQRT ((recip_mask & RECIP_MASK_SQRT) != 0 || TARGET_uARCH_LA664) ++#define TARGET_RECIP_RSQRT ((recip_mask & RECIP_MASK_RSQRT) != 0 || TARGET_uARCH_LA664) ++#define TARGET_RECIP_VEC_DIV ((recip_mask & RECIP_MASK_VEC_DIV) != 0 || TARGET_uARCH_LA664) ++#define TARGET_RECIP_VEC_SQRT ((recip_mask & RECIP_MASK_VEC_SQRT) != 0 || TARGET_uARCH_LA664) ++#define TARGET_RECIP_VEC_RSQRT ((recip_mask & RECIP_MASK_VEC_RSQRT) != 0 || TARGET_uARCH_LA664) ++ + /* 1 if N is a possible register number for function argument passing. + We have no FP argument registers when soft-float. */ + + /* Accept arguments in a0-a7, and in fa0-fa7 if permitted by the ABI. */ +-#define FUNCTION_ARG_REGNO_P(N) \ +- (IN_RANGE ((N), GP_ARG_FIRST, GP_ARG_LAST) \ ++#define FUNCTION_ARG_REGNO_P(N) \ ++ (IN_RANGE ((N), GP_ARG_FIRST, GP_ARG_LAST) \ + || (UNITS_PER_FP_ARG && IN_RANGE ((N), FP_ARG_FIRST, FP_ARG_LAST))) + +- +-/* This structure has to cope with two different argument allocation +- schemes. Most LARCH ABIs view the arguments as a structure, of which +- the first N words go in registers and the rest go on the stack. If I +- < N, the Ith word might go in Ith integer argument register or in a +- floating-point register. For these ABIs, we only need to remember +- the offset of the current argument into the structure. +- +- So for the standard ABIs, the first N words are allocated to integer +- registers, and loongarch_function_arg decides on an argument-by-argument +- basis whether that argument should really go in an integer register, +- or in a floating-point one. */ +- +-typedef struct loongarch_args { +- /* Always true for varargs functions. Otherwise true if at least +- one argument has been passed in an integer register. */ +- int gp_reg_found; +- +- /* The number of arguments seen so far. */ +- unsigned int arg_number; +- +- /* The number of integer registers used so far. This is the number +- of words that have been added to the argument structure, limited +- to MAX_ARGS_IN_REGISTERS. 
*/ ++typedef struct { ++ /* Number of integer registers used so far, up to MAX_ARGS_IN_REGISTERS. */ + unsigned int num_gprs; + ++ /* Number of floating-point registers used so far, likewise. */ + unsigned int num_fprs; + +- /* The number of words passed on the stack. */ +- unsigned int stack_words; +- +- /* On the loongarch16, we need to keep track of which floating point +- arguments were passed in general registers, but would have been +- passed in the FP regs if this were a 32-bit function, so that we +- can move them to the FP regs if we wind up calling a 32-bit +- function. We record this information in fp_code, encoded in base +- four. A zero digit means no floating point argument, a one digit +- means an SFmode argument, and a two digit means a DFmode argument, +- and a three digit is not used. The low order digit is the first +- argument. Thus 6 == 1 * 4 + 2 means a DFmode argument followed by +- an SFmode argument. ??? A more sophisticated approach will be +- needed if LARCH_ABI != ABILP32. */ +- int fp_code; +- +- /* True if the function has a prototype. */ +- int prototype; + } CUMULATIVE_ARGS; + + /* Initialize a variable CUM of type CUMULATIVE_ARGS +@@ -1251,48 +777,37 @@ typedef struct loongarch_args { + #define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \ + memset (&(CUM), 0, sizeof (CUM)) + +- +-#define EPILOGUE_USES(REGNO) loongarch_epilogue_uses (REGNO) ++#define EPILOGUE_USES(REGNO) loongarch_epilogue_uses (REGNO) + ++#define STACK_ALIGN_SIZE_INTERNAL \ ++ (crtl->stack_realign_needed) \ ++? (crtl->stack_alignment_needed / BITS_PER_UNIT) \ ++: (TARGET_ABI_LP64 ? 16 : 8) + /* Treat LOC as a byte offset from the stack pointer and round it up + to the next fully-aligned offset. */ + #define LARCH_STACK_ALIGN(LOC) \ +- (TARGET_NEWABI ? ROUND_UP ((LOC), 16) : ROUND_UP ((LOC), 8)) ++ ROUND_UP ((LOC), TARGET_ABI_LP64 ? 16 : 8) + +- +-/* Output assembler code to FILE to increment profiler label # LABELNO +- for profiling a function entry. */ ++#define LARCH_STACK_ALIGN2(LOC) \ ++ ROUND_UP ((LOC), STACK_ALIGN_SIZE_INTERNAL) + + #define MCOUNT_NAME "_mcount" + + /* Emit rtl for profiling. Output assembler code to FILE + to call "_mcount" for profiling a function entry. */ +-#define PROFILE_HOOK(LABEL) \ +- { \ +- rtx fun, ra; \ +- ra = get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNUM); \ +- fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME); \ +- emit_library_call (fun, LCT_NORMAL, VOIDmode, ra, Pmode); \ ++#define PROFILE_HOOK(LABEL) \ ++ { \ ++ rtx fun, ra; \ ++ ra = get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNUM); \ ++ fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME); \ ++ emit_library_call (fun, LCT_NORMAL, VOIDmode, ra, Pmode); \ + } + + /* All the work done in PROFILE_HOOK, but still required. */ + #define FUNCTION_PROFILER(STREAM, LABELNO) do { } while (0) + +- +-/* The profiler preserves all interesting registers, including $31. */ +-#define LARCH_SAVE_REG_FOR_PROFILING_P(REGNO) false +- +-/* No loongarch port has ever used the profiler counter word, so don't emit it +- or the label for it. */ +- + #define NO_PROFILE_COUNTERS 1 + +-/* Define this macro if the code for function profiling should come +- before the function prologue. Normally, the profiling code comes +- after. */ +- +-/* #define PROFILE_BEFORE_PROLOGUE */ +- + /* EXIT_IGNORE_STACK should be nonzero if, when returning from a function, + the stack pointer does not matter. The value is tested only in + functions that have frame pointers. 
+@@ -1300,16 +815,13 @@ typedef struct loongarch_args { + + #define EXIT_IGNORE_STACK 1 + +- + /* Trampolines are a block of code followed by two pointers. */ + ++#define TRAMPOLINE_CODE_SIZE 16 + #define TRAMPOLINE_SIZE \ +- (loongarch_trampoline_code_size () + GET_MODE_SIZE (ptr_mode) * 2) +- +-/* Forcing a 64-bit alignment for 32-bit targets allows us to load two +- pointers from a single LUI base. */ +- +-#define TRAMPOLINE_ALIGNMENT 64 ++ ((Pmode == SImode) ? TRAMPOLINE_CODE_SIZE \ ++ : (TRAMPOLINE_CODE_SIZE + POINTER_SIZE * 2)) ++#define TRAMPOLINE_ALIGNMENT POINTER_SIZE + + /* loongarch_trampoline_init calls this library function to flush + program and data caches. */ +@@ -1318,96 +830,64 @@ typedef struct loongarch_args { + #define CACHE_FLUSH_FUNC "_flush_cache" + #endif + +-#define LARCH_ICACHE_SYNC(ADDR, SIZE) \ +- /* Flush both caches. We need to flush the data cache in case \ +- the system has a write-back cache. */ \ +- emit_library_call (gen_rtx_SYMBOL_REF (Pmode, loongarch_cache_flush_func), \ +- LCT_NORMAL, VOIDmode, ADDR, Pmode, SIZE, Pmode, \ +- GEN_INT (3), TYPE_MODE (integer_type_node)) +- +- + /* Addressing modes, and classification of registers for them. */ + +-#define REGNO_OK_FOR_INDEX_P(REGNO) 0 ++#define REGNO_OK_FOR_INDEX_P(REGNO) \ ++ loongarch_regno_mode_ok_for_base_p (REGNO, VOIDmode, 1) ++ + #define REGNO_MODE_OK_FOR_BASE_P(REGNO, MODE) \ + loongarch_regno_mode_ok_for_base_p (REGNO, MODE, 1) +- ++ + /* Maximum number of registers that can appear in a valid memory address. */ + +-#define MAX_REGS_PER_ADDRESS 1 ++#define MAX_REGS_PER_ADDRESS 2 + + /* Check for constness inline but use loongarch_legitimate_address_p + to check whether a constant really is an address. */ + +-#define CONSTANT_ADDRESS_P(X) \ +- (CONSTANT_P (X) && memory_address_p (SImode, X)) ++#define CONSTANT_ADDRESS_P(X) (CONSTANT_P (X) && memory_address_p (SImode, X)) + + /* This handles the magic '..CURRENT_FUNCTION' symbol, which means + 'the start of the function that this code is output in'. */ + +-#define ASM_OUTPUT_LABELREF(FILE,NAME) \ +- do { \ +- if (strcmp (NAME, "..CURRENT_FUNCTION") == 0) \ +- asm_fprintf ((FILE), "%U%s", \ +- XSTR (XEXP (DECL_RTL (current_function_decl), \ +- 0), 0)); \ +- else \ +- asm_fprintf ((FILE), "%U%s", (NAME)); \ +- } while (0) +- +-/* Flag to mark a function decl symbol that requires a long call. */ +-#define SYMBOL_FLAG_LONG_CALL (SYMBOL_FLAG_MACH_DEP << 0) +-#define SYMBOL_REF_LONG_CALL_P(X) \ +- ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_LONG_CALL) != 0) +- +-/* This flag marks functions that cannot be lazily bound. */ +-#define SYMBOL_FLAG_BIND_NOW (SYMBOL_FLAG_MACH_DEP << 1) +-#define SYMBOL_REF_BIND_NOW_P(RTX) \ +- ((SYMBOL_REF_FLAGS (RTX) & SYMBOL_FLAG_BIND_NOW) != 0) +- +-/* True if we're generating a form of LARCH16 code in which jump tables +- are stored in the text section and encoded as 16-bit PC-relative +- offsets. This is only possible when general text loads are allowed, +- since the table access itself will be an "lh" instruction. If the +- PC-relative offsets grow too large, 32-bit offsets are used instead. */ +- +- +-#define CASE_VECTOR_MODE (ptr_mode) ++#define ASM_OUTPUT_LABELREF(FILE, NAME) \ ++ do \ ++ { \ ++ if (strcmp (NAME, "..CURRENT_FUNCTION") == 0) \ ++ asm_fprintf ((FILE), "%U%s", \ ++ XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0)); \ ++ else \ ++ asm_fprintf ((FILE), "%U%s", (NAME)); \ ++ } \ ++ while (0) + +-/* Only use short offsets if their range will not overflow. 
*/ +-#define CASE_VECTOR_SHORTEN_MODE(MIN, MAX, BODY) \ +- (ptr_mode ? HImode : SImode) ++#define CASE_VECTOR_MODE Pmode + ++#define CASE_VECTOR_SHORTEN_MODE(MIN, MAX, BODY) Pmode + + /* Define this as 1 if `char' should by default be signed; else as 0. */ + #ifndef DEFAULT_SIGNED_CHAR + #define DEFAULT_SIGNED_CHAR 1 + #endif + +-/* Although LDC1 and SDC1 provide 64-bit moves on 32-bit targets, +- we generally don't want to use them for copying arbitrary data. +- A single N-word move is usually the same cost as N single-word moves. */ ++/* The SPARC port says: ++ The maximum number of bytes that a single instruction ++ can move quickly between memory and registers or between ++ two memory locations. */ + #define MOVE_MAX UNITS_PER_WORD + /* We don't modify it for LSX as it is only used by the classic reload. */ + #define MAX_MOVE_MAX 8 + +-/* Define this macro as a C expression which is nonzero if +- accessing less than a word of memory (i.e. a `char' or a +- `short') is no faster than accessing a word of memory, i.e., if +- such access require more than one instruction or if there is no +- difference in cost between byte and (aligned) word loads. +- +- On RISC machines, it tends to generate better code to define +- this as 1, since it avoids making a QI or HI mode register. +- +-*/ +-#define SLOW_BYTE_ACCESS (1) +- +-/* Standard LARCH integer shifts truncate the shift amount to the +- width of the shifted operand. However, Loongson MMI shifts +- do not truncate the shift amount at all. */ +-#define SHIFT_COUNT_TRUNCATED (1) ++/* The SPARC port says: ++ Nonzero if access to memory by bytes is slow and undesirable. ++ For RISC chips, it means that access to memory by bytes is no ++ better than access by words when possible, so grab a whole word ++ and maybe make use of that. */ ++#define SLOW_BYTE_ACCESS 1 + ++/* Standard LoongArch integer shifts truncate the shift amount to the ++ width of the shifted operand. */ ++#define SHIFT_COUNT_TRUNCATED 1 + + /* Specify the machine mode that pointers have. + After generation of rtl, the compiler makes no further distinction +@@ -1422,7 +902,6 @@ typedef struct loongarch_args { + + #define FUNCTION_MODE SImode + +- + /* We allocate $fcc registers by hand and can't cope with moves of + CCmode registers to and from pseudos (or memory). */ + #define AVOID_CCMODE_COPIES +@@ -1433,14 +912,6 @@ typedef struct loongarch_args { + #define BRANCH_COST(speed_p, predictable_p) loongarch_branch_cost + #define LOGICAL_OP_NON_SHORT_CIRCUIT 0 + +-/* The LARCH port has several functions that return an instruction count. +- Multiplying the count by this value gives the number of bytes that +- the instructions occupy. */ +-#define BASE_INSN_LENGTH (4) +- +-/* The length of a NOP in bytes. */ +-#define NOP_INSN_LENGTH (4) +- + /* If defined, modifies the length assigned to instruction INSN as a + function of the context in which it is used. LENGTH is an lvalue + that contains the initially computed length of the insn and should +@@ -1451,17 +922,8 @@ typedef struct loongarch_args { + /* Return the asm template for a conditional branch instruction. + OPCODE is the opcode's mnemonic and OPERANDS is the asm template for + its operands. */ +-#define LARCH_BRANCH(OPCODE, OPERANDS) \ +- OPCODE "\t" OPERANDS ++#define LARCH_BRANCH(OPCODE, OPERANDS) OPCODE "\t" OPERANDS + +-#define LARCH_BRANCH_C(OPCODE, OPERANDS) \ +- OPCODE "%:\t" OPERANDS +- +-/* Return an asm string that forces INSN to be treated as an absolute +- J or JAL instruction instead of an assembler macro. 
*/ +-#define LARCH_ABSOLUTE_JUMP(INSN) INSN +- +- + /* Control the assembler format that we output. */ + + /* Output to assembler file text saying following lines +@@ -1478,20 +940,19 @@ typedef struct loongarch_args { + #define ASM_APP_OFF " #NO_APP\n" + #endif + +-#define REGISTER_NAMES \ +-{ "$r0", "$r1", "$r2", "$r3", "$r4", "$r5", "$r6", "$r7", \ +- "$r8", "$r9", "$r10", "$r11", "$r12", "$r13", "$r14", "$r15", \ +- "$r16", "$r17", "$r18", "$r19", "$r20", "$r21", "$r22", "$r23", \ +- "$r24", "$r25", "$r26", "$r27", "$r28", "$r29", "$r30", "$r31", \ +- "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", \ +- "$f8", "$f9", "$f10", "$f11", "$f12", "$f13", "$f14", "$f15", \ +- "$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22", "$f23", \ +- "$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30", "$f31", \ +- "$fcc0","$fcc1","$fcc2","$fcc3","$fcc4","$fcc5","$fcc6","$fcc7", \ ++#define REGISTER_NAMES \ ++{ "$r0", "$r1", "$r2", "$r3", "$r4", "$r5", "$r6", "$r7", \ ++ "$r8", "$r9", "$r10", "$r11", "$r12", "$r13", "$r14", "$r15", \ ++ "$r16", "$r17", "$r18", "$r19", "$r20", "$r21", "$r22", "$r23", \ ++ "$r24", "$r25", "$r26", "$r27", "$r28", "$r29", "$r30", "$r31", \ ++ "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", \ ++ "$f8", "$f9", "$f10", "$f11", "$f12", "$f13", "$f14", "$f15", \ ++ "$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22", "$f23", \ ++ "$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30", "$f31", \ ++ "$fcc0","$fcc1","$fcc2","$fcc3","$fcc4","$fcc5","$fcc6","$fcc7", \ + "$arg", "$frame"} + +-/* List the "software" names for each register. Also list the numerical +- names for $fp and $sp. */ ++/* This macro defines additional names for hard registers. */ + + #define ADDITIONAL_REGISTER_NAMES \ + { \ +@@ -1595,61 +1056,17 @@ typedef struct loongarch_args { + { "xr31", 31 + FP_REG_FIRST } \ + } + +-#define DBR_OUTPUT_SEQEND(STREAM) \ +-do \ +- { \ +- /* Emit a blank line after the delay slot for emphasis. */ \ +- fputs ("\n", STREAM); \ +- } \ +-while (0) +- +-/* The LARCH implementation uses some labels for its own purpose. The +- following lists what labels are created, and are all formed by the +- pattern $L[a-z].*. The machine independent portion of GCC creates +- labels matching: $L[A-Z][0-9]+ and $L[0-9]+. +- +- LM[0-9]+ Silicon Graphics/ECOFF stabs label before each stmt. +- $Lb[0-9]+ Begin blocks for LARCH debug support +- $Lc[0-9]+ Label for use in s operation. +- $Le[0-9]+ End blocks for LARCH debug support */ +- +-#undef ASM_DECLARE_OBJECT_NAME +-#define ASM_DECLARE_OBJECT_NAME(STREAM, NAME, DECL) \ +- loongarch_declare_object (STREAM, NAME, "", ":\n") +- + /* Globalizing directive for a label. */ + #define GLOBAL_ASM_OP "\t.globl\t" + +-/* This says how to define a global common symbol. */ +- +-#define ASM_OUTPUT_ALIGNED_DECL_COMMON loongarch_output_aligned_decl_common +- +-/* This says how to define a local common symbol (i.e., not visible to +- linker). */ +- +-#ifndef ASM_OUTPUT_ALIGNED_LOCAL +-#define ASM_OUTPUT_ALIGNED_LOCAL(STREAM, NAME, SIZE, ALIGN) \ +- loongarch_declare_common_object (STREAM, NAME, "\n\t.lcomm\t", SIZE, ALIGN, false) +-#endif +- + /* This says how to output an external. It would be possible not to +- output anything and let undefined symbol become external. However ++ output anything and let undefined symbol become external. However + the assembler uses length information on externals to allocate in + data/sdata bss/sbss, thereby saving exec time. 
*/ + + #undef ASM_OUTPUT_EXTERNAL +-#define ASM_OUTPUT_EXTERNAL(STREAM,DECL,NAME) \ +- loongarch_output_external(STREAM,DECL,NAME) +- +-/* This is how to declare a function name. The actual work of +- emitting the label is moved to function_prologue, so that we can +- get the line number correctly emitted before the .ent directive, +- and after any .file directives. Define as empty so that the function +- is not declared before the .ent directive elsewhere. */ +- +-#undef ASM_DECLARE_FUNCTION_NAME +-#define ASM_DECLARE_FUNCTION_NAME(STREAM,NAME,DECL) \ +- loongarch_declare_function_name(STREAM,NAME,DECL) ++#define ASM_OUTPUT_EXTERNAL(STREAM, DECL, NAME) \ ++ loongarch_output_external (STREAM, DECL, NAME) + + /* This is how to store into the string LABEL + the symbol_ref name of an internal numbered label where +@@ -1657,8 +1074,8 @@ while (0) + This is suitable for output with `assemble_name'. */ + + #undef ASM_GENERATE_INTERNAL_LABEL +-#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \ +- sprintf ((LABEL), "*%s%s%ld", (LOCAL_LABEL_PREFIX), (PREFIX), (long)(NUM)) ++#define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM) \ ++ sprintf ((LABEL), "*%s%s%ld", (LOCAL_LABEL_PREFIX), (PREFIX), (long) (NUM)) + + /* Print debug labels as "foo = ." rather than "foo:" because they should + represent a byte pointer rather than an ISA-encoded address. This is +@@ -1677,159 +1094,108 @@ while (0) + At the time of writing, this hook is not used for the function end + label: + +- $LFExxx: ++ $LFExxx: + .end foo + + */ + +-#define ASM_OUTPUT_DEBUG_LABEL(FILE, PREFIX, NUM) \ ++#define ASM_OUTPUT_DEBUG_LABEL(FILE, PREFIX, NUM) \ + fprintf (FILE, "%s%s%d = .\n", LOCAL_LABEL_PREFIX, PREFIX, NUM) + + /* This is how to output an element of a case-vector that is absolute. */ + +-#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \ +- fprintf (STREAM, "\t%s\t%sL%d\n", \ +- ptr_mode == DImode ? ".dword" : ".word", \ +- LOCAL_LABEL_PREFIX, \ +- VALUE) +- +-/* This is how to output an element of a case-vector. We can make the +- entries GP-relative when .gp(d)word is supported. */ +- +-#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \ +-do { \ +- if (TARGET_RTP_PIC) \ +- { \ +- /* Make the entry relative to the start of the function. */ \ +- rtx fnsym = XEXP (DECL_RTL (current_function_decl), 0); \ +- fprintf (STREAM, "\t%s\t%sL%d-", \ +- Pmode == DImode ? ".dword" : ".word", \ +- LOCAL_LABEL_PREFIX, VALUE); \ +- assemble_name (STREAM, XSTR (fnsym, 0)); \ +- fprintf (STREAM, "\n"); \ +- } \ +- else \ +- fprintf (STREAM, "\t%s\t%sL%d-%sL%d\n", \ +- ptr_mode == DImode ? ".dword" : ".word", \ +- LOCAL_LABEL_PREFIX, VALUE, \ +- LOCAL_LABEL_PREFIX, REL); \ +-} while (0) +- +-/* Mark inline jump tables as data for the purpose of disassembly. For +- simplicity embed the jump table's label number in the local symbol +- produced so that multiple jump tables within a single function end +- up marked with unique symbols. Retain the alignment setting from +- `elfos.h' as we are replacing the definition from there. */ +- +-#undef ASM_OUTPUT_BEFORE_CASE_LABEL +-#define ASM_OUTPUT_BEFORE_CASE_LABEL(STREAM, PREFIX, NUM, TABLE) \ +- do \ +- { \ +- ASM_OUTPUT_ALIGN ((STREAM), 2); \ +- if (JUMP_TABLES_IN_TEXT_SECTION) \ +- loongarch_set_text_contents_type (STREAM, "__jump_", NUM, FALSE); \ +- } \ +- while (0) ++#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \ ++ fprintf (STREAM, "\t%s\t%sL%d\n", ptr_mode == DImode ? 
".dword" : ".word", \ ++ LOCAL_LABEL_PREFIX, VALUE) + +-/* Reset text marking to code after an inline jump table. Like with +- the beginning of a jump table use the label number to keep symbols +- unique. */ ++/* This is how to output an element of a case-vector. */ + +-#define ASM_OUTPUT_CASE_END(STREAM, NUM, TABLE) \ +- do \ +- if (JUMP_TABLES_IN_TEXT_SECTION) \ +- loongarch_set_text_contents_type (STREAM, "__jend_", NUM, TRUE); \ ++#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \ ++ do \ ++ { \ ++ fprintf (STREAM, "\t%s\t%sL%d-%sL%d\n", \ ++ ptr_mode == DImode ? ".dword" : ".word", LOCAL_LABEL_PREFIX, \ ++ VALUE, LOCAL_LABEL_PREFIX, REL); \ ++ } \ + while (0) + ++#define JUMP_TABLES_IN_TEXT_SECTION 0 ++ + /* This is how to output an assembler line + that says to advance the location counter + to a multiple of 2**LOG bytes. */ + +-#define ASM_OUTPUT_ALIGN(STREAM,LOG) \ +- fprintf (STREAM, "\t.align\t%d\n", (LOG)) ++#define ASM_OUTPUT_ALIGN(STREAM, LOG) fprintf (STREAM, "\t.align\t%d\n", (LOG)) + +-#define ASM_OUTPUT_ALIGN_WITH_NOP(STREAM,LOG) \ ++/* "nop" instruction 54525952 (andi $r0,$r0,0) is ++ used for padding. */ ++#define ASM_OUTPUT_ALIGN_WITH_NOP(STREAM, LOG) \ + fprintf (STREAM, "\t.align\t%d,54525952,4\n", (LOG)) + +- + /* This is how to output an assembler line to advance the location + counter by SIZE bytes. */ + + #undef ASM_OUTPUT_SKIP +-#define ASM_OUTPUT_SKIP(STREAM,SIZE) \ +- fprintf (STREAM, "\t.space\t" HOST_WIDE_INT_PRINT_UNSIGNED"\n", (SIZE)) ++#define ASM_OUTPUT_SKIP(STREAM, SIZE) \ ++ fprintf (STREAM, "\t.space\t" HOST_WIDE_INT_PRINT_UNSIGNED "\n", (SIZE)) + + /* This is how to output a string. */ + #undef ASM_OUTPUT_ASCII + #define ASM_OUTPUT_ASCII loongarch_output_ascii + +- +-/* Default to -G 8 */ +-#ifndef LARCH_DEFAULT_GVALUE +-#define LARCH_DEFAULT_GVALUE 8 +-#endif +- + /* Define the strings to put out for each section in the object file. */ +-#define TEXT_SECTION_ASM_OP "\t.text" /* instructions */ +-#define DATA_SECTION_ASM_OP "\t.data" /* large data */ ++#define TEXT_SECTION_ASM_OP "\t.text" /* instructions */ ++#define DATA_SECTION_ASM_OP "\t.data" /* large data */ + + #undef READONLY_DATA_SECTION_ASM_OP +-#define READONLY_DATA_SECTION_ASM_OP "\t.section\t.rodata" /* read-only data */ +- +-#define ASM_OUTPUT_REG_PUSH(STREAM,REGNO) \ +-do \ +- { \ +- fprintf (STREAM, "\t%s\t%s,%s,-8\n\t%s\t%s,0(%s)\n", \ +- TARGET_64BIT ? "daddiu" : "addiu", \ +- reg_names[STACK_POINTER_REGNUM], \ +- reg_names[STACK_POINTER_REGNUM], \ +- TARGET_64BIT ? "sd" : "sw", \ +- reg_names[REGNO], \ +- reg_names[STACK_POINTER_REGNUM]); \ +- } \ +-while (0) +- +-#define ASM_OUTPUT_REG_POP(STREAM,REGNO) \ +-do \ +- { \ +- loongarch_push_asm_switch (&loongarch_noreorder); \ +- fprintf (STREAM, "\t%s\t%s,0(%s)\n\t%s\t%s,%s,8\n", \ +- TARGET_64BIT ? "ld" : "lw", \ +- reg_names[REGNO], \ +- reg_names[STACK_POINTER_REGNUM], \ +- TARGET_64BIT ? "daddu" : "addu", \ +- reg_names[STACK_POINTER_REGNUM], \ +- reg_names[STACK_POINTER_REGNUM]); \ +- loongarch_pop_asm_switch (&loongarch_noreorder); \ +- } \ +-while (0) ++#define READONLY_DATA_SECTION_ASM_OP "\t.section\t.rodata" /* read-only data */ ++ ++#define ASM_OUTPUT_REG_PUSH(STREAM, REGNO) \ ++ do \ ++ { \ ++ fprintf (STREAM, "\t%s\t%s,%s,-8\n\t%s\t%s,%s,0\n", \ ++ TARGET_64BIT ? "addi.d" : "addi.w", \ ++ reg_names[STACK_POINTER_REGNUM], \ ++ reg_names[STACK_POINTER_REGNUM], \ ++ TARGET_64BIT ? 
"st.d" : "st.w", reg_names[REGNO], \ ++ reg_names[STACK_POINTER_REGNUM]); \ ++ } \ ++ while (0) ++ ++#define ASM_OUTPUT_REG_POP(STREAM, REGNO) \ ++ do \ ++ { \ ++ fprintf (STREAM, "\t%s\t%s,%s,0\n\t%s\t%s,%s,8\n", \ ++ TARGET_64BIT ? "ld.d" : "ld.w", reg_names[REGNO], \ ++ reg_names[STACK_POINTER_REGNUM], \ ++ TARGET_64BIT ? "addi.d" : "addi.w", \ ++ reg_names[STACK_POINTER_REGNUM], \ ++ reg_names[STACK_POINTER_REGNUM]); \ ++ } \ ++ while (0) + + /* How to start an assembler comment. +- The leading space is important (the loongarch native assembler requires it). */ ++ The leading space is important (the loongarch native assembler requires it). ++ */ + #ifndef ASM_COMMENT_START + #define ASM_COMMENT_START " #" + #endif +- ++ + #undef SIZE_TYPE + #define SIZE_TYPE (POINTER_SIZE == 64 ? "long unsigned int" : "unsigned int") + + #undef PTRDIFF_TYPE + #define PTRDIFF_TYPE (POINTER_SIZE == 64 ? "long int" : "int") + +-/* The minimum alignment of any expanded block move. */ +-#define LARCH_MIN_MOVE_MEM_ALIGN 16 +- + /* The maximum number of bytes that can be copied by one iteration of + a movmemsi loop; see loongarch_block_move_loop. */ +-#define LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER \ +- (UNITS_PER_WORD * 4) ++#define LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER (UNITS_PER_WORD * 4) + + /* The maximum number of bytes that can be copied by a straight-line + implementation of movmemsi; see loongarch_block_move_straight. We want + to make sure that any loop-based implementation will iterate at + least twice. */ +-#define LARCH_MAX_MOVE_BYTES_STRAIGHT \ +- (LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER * 2) ++#define LARCH_MAX_MOVE_BYTES_STRAIGHT (LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER * 2) + + /* The base cost of a memcpy call, for MOVE_RATIO and friends. These + values were determined experimentally by benchmarking with CSiBE. +@@ -1847,73 +1213,29 @@ while (0) + we'll have to generate a load/store pair for each, halve the + value of LARCH_CALL_RATIO to take that into account. */ + +-#define MOVE_RATIO(speed) \ +- (HAVE_movmemsi \ ++#define MOVE_RATIO(speed) \ ++ (HAVE_movmemsi \ + ? LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER / UNITS_PER_WORD \ + : CLEAR_RATIO (speed) / 2) + + /* For CLEAR_RATIO, when optimizing for size, give a better estimate + of the length of a memset call, but use the default otherwise. */ + +-#define CLEAR_RATIO(speed)\ +- ((speed) ? 15 : LARCH_CALL_RATIO) ++#define CLEAR_RATIO(speed) ((speed) ? 15 : LARCH_CALL_RATIO) + + /* This is similar to CLEAR_RATIO, but for a non-zero constant, so when + optimizing for size adjust the ratio to account for the overhead of + loading the constant and replicating it across the word. */ + +-#define SET_RATIO(speed) \ +- ((speed) ? 15 : LARCH_CALL_RATIO - 2) +- +-/* Since the bits of the _init and _fini function is spread across +- many object files, each potentially with its own GP, we must assume +- we need to load our GP. We don't preserve $gp or $ra, since each +- init/fini chunk is supposed to initialize $gp, and crti/crtn +- already take care of preserving $ra and, when appropriate, $gp. */ +-#if (defined _ABI64 && _LARCH_SIM == _ABI64) +-#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ +- asm (SECTION_OP "\n\ +- .set push\n\ +- la $r20, " USER_LABEL_PREFIX #FUNC "\n\ +- jirl $r1, $r20, 0\n\ +- .set pop\n\ +- " TEXT_SECTION_ASM_OP); +-#endif +-#ifndef HAVE_AS_TLS +-#define HAVE_AS_TLS 0 +-#endif +- +-#ifndef HAVE_AS_NAN +-#define HAVE_AS_NAN 0 +-#endif ++#define SET_RATIO(speed) ((speed) ? 
15 : LARCH_CALL_RATIO - 2) + + #ifndef USED_FOR_TARGET +-/* Information about ".set noFOO; ...; .set FOO" blocks. */ +-struct loongarch_asm_switch { +- /* The FOO in the description above. */ +- const char *name; +- +- /* The current block nesting level, or 0 if we aren't in a block. */ +- int nesting_level; +-}; +- + extern const enum reg_class loongarch_regno_to_class[]; +-extern const char *current_function_file; /* filename current function is in */ +-extern int num_source_filenames; /* current .file # */ +-extern int loongarch_dbx_regno[]; + extern int loongarch_dwarf_regno[]; +-extern bool loongarch_split_p[]; +-extern bool loongarch_use_pcrel_pool_p[]; +-extern enum processor loongarch_arch; /* which cpu to codegen for */ +-extern enum processor loongarch_tune; /* which cpu to schedule for */ +-extern int loongarch_isa; /* architectural level */ +-extern int loongarch_isa_rev; +-extern const struct loongarch_cpu_info *loongarch_arch_info; +-extern const struct loongarch_cpu_info *loongarch_tune_info; +-extern unsigned int loongarch_base_compression_flags; + + /* Information about a function's frame layout. */ +-struct GTY(()) loongarch_frame_info { ++struct GTY (()) loongarch_frame_info ++{ + /* The size of the frame in bytes. */ + HOST_WIDE_INT total_size; + +@@ -1930,216 +1252,67 @@ struct GTY(()) loongarch_frame_info { + /* Bit X is set if the function saves or restores GPR X. */ + unsigned int mask; + ++ unsigned int gpr_saved_num; ++ + /* Likewise FPR X. */ + unsigned int fmask; + +- /* Likewise doubleword accumulator X ($acX). */ +- unsigned int acc_mask; +- +- /* The number of GPRs, FPRs, doubleword accumulators and COP0 +- registers saved. */ +- unsigned int num_gp; +- unsigned int num_fp; +- unsigned int num_acc; +- unsigned int num_cop0_regs; +- +- /* The offset of the topmost GPR, FPR, accumulator and COP0-register +- save slots from the top of the frame, or zero if no such slots are +- needed. */ +- HOST_WIDE_INT gp_save_offset; +- HOST_WIDE_INT fp_save_offset; +- HOST_WIDE_INT acc_save_offset; +- HOST_WIDE_INT cop0_save_offset; +- +- /* Likewise, but giving offsets from the bottom of the frame. */ ++ /* How much the GPR save/restore routines adjust sp (or 0 if unused). */ ++ unsigned save_libcall_adjustment; ++ ++ /* Offsets of fixed-point and floating-point save areas from frame ++ bottom. */ + HOST_WIDE_INT gp_sp_offset; + HOST_WIDE_INT fp_sp_offset; +- HOST_WIDE_INT acc_sp_offset; +- HOST_WIDE_INT cop0_sp_offset; + +- /* Similar, but the value passed to _mcount. */ +- HOST_WIDE_INT ra_fp_offset; +- +- /* The offset of arg_pointer_rtx from the bottom of the frame. */ +- HOST_WIDE_INT arg_pointer_offset; ++ /* Offset of virtual frame pointer from stack pointer/frame bottom. */ ++ HOST_WIDE_INT frame_pointer_offset; + +- /* The offset of hard_frame_pointer_rtx from the bottom of the frame. */ ++ /* Offset of hard frame pointer from stack pointer/frame bottom. */ + HOST_WIDE_INT hard_frame_pointer_offset; + +- /* How much the GPR save/restore routines adjust sp (or 0 if unused). */ +- unsigned save_libcall_adjustment; +- +- /* Offset of virtual frame pointer from stack pointer/frame bottom */ +- HOST_WIDE_INT frame_pointer_offset; +-}; +- +-/* Enumeration for masked vectored (VI) and non-masked (EIC) interrupts. 
*/ +-enum loongarch_int_mask +-{ +- INT_MASK_EIC = -1, +- INT_MASK_SW0 = 0, +- INT_MASK_SW1 = 1, +- INT_MASK_HW0 = 2, +- INT_MASK_HW1 = 3, +- INT_MASK_HW2 = 4, +- INT_MASK_HW3 = 5, +- INT_MASK_HW4 = 6, +- INT_MASK_HW5 = 7 ++ /* The offset of arg_pointer_rtx from the bottom of the frame. */ ++ HOST_WIDE_INT arg_pointer_offset; + }; + +-/* Enumeration to mark the existence of the shadow register set. +- SHADOW_SET_INTSTACK indicates a shadow register set with a valid stack +- pointer. */ +-enum loongarch_shadow_set ++struct GTY (()) machine_function + { +- SHADOW_SET_NO, +- SHADOW_SET_YES, +- SHADOW_SET_INTSTACK +-}; +- +-struct GTY(()) machine_function { + /* The next floating-point condition-code register to allocate +- for 8CC targets, relative to ST_REG_FIRST. */ ++ for 8CC targets, relative to FCC_REG_FIRST. */ + unsigned int next_fcc; + + /* The number of extra stack bytes taken up by register varargs. + This area is allocated by the callee at the very top of the frame. */ + int varargs_size; + +- /* The current frame information, calculated by loongarch_compute_frame_info. */ ++ /* The current frame information, calculated by loongarch_compute_frame_info. ++ */ + struct loongarch_frame_info frame; +- +- /* How many instructions it takes to load a label into $AT, or 0 if +- this property hasn't yet been calculated. */ +- unsigned int load_label_num_insns; +- +- /* True if loongarch_adjust_insn_length should ignore an instruction's +- hazard attribute. */ +- bool ignore_hazard_length_p; +- +- /* True if the whole function is suitable for .set noreorder and +- .set nomacro. */ +- bool all_noreorder_p; +- +- /* True if the function has "inflexible" and "flexible" references +- to the global pointer. See loongarch_cfun_has_inflexible_gp_ref_p +- and loongarch_cfun_has_flexible_gp_ref_p for details. */ +- bool has_inflexible_gp_insn_p; +- bool has_flexible_gp_insn_p; +- +- /* True if the function's prologue must load the global pointer +- value into pic_offset_table_rtx and store the same value in +- the function's cprestore slot (if any). Even if this value +- is currently false, we may decide to set it to true later; +- see loongarch_must_initialize_gp_p () for details. */ +- bool must_initialize_gp_p; +- +- /* True if the current function must restore $gp after any potential +- clobber. This value is only meaningful during the first post-epilogue +- split_insns pass; see loongarch_must_initialize_gp_p () for details. */ +- bool must_restore_gp_when_clobbered_p; +- +- /* True if this is an interrupt handler. */ +- bool interrupt_handler_p; +- +- /* Records the way in which interrupts should be masked. Only used if +- interrupts are not kept masked. */ +- enum loongarch_int_mask int_mask; +- +- /* Records if this is an interrupt handler that uses shadow registers. */ +- enum loongarch_shadow_set use_shadow_register_set; +- +- /* True if this is an interrupt handler that should keep interrupts +- masked. */ +- bool keep_interrupts_masked_p; +- +- /* True if this is an interrupt handler that should use DERET +- instead of ERET. */ +- bool use_debug_exception_return_p; +- +- /* True if at least one of the formal parameters to a function must be +- written to the frame header (probably so its address can be taken). */ +- bool does_not_use_frame_header; +- +- /* True if none of the functions that are called by this function need +- stack space allocated for their arguments. 
*/ +- bool optimize_call_stack; +- +- /* True if one of the functions calling this function may not allocate +- a frame header. */ +- bool callers_may_not_allocate_frame; +- +- /* True if GCC stored callee saved registers in the frame header. */ +- bool use_frame_header_for_callee_saved_regs; + }; + #endif + +-/* Enable querying of DFA units. */ +-#define CPU_UNITS_QUERY 0 +- +-/* As on most targets, we want the .eh_frame section to be read-only where +- possible. And as on most targets, this means two things: +- +- (a) Non-locally-binding pointers must have an indirect encoding, +- so that the addresses in the .eh_frame section itself become +- locally-binding. +- +- (b) A shared library's .eh_frame section must encode locally-binding +- pointers in a relative (relocation-free) form. +- +- However, LARCH has traditionally not allowed directives like: +- +- .long x-. +- +- in cases where "x" is in a different section, or is not defined in the +- same assembly file. We are therefore unable to emit the PC-relative +- form required by (b) at assembly time. +- +- Fortunately, the linker is able to convert absolute addresses into +- PC-relative addresses on our behalf. Unfortunately, only certain +- versions of the linker know how to do this for indirect pointers, +- and for personality data. We must fall back on using writable +- .eh_frame sections for shared libraries if the linker does not +- support this feature. */ +-#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \ ++#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \ + (((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_absptr) + +-#define SWITCHABLE_TARGET 1 +- +-/* Several named LARCH patterns depend on Pmode. These patterns have the +- form _si for Pmode == SImode and _di for Pmode == DImode. ++/* Several named LoongArch patterns depend on Pmode. These patterns have the ++ form si for Pmode == SImode and di for Pmode == DImode. + Add the appropriate suffix to generator function NAME and invoke it + with arguments ARGS. */ + #define PMODE_INSN(NAME, ARGS) \ +- (Pmode == SImode ? NAME ## _si ARGS : NAME ## _di ARGS) ++ (Pmode == SImode ? NAME##si ARGS : NAME##di ARGS) ++ ++/* Do emit .note.GNU-stack by default. */ ++#ifndef NEED_INDICATE_EXEC_STACK ++#define NEED_INDICATE_EXEC_STACK 1 ++#endif + +-/***********************/ +-/* N_LARCH-PORT */ +-/***********************/ + /* The `Q' extension is not yet supported. */ +-/* TODO: according to march */ ++/* TODO: according to march. */ + #define UNITS_PER_FP_REG (TARGET_DOUBLE_FLOAT ? 8 : 4) + + /* The largest type that can be passed in floating-point registers. */ +-/* TODO: according to mabi */ +-#define UNITS_PER_FP_ARG (TARGET_HARD_FLOAT ? (TARGET_64BIT ? 8 : 4) : 0) +- +-/* Internal macros to classify an ISA register's type. */ +- +-#define GP_TEMP_FIRST (GP_REG_FIRST + 12) +- +-#define CALLEE_SAVED_REG_NUMBER(REGNO) \ +- ((REGNO) >= 22 && (REGNO) <= 31 ? (REGNO) - 22 : -1) +- +-#define N_LARCH_PROLOGUE_TEMP_REGNUM (GP_TEMP_FIRST + 1) +-#define N_LARCH_PROLOGUE_TEMP(MODE) gen_rtx_REG (MODE, N_LARCH_PROLOGUE_TEMP_REGNUM) +- +-#define LIBCALL_VALUE(MODE) \ +- loongarch_function_value (NULL_TREE, NULL_TREE, MODE) +- +-#define FUNCTION_VALUE(VALTYPE, FUNC) \ +- loongarch_function_value (VALTYPE, FUNC, VOIDmode) +- +-#define FRAME_GROWS_DOWNWARD 1 ++/* TODO: according to mabi. */ ++#define UNITS_PER_FP_ARG \ ++ (TARGET_HARD_FLOAT ? (TARGET_DOUBLE_FLOAT ? 
8 : 4) : 0) + + #define FUNCTION_VALUE_REGNO_P(N) ((N) == GP_RETURN || (N) == FP_RETURN) +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index be950c9e4..097c9f4db 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1,7 +1,7 @@ +-;; Loongarch.md Machine Description for LARCH based processors +-;; Copyright (C) 1989-2018 Free Software Foundation, Inc. +-;; Contributed by A. Lichnewsky, lich@inria.inria.fr +-;; Changes by Michael Meissner, meissner@osf.org ++;; Machine Description for LoongArch for GNU compiler. ++;; Copyright (C) 2020-2022 Free Software Foundation, Inc. ++;; Contributed by Loongson Ltd. ++;; Based on MIPS target for GNU compiler. + + ;; This file is part of GCC. + +@@ -19,118 +19,96 @@ + ;; along with GCC; see the file COPYING3. If not see + ;; . + +-(define_enum "processor" [ +- loongarch +- loongarch64 +- la464 +-]) +- + (define_c_enum "unspec" [ + ;; Integer operations that are too cumbersome to describe directly. +- UNSPEC_WSBH +- UNSPEC_DSBH +- UNSPEC_DSHD ++ UNSPEC_REVB_2H ++ UNSPEC_REVB_4H ++ UNSPEC_REVH_D + + ;; Floating-point moves. + UNSPEC_LOAD_LOW + UNSPEC_LOAD_HIGH + UNSPEC_STORE_WORD + UNSPEC_MOVGR2FRH ++ UNSPEC_MOVGR2FR + UNSPEC_MOVFRH2GR ++ UNSPEC_MOVFR2GR ++ UNSPEC_MOVFCC2GR ++ UNSPEC_MOVGR2FCC ++ UNSPEC_MOVFR2FCC + +- ;; Floating-point environment. +- UNSPEC_MOVFCSR2GR +- UNSPEC_MOVGR2FCSR ++ ;; Floating point unspecs. ++ UNSPEC_FRINT ++ UNSPEC_FCLASS ++ UNSPEC_FCOPYSIGN + +- ;; GP manipulation. ++ ;; Override return address for exception handling. + UNSPEC_EH_RETURN + +- ;; +- UNSPEC_FRINT +- UNSPEC_FCLASS ++ ;; Bit operation + UNSPEC_BYTEPICK_W + UNSPEC_BYTEPICK_D + UNSPEC_BITREV_4B + UNSPEC_BITREV_8B + +- ;; Symbolic accesses. +- UNSPEC_LOAD_CALL +- +- ;; Blockage and synchronisation. +- UNSPEC_BLOCKAGE +- UNSPEC_DBAR +- UNSPEC_IBAR +- +- ;; CPUCFG +- UNSPEC_CPUCFG +- UNSPEC_ASRTLE_D +- UNSPEC_ASRTGT_D +- +- UNSPEC_CSRRD +- UNSPEC_CSRWR +- UNSPEC_CSRXCHG +- UNSPEC_IOCSRRD +- UNSPEC_IOCSRWR +- +- ;; cacop +- UNSPEC_CACOP +- +- ;; pte +- UNSPEC_LDDIR +- UNSPEC_LDPTE +- +- ;; Cache manipulation. +- UNSPEC_LARCH_CACHE +- +- ;; Interrupt handling. +- UNSPEC_ERTN +- UNSPEC_DI +- UNSPEC_EHB +- UNSPEC_RDPGPR +- +- ;; Used in a call expression in place of args_size. It's present for PIC +- ;; indirect calls where it contains args_size and the function symbol. +- UNSPEC_CALL_ATTR +- +- +- ;; Stack checking. +- UNSPEC_PROBE_STACK_RANGE +- +- ;; The `.insn' pseudo-op. +- UNSPEC_INSN_PSEUDO +- + ;; TLS + UNSPEC_TLS_GD + UNSPEC_TLS_LD + UNSPEC_TLS_LE + UNSPEC_TLS_IE + +- UNSPEC_LU52I_D +- ++ ;; Stack tie + UNSPEC_TIE + + ;; CRC + UNSPEC_CRC + UNSPEC_CRCC +- UNSPEC_ADDRESS_FIRST +-]) + +-(define_c_enum "unspecv" [ +- ;; Register save and restore. +- UNSPECV_GPR_SAVE +- UNSPECV_GPR_RESTORE ++ ;; RSQRT ++ UNSPEC_RSQRT ++ UNSPEC_RSQRTE + +- UNSPECV_MOVE_EXTREME ++ ;; RECIP ++ UNSPEC_RECIPE + ]) + ++(define_c_enum "unspecv" [ ++ ;; Blockage and synchronisation. 
++ UNSPECV_BLOCKAGE ++ UNSPECV_DBAR ++ UNSPECV_IBAR ++ ++ ;; Privileged instructions ++ UNSPECV_CSRRD ++ UNSPECV_CSRWR ++ UNSPECV_CSRXCHG ++ UNSPECV_IOCSRRD ++ UNSPECV_IOCSRWR ++ UNSPECV_CACOP ++ UNSPECV_LDDIR ++ UNSPECV_LDPTE ++ UNSPECV_ERTN ++ ++ ;; Stack checking ++ UNSPECV_PROBE_STACK_RANGE ++ ++ ;; Floating-point environment ++ UNSPECV_MOVFCSR2GR ++ UNSPECV_MOVGR2FCSR ++ ++ ;; Others ++ UNSPECV_CPUCFG ++ UNSPECV_ASRTLE_D ++ UNSPECV_ASRTGT_D ++ UNSPECV_SYSCALL ++ UNSPECV_BREAK ++]) + + (define_constants + [(RETURN_ADDR_REGNUM 1) + (T0_REGNUM 12) + (T1_REGNUM 13) + (S0_REGNUM 23) +- (S1_REGNUM 24) +- (S2_REGNUM 25) + + ;; PIC long branch sequences are never longer than 100 bytes. + (MAX_PIC_BRANCH_LENGTH 100) +@@ -148,9 +126,9 @@ + (define_attr "got" "unset,load" + (const_string "unset")) + +-;; For jal instructions, this attribute is DIRECT when the target address ++;; For jirl instructions, this attribute is DIRECT when the target address + ;; is symbolic and INDIRECT when it is a register. +-(define_attr "jal" "unset,direct,indirect" ++(define_attr "jirl" "unset,direct,indirect" + (const_string "unset")) + + +@@ -158,7 +136,7 @@ + ;; are as for "type" (see below) but there are also the following + ;; move-specific values: + ;; +-;; sll0 "sll DEST,SRC,0", which on 64-bit targets is guaranteed ++;; sll0 "slli.w DEST,SRC,0", which on 64-bit targets is guaranteed + ;; to produce a sign-extended DEST, even if SRC is not + ;; properly sign-extended + ;; pick_ins BSTRPICK.W, BSTRPICK.D, BSTRINS.W or BSTRINS.D instruction +@@ -207,59 +185,6 @@ + (const_string "yes")] + (const_string "no"))) + +-;; Attributes describing a sync loop. These loops have the form: +-;; +-;; if (RELEASE_BARRIER == YES) sync +-;; 1: OLDVAL = *MEM +-;; if ((OLDVAL & INCLUSIVE_MASK) != REQUIRED_OLDVAL) goto 2 +-;; CMP = 0 [delay slot] +-;; $TMP1 = OLDVAL & EXCLUSIVE_MASK +-;; $TMP2 = INSN1 (OLDVAL, INSN1_OP2) +-;; $TMP3 = INSN2 ($TMP2, INCLUSIVE_MASK) +-;; $AT |= $TMP1 | $TMP3 +-;; if (!commit (*MEM = $AT)) goto 1. +-;; if (INSN1 != MOVE && INSN1 != LI) NEWVAL = $TMP3 [delay slot] +-;; CMP = 1 +-;; if (ACQUIRE_BARRIER == YES) sync +-;; 2: +-;; +-;; where "$" values are temporaries and where the other values are +-;; specified by the attributes below. Values are specified as operand +-;; numbers and insns are specified as enums. If no operand number is +-;; specified, the following values are used instead: +-;; +-;; - OLDVAL: $AT +-;; - CMP: NONE +-;; - NEWVAL: $AT +-;; - INCLUSIVE_MASK: -1 +-;; - REQUIRED_OLDVAL: OLDVAL & INCLUSIVE_MASK +-;; - EXCLUSIVE_MASK: 0 +-;; +-;; MEM and INSN1_OP2 are required. +-;; +-;; Ideally, the operand attributes would be integers, with -1 meaning "none", +-;; but the gen* programs don't yet support that. +-(define_attr "sync_mem" "none,0,1,2,3,4,5" (const_string "none")) +-(define_attr "sync_oldval" "none,0,1,2,3,4,5" (const_string "none")) +-(define_attr "sync_cmp" "none,0,1,2,3,4,5" (const_string "none")) +-(define_attr "sync_newval" "none,0,1,2,3,4,5" (const_string "none")) +-(define_attr "sync_inclusive_mask" "none,0,1,2,3,4,5" (const_string "none")) +-(define_attr "sync_exclusive_mask" "none,0,1,2,3,4,5" (const_string "none")) +-(define_attr "sync_required_oldval" "none,0,1,2,3,4,5" (const_string "none")) +-(define_attr "sync_insn1_op2" "none,0,1,2,3,4,5" (const_string "none")) +-(define_attr "sync_insn1" "move,li,addu,addiu,subu,and,andi,or,ori,xor,xori" +- (const_string "move")) +-(define_attr "sync_insn2" "nop,and,xor,not" +- (const_string "nop")) +-;; Memory model specifier. 
+-;; "0"-"9" values specify the operand that stores the memory model value. +-;; "10" specifies MEMMODEL_ACQ_REL, +-;; "11" specifies MEMMODEL_ACQUIRE. +-(define_attr "sync_memmodel" "" (const_int 10)) +- +-;; Accumulator operand for madd patterns. +-(define_attr "accum_in" "none,0,1,2,3,4,5" (const_string "none")) +- + ;; Classification of each insn. + ;; branch conditional branch + ;; jump unconditional jump +@@ -273,8 +198,8 @@ + ;; prefetch memory prefetch (register + offset) + ;; prefetchx memory indexed prefetch (register + register) + ;; condmove conditional moves +-;; mgtf move generate register to float register +-;; mftg move float register to generate register ++;; mgtf move general-purpose register to floating point register ++;; mftg move floating point register to general-purpose register + ;; const load constant + ;; arith integer arithmetic instructions + ;; logical integer logical instructions +@@ -283,10 +208,9 @@ + ;; signext sign extend instructions + ;; clz the clz and clo instructions + ;; trap trap if instructions +-;; imul integer multiply 2 operands +-;; imul3 integer multiply 3 operands +-;; idiv3 integer divide 3 operands +-;; move integer register move ({,D}ADD{,U} with rt = 0) ++;; imul integer multiply ++;; idiv integer divide ++;; move integer move + ;; fmove floating point register move + ;; fadd floating point add/subtract + ;; fmul floating point multiply +@@ -296,9 +220,11 @@ + ;; fabs floating point absolute value + ;; fneg floating point negation + ;; fcmp floating point compare ++;; fcopysign floating point copysign + ;; fcvt floating point convert + ;; fsqrt floating point square root + ;; frsqrt floating point reciprocal square root ++;; frsqrte float point reciprocal square root approximate + ;; multi multiword sequence (or user asm statements) + ;; atomic atomic memory update instruction + ;; syncloop memory atomic operation implemented as a sync loop +@@ -307,16 +233,15 @@ + (define_attr "type" + "unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxstore, + prefetch,prefetchx,condmove,mgtf,mftg,const,arith,logical, +- shift,slt,signext,clz,trap,imul,imul3,idiv3,move, +- fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcvt,fsqrt, +- frsqrt,dspmac,dspmacsat,accext,accmod,dspalu,dspalusat, +- multi,atomic,syncloop,nop,ghost, ++ shift,slt,signext,clz,trap,imul,idiv,move, ++ fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcopysign,fcvt,fsqrt, ++ frsqrt,frsqrte,accext,accmod,multi,atomic,syncloop,nop,ghost, + simd_div,simd_fclass,simd_flog2,simd_fadd,simd_fcvt,simd_fmul,simd_fmadd, + simd_fdiv,simd_bitins,simd_bitmov,simd_insert,simd_sld,simd_mul,simd_fcmp, + simd_fexp2,simd_int_arith,simd_bit,simd_shift,simd_splat,simd_fill, + simd_permute,simd_shf,simd_sat,simd_pcnt,simd_copy,simd_branch,simd_clsx, + simd_fminmax,simd_logic,simd_move,simd_load,simd_store" +- (cond [(eq_attr "jal" "!unset") (const_string "call") ++ (cond [(eq_attr "jirl" "!unset") (const_string "call") + (eq_attr "got" "load") (const_string "load") + + (eq_attr "alu_type" "add,sub") (const_string "arith") +@@ -362,35 +287,22 @@ + (eq_attr "dword_mode" "yes")) + (const_string "multi") + (eq_attr "move_type" "move") (const_string "move") +- (eq_attr "move_type" "const") (const_string "const") +- (eq_attr "sync_mem" "!none") (const_string "syncloop")] ++ (eq_attr "move_type" "const") (const_string "const")] + (const_string "unknown"))) + +-(define_attr "compact_form" "always,maybe,never" +- (cond [(eq_attr "jal" "direct") +- (const_string "always") +- (eq_attr "jal" 
"indirect") +- (const_string "maybe") +- (eq_attr "type" "jump") +- (const_string "maybe")] +- (const_string "never"))) +- + ;; Mode for conversion types (fcvt) +-;; I2S integer to float single (SI/DI to SF) +-;; I2D integer to float double (SI/DI to DF) +-;; S2I float to integer (SF to SI/DI) +-;; D2I float to integer (DF to SI/DI) +-;; D2S double to float single +-;; S2D float single to double +- +-(define_attr "cnv_mode" "unknown,I2S,I2D,S2I,D2I,D2S,S2D" ++;; I2S integer to float single (SI/DI to SF) ++;; I2D integer to float double (SI/DI to DF) ++;; S2I float to integer (SF to SI/DI) ++;; D2I float to integer (DF to SI/DI) ++;; D2S double to float single ++;; S2D float single to double ++;; C2D fcc to DI ++ ++(define_attr "cnv_mode" "unknown,I2S,I2D,S2I,D2I,D2S,S2D" + (const_string "unknown")) + +-(define_attr "compression" "none,all" +- (const_string "none")) +- +-;; The number of individual instructions that a non-branch pattern generates, +-;; using units of BASE_INSN_LENGTH. ++;; The number of individual instructions that a non-branch pattern generates + (define_attr "insn_count" "" + (cond [;; "Ghost" instructions occupy no space. + (eq_attr "type" "ghost") +@@ -425,84 +337,30 @@ + (eq_attr "move_type" "store,fpstore") + (symbol_ref "loongarch_load_store_insns (operands[0], insn)") + +- (eq_attr "type" "idiv3") ++ (eq_attr "type" "idiv") + (symbol_ref "loongarch_idiv_insns (GET_MODE (PATTERN (insn)))")] + (const_int 1))) + +-;; Length of instruction in bytes. The default is derived from "insn_count", +-;; but there are special cases for branches (which must be handled here) +-;; and for compressed single instructions. +- +- +- ++;; Length of instruction in bytes. + (define_attr "length" "" + (cond [ +- ;; Branch instructions have a range of [-0x20000,0x1fffc]. +- ;; If a branch is outside this range, we have a choice of two +- ;; sequences. +- ;; +- ;; For PIC, an out-of-range branch like: +- ;; +- ;; bne r1,r2,target +- ;; +- ;; becomes the equivalent of: +- ;; +- ;; beq r1,r2,1f +- ;; la rd,target +- ;; jr rd +- ;; 1: +- ;; +- ;; The non-PIC case is similar except that we use a direct +- ;; jump instead of an la/jr pair. Since the target of this +- ;; jump is an absolute 28-bit bit address (the other bits +- ;; coming from the address of the delay slot) this form cannot +- ;; cross a 256MB boundary. We could provide the option of +- ;; using la/jr in this case too, but we do not do so at +- ;; present. +- ;; +- ;; from the shorten_branches reference address. +- (eq_attr "type" "branch") +- (cond [;; Any variant can handle the 17-bit range. +- (and (le (minus (match_dup 0) (pc)) (const_int 65532)) +- (le (minus (pc) (match_dup 0)) (const_int 65534))) +- (const_int 4) +- +- ;; The non-PIC case: branch, and J. +- (match_test "TARGET_ABSOLUTE_JUMPS") +- (const_int 8)] +- +- ;; Use MAX_PIC_BRANCH_LENGTH as a (gross) overestimate. +- ;; loongarch_adjust_insn_length substitutes the correct length. +- ;; +- ;; Note that we can't simply use (symbol_ref ...) here +- ;; because genattrtab needs to know the maximum length +- ;; of an insn. +- (const_int MAX_PIC_BRANCH_LENGTH)) +- ] +- (symbol_ref "get_attr_insn_count (insn) * BASE_INSN_LENGTH"))) +- +-;; Attribute describing the processor. +-(define_enum_attr "cpu" "processor" +- (const (symbol_ref "loongarch_tune"))) ++ ;; Branching further than +/- 128 KiB requires two instructions. 
++ (eq_attr "type" "branch") ++ (if_then_else (and (le (minus (match_dup 0) (pc)) (const_int 131064)) ++ (le (minus (pc) (match_dup 0)) (const_int 131068))) ++ (const_int 4) ++ (const_int 8))] ++ (symbol_ref "get_attr_insn_count (insn) * 4"))) + + ;; The type of hardware hazard associated with this instruction. + ;; DELAY means that the next instruction cannot read the result + ;; of this one. +-(define_attr "hazard" "none,delay,forbidden_slot" ++(define_attr "hazard" "none,forbidden_slot" + (const_string "none")) + +-;; Can the instruction be put into a delay slot? +-(define_attr "can_delay" "no,yes" +- (if_then_else (and (eq_attr "type" "!branch,call,jump") +- (eq_attr "hazard" "none") +- (match_test "get_attr_insn_count (insn) == 1")) +- (const_string "yes") +- (const_string "no"))) +- + ;; Describe a user's asm statement. + (define_asm_attributes +- [(set_attr "type" "multi") +- (set_attr "can_delay" "no")]) ++ [(set_attr "type" "multi")]) + + ;; This mode iterator allows 32-bit and 64-bit GPR patterns to be generated + ;; from the same template. +@@ -512,141 +370,99 @@ + ;; modes. + (define_mode_iterator GPR2 [SI (DI "TARGET_64BIT")]) + +-;; Likewise, but for XLEN-sized quantities. +-(define_mode_iterator X [(SI "!TARGET_64BIT") (DI "TARGET_64BIT")]) +- +-(define_mode_iterator MOVEP1 [SI SF]) +-(define_mode_iterator MOVEP2 [SI SF]) ++;; This mode iterator allows 16-bit and 32-bit GPR patterns and 32-bit 64-bit ++;; FPR patterns to be generated from the same template. + (define_mode_iterator JOIN_MODE [HI + SI + (SF "TARGET_HARD_FLOAT") +- (DF "TARGET_HARD_FLOAT +- && TARGET_DOUBLE_FLOAT")]) ++ (DF "TARGET_DOUBLE_FLOAT")]) + + ;; This mode iterator allows :P to be used for patterns that operate on + ;; pointer-sized quantities. Exactly one of the two alternatives will match. + (define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")]) + +-;; 32-bit integer moves for which we provide move patterns. +-(define_mode_iterator IMOVE32 +- [SI]) ++;; Likewise, but for XLEN-sized quantities. ++(define_mode_iterator X [(SI "!TARGET_64BIT") (DI "TARGET_64BIT")]) + + ;; 64-bit modes for which we provide move patterns. +-(define_mode_iterator MOVE64 +- [DI DF]) ++(define_mode_iterator MOVE64 [DI DF]) + + ;; 128-bit modes for which we provide move patterns on 64-bit targets. + (define_mode_iterator MOVE128 [TI TF]) + +-;; This mode iterator allows the QI and HI extension patterns to be +-;; defined from the same template. ++;; Iterator for sub-32-bit integer modes. + (define_mode_iterator SHORT [QI HI]) + + ;; Likewise the 64-bit truncate-and-shift patterns. + (define_mode_iterator SUBDI [QI HI SI]) + +-;; This mode iterator allows the QI HI SI and DI extension patterns to be ++;; Iterator for scalar fixed-point modes. + (define_mode_iterator QHWD [QI HI SI (DI "TARGET_64BIT")]) + +- +-;; This mode iterator allows :ANYF to be used wherever a scalar or vector +-;; floating-point mode is allowed. ++;; Iterator for hardware-supported floating-point modes. + (define_mode_iterator ANYF [(SF "TARGET_HARD_FLOAT") +- (DF "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT")]) +- +-;; Like ANYF, but only applies to scalar modes. +-(define_mode_iterator SCALARF [(SF "TARGET_HARD_FLOAT") +- (DF "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT")]) ++ (DF "TARGET_DOUBLE_FLOAT")]) + + ;; A floating-point mode for which moves involving FPRs may need to be split. 
+ (define_mode_iterator SPLITF + [(DF "!TARGET_64BIT && TARGET_DOUBLE_FLOAT") + (DI "!TARGET_64BIT && TARGET_DOUBLE_FLOAT") +- (TF "TARGET_64BIT && TARGET_FLOAT64")]) ++ (TF "TARGET_64BIT && TARGET_DOUBLE_FLOAT")]) + +-;; In GPR templates, a string like "mul." will expand to "mul" in the +-;; 32-bit "mul.w" and "mul.d" in the 64-bit version. ++;; In GPR templates, a string like "mul." will expand to "mul.w" in the ++;; 32-bit version and "mul.d" in the 64-bit version. + (define_mode_attr d [(SI "w") (DI "d")]) + +-;; Same as d but upper-case. +-(define_mode_attr D [(SI "") (DI "D")]) +- + ;; This attribute gives the length suffix for a load or store instruction. + ;; The same suffixes work for zero and sign extensions. + (define_mode_attr size [(QI "b") (HI "h") (SI "w") (DI "d")]) + (define_mode_attr SIZE [(QI "B") (HI "H") (SI "W") (DI "D")]) + +-;; This attributes gives the mode mask of a SHORT. ++;; This attribute gives the mode mask of a SHORT. + (define_mode_attr mask [(QI "0x00ff") (HI "0xffff")]) + +-;; This attributes gives the size (bits) of a SHORT. +-(define_mode_attr qi_hi [(QI "7") (HI "15")]) +- +-;; Mode attributes for GPR loads. +-(define_mode_attr load [(SI "lw") (DI "ld")]) ++;; This attribute gives the size (bits) of a SHORT. ++(define_mode_attr 7_or_15 [(QI "7") (HI "15")]) + +-(define_mode_attr load_l [(SI "ld.w") (DI "ld.d")]) + ;; Instruction names for stores. + (define_mode_attr store [(QI "sb") (HI "sh") (SI "sw") (DI "sd")]) + +-;; Similarly for LARCH IV indexed FPR loads and stores. +-(define_mode_attr floadx [(SF "fldx.s") (DF "fldx.d") (V2SF "fldx.d")]) +-(define_mode_attr fstorex [(SF "fstx.s") (DF "fstx.d") (V2SF "fstx.d")]) +- +-;; Similarly for LOONGSON indexed GPR loads and stores. ++;; Similarly for LoongArch indexed GPR loads and stores. + (define_mode_attr loadx [(QI "ldx.b") +- (HI "ldx.h") +- (SI "ldx.w") +- (DI "ldx.d")]) ++ (HI "ldx.h") ++ (SI "ldx.w") ++ (DI "ldx.d")]) + (define_mode_attr storex [(QI "stx.b") +- (HI "stx.h") +- (SI "stx.w") +- (DI "stx.d")]) +- +-;; This attribute gives the best constraint to use for registers of +-;; a given mode. +-(define_mode_attr reg [(SI "d") (DI "d") (FCC "z")]) ++ (HI "stx.h") ++ (SI "stx.w") ++ (DI "stx.d")]) + + ;; This attribute gives the format suffix for floating-point operations. + (define_mode_attr fmt [(SF "s") (DF "d") (V2SF "ps")]) ++(define_mode_attr ifmt [(SI "w") (DI "l")]) + + ;; This attribute gives the upper-case mode name for one unit of a + ;; floating-point mode or vector mode. + (define_mode_attr UNITMODE [(SF "SF") (DF "DF") (V2SF "SF") (V4SF "SF") +- (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI") +- (V2DF "DF")(V8SF "SF")(V32QI "QI")(V16HI "HI")(V8SI "SI")(V4DI "DI")(V4DF "DF")]) ++ (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI") ++ (V2DF "DF")(V8SF "SF")(V32QI "QI")(V16HI "HI")(V8SI "SI")(V4DI "DI")(V4DF "DF")]) + + ;; As above, but in lower case. + (define_mode_attr unitmode [(SF "sf") (DF "df") (V2SF "sf") (V4SF "sf") +- (V16QI "qi") (V8QI "qi") (V8HI "hi") (V4HI "hi") +- (V4SI "si") (V2SI "si") (V2DI "di") (V2DF "df") +- (V8SI "si") (V4DI "di") (V32QI "qi") (V16HI "hi") ++ (V16QI "qi") (V8QI "qi") (V8HI "hi") (V4HI "hi") ++ (V4SI "si") (V2SI "si") (V2DI "di") (V2DF "df") ++ (V8SI "si") (V4DI "di") (V32QI "qi") (V16HI "hi") + (V8SF "sf") (V4DF "df")]) + + ;; This attribute gives the integer mode that has half the size of + ;; the controlling mode. 
+ (define_mode_attr HALFMODE [(DF "SI") (DI "SI") (V2SF "SI") +- (V2SI "SI") (V4HI "SI") (V8QI "SI") +- (TF "DI")]) ++ (V2SI "SI") (V4HI "SI") (V8QI "SI") ++ (TF "DI")]) + ++;; This attribute gives the integer prefix for some instructions templates. + (define_mode_attr p [(SI "") (DI "d")]) + +-;; This attribute works around the early SB-1 rev2 core "F2" erratum: +-;; +-;; In certain cases, div.s and div.ps may have a rounding error +-;; and/or wrong inexact flag. +-;; +-;; Therefore, we only allow div.s if not working around SB-1 rev2 +-;; errata or if a slight loss of precision is OK. +-(define_mode_attr divide_condition +- [DF (SF "flag_unsafe_math_optimizations") +- (V2SF "TARGET_SB1 && (flag_unsafe_math_optimizations)")]) +- +-;; This attribute gives the conditions under which SQRT.fmt instructions +-;; can be used. +-(define_mode_attr sqrt_condition +- [SF DF (V2SF "TARGET_SB1")]) +- + ;; This code iterator allows signed and unsigned widening multiplications + ;; to use the same template. + (define_code_iterator any_extend [sign_extend zero_extend]) +@@ -659,13 +475,10 @@ + ;; from the same template. + (define_code_iterator any_shift [ashift ashiftrt lshiftrt]) + +-;; This code iterator allows unsigned and signed division to be generated +-;; from the same template. +-(define_code_iterator any_div [div udiv]) +- +-;; This code iterator allows unsigned and signed modulus to be generated ++;; This code iterator allows the three bitwise instructions to be generated + ;; from the same template. +-(define_code_iterator any_mod [mod umod]) ++(define_code_iterator any_bitwise [and ior xor]) ++(define_code_iterator neg_bitwise [and ior]) + + ;; This code iterator allows addition and subtraction to be generated + ;; from the same template. +@@ -679,13 +492,14 @@ + ;; from the same template + (define_code_iterator addsubmul [plus minus mult]) + ++;; This code iterator allows unsigned and signed division to be generated ++;; from the same template. ++(define_code_iterator any_div [div udiv mod umod]) ++ + ;; This code iterator allows all native floating-point comparisons to be + ;; generated from the same template. +-(define_code_iterator fcond [unordered uneq unlt unle eq lt le ordered ltgt ne]) +- +-;; This code iterator is used for comparisons that can be implemented +-;; by swapping the operands. +-(define_code_iterator swapped_fcond [ge gt unge ungt]) ++(define_code_iterator fcond [unordered uneq unlt unle eq lt le ++ ordered ltgt ne ge gt unge ungt]) + + ;; Equality operators. + (define_code_iterator equality_op [eq ne]) +@@ -725,6 +539,10 @@ + (plus "add") + (minus "sub") + (mult "mul") ++ (div "div") ++ (udiv "udiv") ++ (mod "mod") ++ (umod "umod") + (return "return") + (simple_return "simple_return")]) + +@@ -736,15 +554,13 @@ + (xor "xor") + (and "and") + (plus "addu") +- (minus "subu")]) +- +-;; expands to the name of the insn that implements +-;; a particular code to operate on immediate values. +-(define_code_attr immediate_insn [(ior "ori") +- (xor "xori") +- (and "andi")]) ++ (minus "subu") ++ (div "div") ++ (udiv "div") ++ (mod "mod") ++ (umod "mod")]) + +-;; is the c.cond.fmt condition associated with a particular code. ++;; is the fcmp.cond.fmt condition associated with a particular code. + (define_code_attr fcond [(unordered "cun") + (uneq "cueq") + (unlt "cult") +@@ -754,48 +570,17 @@ + (le "sle") + (ordered "cor") + (ltgt "sne") +- (ne "cune")]) +- +-;; Similar, but for swapped conditions. 
+-(define_code_attr swapped_fcond [(ge "sle") +- (gt "slt") +- (unge "cule") +- (ungt "cult")]) +- +-;; The value of the bit when the branch is taken for branch_bit patterns. +-;; Comparison is always against zero so this depends on the operator. +-(define_code_attr bbv [(eq "0") (ne "1")]) +- +-;; This is the inverse value of bbv. +-(define_code_attr bbinv [(eq "1") (ne "0")]) ++ (ne "cune") ++ (ge "sge") ++ (gt "sgt") ++ (unge "cuge") ++ (ungt "cugt")]) + + ;; The sel mnemonic to use depending on the condition test. + (define_code_attr sel [(eq "masknez") (ne "maskeqz")]) ++(define_code_attr fsel_invert [(eq "%2,%3") (ne "%3,%2")]) + (define_code_attr selinv [(eq "maskeqz") (ne "masknez")]) +- +-;; Pipeline descriptions. +-;; +-;; generic.md provides a fallback for processors without a specific +-;; pipeline description. It is derived from the old define_function_unit +-;; version and uses the "alu" and "imuldiv" units declared below. +-;; +-;; Some of the processor-specific files are also derived from old +-;; define_function_unit descriptions and simply override the parts of +-;; generic.md that don't apply. The other processor-specific files +-;; are self-contained. +-(define_automaton "alu,imuldiv") + +-(define_cpu_unit "alu" "alu") +-(define_cpu_unit "imuldiv" "imuldiv") +- +-;; Ghost instructions produce no real code and introduce no hazards. +-;; They exist purely to express an effect on dataflow. +-(define_insn_reservation "ghost" 0 +- (eq_attr "type" "ghost") +- "nothing") +- +-(include "generic.md") +- + ;; + ;; .................... + ;; +@@ -831,37 +616,22 @@ + [(set_attr "type" "fadd") + (set_attr "mode" "")]) + +-(define_expand "add3" +- [(set (match_operand:GPR 0 "register_operand") +- (plus:GPR (match_operand:GPR 1 "register_operand") +- (match_operand:GPR 2 "arith_operand")))] +- "") +- +-(define_insn "*add3" ++(define_insn "add3" + [(set (match_operand:GPR 0 "register_operand" "=r,r") + (plus:GPR (match_operand:GPR 1 "register_operand" "r,r") +- (match_operand:GPR 2 "arith_operand" "r,Q")))] ++ (match_operand:GPR 2 "arith_operand" "r,I")))] + "" +-{ +- if (which_alternative == 0) +- return "add.\t%0,%1,%2"; +- else +- return "addi.\t%0,%1,%2"; +-} ++ "add%i2.\t%0,%1,%2"; + [(set_attr "alu_type" "add") +- (set_attr "compression" "*,*") + (set_attr "mode" "")]) + +- + (define_insn "*addsi3_extended" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (sign_extend:DI + (plus:SI (match_operand:SI 1 "register_operand" "r,r") +- (match_operand:SI 2 "arith_operand" "r,Q"))))] ++ (match_operand:SI 2 "arith_operand" "r,I"))))] + "TARGET_64BIT" +- "@ +- add.w\t%0,%1,%2 +- addi.w\t%0,%1,%2" ++ "add%i2.w\t%0,%1,%2" + [(set_attr "alu_type" "add") + (set_attr "mode" "SI")]) + +@@ -885,23 +655,23 @@ + + (define_insn "sub3" + [(set (match_operand:GPR 0 "register_operand" "=r") +- (minus:GPR (match_operand:GPR 1 "register_operand" "r") ++ (minus:GPR (match_operand:GPR 1 "register_operand" "rJ") + (match_operand:GPR 2 "register_operand" "r")))] + "" +- "sub.\t%0,%1,%2" ++ "sub.\t%0,%z1,%2" + [(set_attr "alu_type" "sub") +- (set_attr "compression" "*") + (set_attr "mode" "")]) + ++ + (define_insn "*subsi3_extended" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI +- (minus:SI (match_operand:SI 1 "register_operand" "r") +- (match_operand:SI 2 "register_operand" "r"))))] ++ (minus:SI (match_operand:SI 1 "reg_or_0_operand" "rJ") ++ (match_operand:SI 2 "register_operand" "r"))))] + "TARGET_64BIT" +- "sub.w\t%0,%1,%2" +- [(set_attr "alu_type" "sub") +- (set_attr 
"mode" "DI")]) ++ "sub.w\t%0,%z1,%2" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI")]) + + ;; + ;; .................... +@@ -911,17 +681,10 @@ + ;; .................... + ;; + +-(define_expand "mul3" +- [(set (match_operand:SCALARF 0 "register_operand") +- (mult:SCALARF (match_operand:SCALARF 1 "register_operand") +- (match_operand:SCALARF 2 "register_operand")))] +- "" +- "") +- +-(define_insn "*mul3" +- [(set (match_operand:SCALARF 0 "register_operand" "=f") +- (mult:SCALARF (match_operand:SCALARF 1 "register_operand" "f") +- (match_operand:SCALARF 2 "register_operand" "f")))] ++(define_insn "mul3" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (mult:ANYF (match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f")))] + "" + "fmul.\t%0,%1,%2" + [(set_attr "type" "fmul") +@@ -933,20 +696,27 @@ + (match_operand:GPR 2 "register_operand" "r")))] + "" + "mul.\t%0,%1,%2" +- [(set_attr "type" "imul3") ++ [(set_attr "type" "imul") + (set_attr "mode" "")]) + +- +- + (define_insn "mulsidi3_64bit" + [(set (match_operand:DI 0 "register_operand" "=r") + (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r")) + (sign_extend:DI (match_operand:SI 2 "register_operand" "r"))))] +- "" ++ "TARGET_64BIT" + "mul.d\t%0,%1,%2" +- [(set_attr "type" "imul3") ++ [(set_attr "type" "imul") + (set_attr "mode" "DI")]) + ++(define_insn "*mulsi3_extended" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (sign_extend:DI ++ (mult:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r"))))] ++ "TARGET_64BIT" ++ "mul.w\t%0,%1,%2" ++ [(set_attr "type" "imul") ++ (set_attr "mode" "SI")]) + + ;; + ;; ........................ +@@ -956,9 +726,8 @@ + ;; ........................ + ;; + +- + (define_expand "mulditi3" +- [(set (match_operand:TI 0 "register_operand") ++ [(set (match_operand:TI 0 "register_operand") + (mult:TI (any_extend:TI (match_operand:DI 1 "register_operand")) + (any_extend:TI (match_operand:DI 2 "register_operand"))))] + "TARGET_64BIT" +@@ -975,7 +744,7 @@ + }) + + (define_insn "muldi3_highpart" +- [(set (match_operand:DI 0 "register_operand" "=r") ++ [(set (match_operand:DI 0 "register_operand" "=r") + (truncate:DI + (lshiftrt:TI + (mult:TI (any_extend:TI +@@ -989,7 +758,7 @@ + (set_attr "mode" "DI")]) + + (define_expand "mulsidi3" +- [(set (match_operand:DI 0 "register_operand" "=r") ++ [(set (match_operand:DI 0 "register_operand" "=r") + (mult:DI (any_extend:DI + (match_operand:SI 1 "register_operand" " r")) + (any_extend:DI +@@ -1005,7 +774,7 @@ + }) + + (define_insn "mulsi3_highpart" +- [(set (match_operand:SI 0 "register_operand" "=r") ++ [(set (match_operand:SI 0 "register_operand" "=r") + (truncate:SI + (lshiftrt:DI + (mult:DI (any_extend:DI +@@ -1018,97 +787,35 @@ + [(set_attr "type" "imul") + (set_attr "mode" "SI")]) + +-;; Floating point multiply accumulate instructions. + +-(define_expand "fma4" +- [(set (match_operand:ANYF 0 "register_operand") +- (fma:ANYF (match_operand:ANYF 1 "register_operand") +- (match_operand:ANYF 2 "register_operand") +- (match_operand:ANYF 3 "register_operand")))] +- "TARGET_HARD_FLOAT") ++;; .................... ++;; ++;; FLOATING POINT COPYSIGN ++;; ++;; .................... ++ ++;; FLOATING POINT COPYSIGN ++;; ++;; .................... 
+ +-(define_insn "*fma4_madd4" ++(define_insn "copysign3" + [(set (match_operand:ANYF 0 "register_operand" "=f") +- (fma:ANYF (match_operand:ANYF 1 "register_operand" "f") +- (match_operand:ANYF 2 "register_operand" "f") +- (match_operand:ANYF 3 "register_operand" "f")))] ++ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f")] ++ UNSPEC_FCOPYSIGN))] + "TARGET_HARD_FLOAT" +- "fmadd.\t%0,%1,%2,%3" +- [(set_attr "type" "fmadd") ++ "fcopysign.\t%0,%1,%2" ++ [(set_attr "type" "fcopysign") + (set_attr "mode" "")]) + +-;; The fms, fnma, and fnms instructions can be used even when HONOR_NANS +-;; is true because while IEEE 754-2008 requires the negate operation to +-;; negate the sign of a NAN and the LARCH neg instruction does not do this, +-;; the fma part of the instruction has no requirement on how the sign of +-;; a NAN is handled and so the final sign bit of the entire operation is +-;; undefined. +- +-(define_expand "fms4" +- [(set (match_operand:ANYF 0 "register_operand") +- (fma:ANYF (match_operand:ANYF 1 "register_operand") +- (match_operand:ANYF 2 "register_operand") +- (neg:ANYF (match_operand:ANYF 3 "register_operand"))))] +- "TARGET_HARD_FLOAT") +- + +-(define_insn "*fms4_msub4" +- [(set (match_operand:ANYF 0 "register_operand" "=f") +- (fma:ANYF (match_operand:ANYF 1 "register_operand" "f") +- (match_operand:ANYF 2 "register_operand" "f") +- (neg:ANYF (match_operand:ANYF 3 "register_operand" "f"))))] +- "TARGET_HARD_FLOAT" +- "fmsub.\t%0,%1,%2,%3" +- [(set_attr "type" "fmadd") +- (set_attr "mode" "")]) ++;; + +-;; fnma is defined in GCC as (fma (neg op1) op2 op3) +-;; (-op1 * op2) + op3 ==> -(op1 * op2) + op3 ==> -((op1 * op2) - op3) +-;; The loongarch nmsub instructions implement -((op1 * op2) - op3) +-;; This transformation means we may return the wrong signed zero +-;; so we check HONOR_SIGNED_ZEROS. + +-(define_expand "fnma4" +- [(set (match_operand:ANYF 0 "register_operand") +- (fma:ANYF (neg:ANYF (match_operand:ANYF 1 "register_operand")) +- (match_operand:ANYF 2 "register_operand") +- (match_operand:ANYF 3 "register_operand")))] +- "TARGET_HARD_FLOAT && !HONOR_SIGNED_ZEROS (mode)") + +-(define_insn "*fnma4_nmsub4" +- [(set (match_operand:ANYF 0 "register_operand" "=f") +- (fma:ANYF (neg:ANYF (match_operand:ANYF 1 "register_operand" "f")) +- (match_operand:ANYF 2 "register_operand" "f") +- (match_operand:ANYF 3 "register_operand" "f")))] +- "TARGET_HARD_FLOAT && !HONOR_SIGNED_ZEROS (mode)" +- "fnmsub.\t%0,%1,%2,%3" +- [(set_attr "type" "fmadd") +- (set_attr "mode" "")]) + +-;; fnms is defined as: (fma (neg op1) op2 (neg op3)) +-;; ((-op1) * op2) - op3 ==> -(op1 * op2) - op3 ==> -((op1 * op2) + op3) +-;; The loongarch nmadd instructions implement -((op1 * op2) + op3) +-;; This transformation means we may return the wrong signed zero +-;; so we check HONOR_SIGNED_ZEROS. 
+ +-(define_expand "fnms4" +- [(set (match_operand:ANYF 0 "register_operand") +- (fma:ANYF +- (neg:ANYF (match_operand:ANYF 1 "register_operand")) +- (match_operand:ANYF 2 "register_operand") +- (neg:ANYF (match_operand:ANYF 3 "register_operand"))))] +- "TARGET_HARD_FLOAT && !HONOR_SIGNED_ZEROS (mode)") + +-(define_insn "*fnms4_nmadd4" +- [(set (match_operand:ANYF 0 "register_operand" "=f") +- (fma:ANYF +- (neg:ANYF (match_operand:ANYF 1 "register_operand" "f")) +- (match_operand:ANYF 2 "register_operand" "f") +- (neg:ANYF (match_operand:ANYF 3 "register_operand" "f"))))] +- "TARGET_HARD_FLOAT && !HONOR_SIGNED_ZEROS (mode)" +- "fnmadd.\t%0,%1,%2,%3" +- [(set_attr "type" "fmadd") +- (set_attr "mode" "")]) + + ;; + ;; .................... +@@ -1118,144 +825,315 @@ + ;; .................... + ;; + ++;; Float division and modulus. + (define_expand "div3" + [(set (match_operand:ANYF 0 "register_operand") +- (div:ANYF (match_operand:ANYF 1 "reg_or_1_operand") ++ (div:ANYF (match_operand:ANYF 1 "register_operand") + (match_operand:ANYF 2 "register_operand")))] +- "" ++ "" + { +- if (const_1_operand (operands[1], mode)) +- if (!(ISA_HAS_FP_RECIP_RSQRT (mode) +- && flag_unsafe_math_optimizations)) +- operands[1] = force_reg (mode, operands[1]); ++ if (mode == SFmode ++ && TARGET_RECIP_DIV ++ && optimize_insn_for_speed_p () ++ && flag_finite_math_only && !flag_trapping_math ++ && flag_unsafe_math_optimizations) ++ { ++ loongarch_emit_swdivsf (operands[0], operands[1], ++ operands[2], SFmode); ++ DONE; ++ } + }) + +-;; These patterns work around the early SB-1 rev2 core "F1" erratum: +-;; +-;; If an mftg1 or dmftg1 happens to access the floating point register +-;; file at the same time a long latency operation (div, sqrt, recip, +-;; sqrt) iterates an intermediate result back through the floating +-;; point register file bypass, then instead returning the correct +-;; register value the mftg1 or dmftg1 operation returns the intermediate +-;; result of the long latency operation. +-;; +-;; The workaround is to insert an unconditional 'mov' from/to the +-;; long latency op destination register. +- + (define_insn "*div3" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (div:ANYF (match_operand:ANYF 1 "register_operand" "f") + (match_operand:ANYF 2 "register_operand" "f")))] +- "" +-{ +- return "fdiv.\t%0,%1,%2"; +-} ++ "" ++ "fdiv.\t%0,%1,%2" + [(set_attr "type" "fdiv") + (set_attr "mode" "") + (set_attr "insn_count" "1")]) + ++;; In 3A5000, the reciprocal operation is the same as the division operation. ++ + (define_insn "*recip3" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (div:ANYF (match_operand:ANYF 1 "const_1_operand" "") + (match_operand:ANYF 2 "register_operand" "f")))] +- "ISA_HAS_FP_RECIP_RSQRT (mode) && flag_unsafe_math_optimizations" +-{ +- return "frecip.\t%0,%2"; +-} ++ "" ++ "frecip.\t%0,%2" + [(set_attr "type" "frdiv") + (set_attr "mode" "") + (set_attr "insn_count" "1")]) + ++;; In 3A6000, frecipe calculates the approximate value of the reciprocal operation ++ ++(define_insn "recipe2" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")] ++ UNSPEC_RECIPE))] ++ "TARGET_HARD_FLOAT && flag_unsafe_math_optimizations && TARGET_RECIP_DIV" ++ "frecipe.\t%0,%1" ++ [(set_attr "type" "frsqrte") ++ (set_attr "mode" "") ++ (set_attr "insn_count" "1")]) ++ + ;; Integer division and modulus. 
++(define_expand "3" ++ [(set (match_operand:GPR 0 "register_operand") ++ (any_div:GPR (match_operand:GPR 1 "register_operand") ++ (match_operand:GPR 2 "register_operand")))] ++ "" ++{ ++ if (GET_MODE (operands[0]) == SImode) ++ { ++ rtx reg1 = gen_reg_rtx (DImode); ++ rtx reg2 = gen_reg_rtx (DImode); ++ ++ operands[1] = gen_rtx_SIGN_EXTEND (word_mode, operands[1]); ++ operands[2] = gen_rtx_SIGN_EXTEND (word_mode, operands[2]); ++ ++ emit_insn (gen_rtx_SET (reg1, operands[1])); ++ emit_insn (gen_rtx_SET (reg2, operands[2])); + +-(define_insn "div3" ++ emit_insn (gen_di3_fake (operands[0], reg1, reg2)); ++ DONE; ++ } ++}) ++ ++(define_insn "*3" + [(set (match_operand:GPR 0 "register_operand" "=&r") + (any_div:GPR (match_operand:GPR 1 "register_operand" "r") + (match_operand:GPR 2 "register_operand" "r")))] + "" +- { +- return loongarch_output_division ("div.\t%0,%1,%2", operands); +- } +- [(set_attr "type" "idiv3") ++{ ++ return loongarch_output_division (".\t%0,%1,%2", operands); ++} ++ [(set_attr "type" "idiv") + (set_attr "mode" "")]) + +-(define_insn "mod3" +- [(set (match_operand:GPR 0 "register_operand" "=&r") +- (any_mod:GPR (match_operand:GPR 1 "register_operand" "r") +- (match_operand:GPR 2 "register_operand" "r")))] ++(define_insn "di3_fake" ++ [(set (match_operand:SI 0 "register_operand" "=&r") ++ (any_div:SI (match_operand:DI 1 "register_operand" "r") ++ (match_operand:DI 2 "register_operand" "r")))] + "" +- { +- return loongarch_output_division ("mod.\t%0,%1,%2", operands); +- } +- [(set_attr "type" "idiv3") +- (set_attr "mode" "")]) +- +-;; +-;; .................... +-;; +-;; SQUARE ROOT +-;; +-;; .................... +- +-;; These patterns work around the early SB-1 rev2 core "F1" erratum (see +-;; "*div[sd]f3" comment for details). +- +-(define_insn "sqrt2" +- [(set (match_operand:ANYF 0 "register_operand" "=f") +- (sqrt:ANYF (match_operand:ANYF 1 "register_operand" "f")))] +- "" + { +- return "fsqrt.\t%0,%1"; ++ return loongarch_output_division (".w\t%0,%1,%2", operands); + } +- [(set_attr "type" "fsqrt") +- (set_attr "mode" "") +- (set_attr "insn_count" "1")]) ++ [(set_attr "type" "idiv") ++ (set_attr "mode" "SI")]) + +-(define_insn "*rsqrta" +- [(set (match_operand:ANYF 0 "register_operand" "=f") +- (div:ANYF (match_operand:ANYF 1 "const_1_operand" "") +- (sqrt:ANYF (match_operand:ANYF 2 "register_operand" "f"))))] +- "ISA_HAS_FP_RECIP_RSQRT (mode) && flag_unsafe_math_optimizations" +-{ +- return "frsqrt.\t%0,%2"; +-} +- [(set_attr "type" "frsqrt") +- (set_attr "mode" "") +- (set_attr "insn_count" "1")]) ++;; Floating point multiply accumulate instructions. + +-(define_insn "*rsqrtb" ++;; a * b + c ++(define_insn "fma4" + [(set (match_operand:ANYF 0 "register_operand" "=f") +- (sqrt:ANYF (div:ANYF (match_operand:ANYF 1 "const_1_operand" "") +- (match_operand:ANYF 2 "register_operand" "f"))))] +- "ISA_HAS_FP_RECIP_RSQRT (mode) && flag_unsafe_math_optimizations" +-{ +- return "frsqrt.\t%0,%2"; +-} +- [(set_attr "type" "frsqrt") +- (set_attr "mode" "") +- (set_attr "insn_count" "1")]) +- +-;; +-;; .................... +-;; +-;; ABSOLUTE VALUE +-;; +-;; .................... +- +-;; Do not use the integer abs macro instruction, since that signals an +-;; exception on -2147483648 (sigh). +- +-;; The "legacy" (as opposed to "2008") form of ABS.fmt is an arithmetic +-;; instruction that treats all NaN inputs as invalid; it does not clear +-;; their sign bit. We therefore can't use that form if the signs of +-;; NaNs matter. 
++ (fma:ANYF (match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f") ++ (match_operand:ANYF 3 "register_operand" "f")))] ++ "" ++ "fmadd.\t%0,%1,%2,%3" ++ [(set_attr "type" "fmadd") ++ (set_attr "mode" "")]) + +-(define_insn "abs2" ++;; a * b - c ++(define_insn "fms4" + [(set (match_operand:ANYF 0 "register_operand" "=f") +- (abs:ANYF (match_operand:ANYF 1 "register_operand" "f")))] ++ (fma:ANYF (match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f") ++ (neg:ANYF (match_operand:ANYF 3 "register_operand" "f"))))] + "" +- "fabs.\t%0,%1" +- [(set_attr "type" "fabs") ++ "fmsub.\t%0,%1,%2,%3" ++ [(set_attr "type" "fmadd") ++ (set_attr "mode" "")]) ++ ++;; fnma is defined in GCC as (fma (neg op1) op2 op3) ++;; (-op1 * op2) + op3 ==> -(op1 * op2) + op3 ==> -((op1 * op2) - op3) ++;; The loongarch nmsub instructions implement -((op1 * op2) - op3) ++;; This transformation means we may return the wrong signed zero ++;; so we check HONOR_SIGNED_ZEROS. ++ ++;; -a * b + c ++(define_insn "fnma4" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (fma:ANYF (neg:ANYF (match_operand:ANYF 1 "register_operand" "f")) ++ (match_operand:ANYF 2 "register_operand" "f") ++ (match_operand:ANYF 3 "register_operand" "f")))] ++ "!HONOR_SIGNED_ZEROS (mode)" ++ "fnmsub.\t%0,%1,%2,%3" ++ [(set_attr "type" "fmadd") ++ (set_attr "mode" "")]) ++ ++;; fnms is defined as: (fma (neg op1) op2 (neg op3)) ++;; ((-op1) * op2) - op3 ==> -(op1 * op2) - op3 ==> -((op1 * op2) + op3) ++;; The loongarch nmadd instructions implement -((op1 * op2) + op3) ++;; This transformation means we may return the wrong signed zero ++;; so we check HONOR_SIGNED_ZEROS. ++ ++;; -a * b - c ++(define_insn "fnms4" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (fma:ANYF ++ (neg:ANYF (match_operand:ANYF 1 "register_operand" "f")) ++ (match_operand:ANYF 2 "register_operand" "f") ++ (neg:ANYF (match_operand:ANYF 3 "register_operand" "f"))))] ++ "!HONOR_SIGNED_ZEROS (mode)" ++ "fnmadd.\t%0,%1,%2,%3" ++ [(set_attr "type" "fmadd") ++ (set_attr "mode" "")]) ++ ++;; -(-a * b - c), modulo signed zeros ++(define_insn "*fma4" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (neg:ANYF ++ (fma:ANYF ++ (neg:ANYF (match_operand:ANYF 1 "register_operand" " f")) ++ (match_operand:ANYF 2 "register_operand" " f") ++ (neg:ANYF (match_operand:ANYF 3 "register_operand" " f")))))] ++ "!HONOR_SIGNED_ZEROS (mode)" ++ "fmadd.\t%0,%1,%2,%3" ++ [(set_attr "type" "fmadd") ++ (set_attr "mode" "")]) ++ ++;; -(-a * b + c), modulo signed zeros ++(define_insn "*fms4" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (neg:ANYF ++ (fma:ANYF ++ (neg:ANYF (match_operand:ANYF 1 "register_operand" " f")) ++ (match_operand:ANYF 2 "register_operand" " f") ++ (match_operand:ANYF 3 "register_operand" " f"))))] ++ "!HONOR_SIGNED_ZEROS (mode)" ++ "fmsub.\t%0,%1,%2,%3" ++ [(set_attr "type" "fmadd") ++ (set_attr "mode" "")]) ++ ++;; -(a * b + c) ++(define_insn "*fnms4" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (neg:ANYF ++ (fma:ANYF ++ (match_operand:ANYF 1 "register_operand" " f") ++ (match_operand:ANYF 2 "register_operand" " f") ++ (match_operand:ANYF 3 "register_operand" " f"))))] ++ "" ++ "fnmadd.\t%0,%1,%2,%3" ++ [(set_attr "type" "fmadd") ++ (set_attr "mode" "")]) ++ ++;; -(a * b - c) ++(define_insn "*fnma4" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (neg:ANYF ++ (fma:ANYF ++ (match_operand:ANYF 1 "register_operand" " f") ++ (match_operand:ANYF 2 
"register_operand" " f") ++ (neg:ANYF (match_operand:ANYF 3 "register_operand" " f")))))] ++ "" ++ "fnmsub.\t%0,%1,%2,%3" ++ [(set_attr "type" "fmadd") ++ (set_attr "mode" "")]) ++ ++;; ++;; .................... ++;; ++;; SQUARE ROOT ++;; ++;; .................... ++ ++(define_insn "*sqrt2" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (sqrt:ANYF (match_operand:ANYF 1 "register_operand" "f")))] ++ "" ++ "fsqrt.\t%0,%1" ++ [(set_attr "type" "fsqrt") ++ (set_attr "mode" "") ++ (set_attr "insn_count" "1")]) ++ ++(define_expand "sqrt2" ++ [(set (match_operand:ANYF 0 "register_operand") ++ (sqrt:ANYF (match_operand:ANYF 1 "register_operand")))] ++ "" ++{ ++ if (mode == SFmode ++ && TARGET_RECIP_SQRT ++ && flag_unsafe_math_optimizations ++ && !optimize_insn_for_size_p () ++ && flag_finite_math_only && !flag_trapping_math) ++ { ++ loongarch_emit_swrsqrtsf (operands[0], operands[1], SFmode, 0); ++ DONE; ++ } ++}) ++ ++(define_expand "rsqrt2" ++ [(set (match_operand:ANYF 0 "register_operand") ++ (unspec:ANYF [(match_operand:ANYF 1 "register_operand")] ++ UNSPEC_RSQRT))] ++ "TARGET_HARD_FLOAT && flag_unsafe_math_optimizations" ++{ ++ if (mode == SFmode ++ && TARGET_RECIP_RSQRT ++ && flag_unsafe_math_optimizations ++ && !optimize_insn_for_size_p () ++ && flag_finite_math_only && !flag_trapping_math) ++ { ++ loongarch_emit_swrsqrtsf (operands[0], operands[1], SFmode, 1); ++ DONE; ++ } ++}) ++ ++(define_insn "*rsqrt2" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")] ++ UNSPEC_RSQRT))] ++ "TARGET_HARD_FLOAT && flag_unsafe_math_optimizations" ++ "frsqrt.\t%0,%1" ++ [(set_attr "type" "frsqrt") ++ (set_attr "mode" "")]) ++ ++(define_insn "rsqrte" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")] ++ UNSPEC_RSQRTE))] ++ "TARGET_HARD_FLOAT && flag_unsafe_math_optimizations && TARGET_RECIP_SQRT" ++ "frsqrte.\t%0,%1" ++ [(set_attr "type" "frsqrte") ++ (set_attr "mode" "")]) ++ ++(define_insn "*rsqrta" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (div:ANYF (match_operand:ANYF 1 "const_1_operand" "") ++ (sqrt:ANYF (match_operand:ANYF 2 "register_operand" "f"))))] ++ "TARGET_HARD_FLOAT && flag_unsafe_math_optimizations" ++ "frsqrt.\t%0,%2" ++ [(set_attr "type" "frsqrt") ++ (set_attr "mode" "") ++ (set_attr "insn_count" "1")]) ++ ++(define_insn "*rsqrtb" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (sqrt:ANYF (div:ANYF (match_operand:ANYF 1 "const_1_operand" "") ++ (match_operand:ANYF 2 "register_operand" "f"))))] ++ "flag_unsafe_math_optimizations" ++ "frsqrt.\t%0,%2" ++ [(set_attr "type" "frsqrt") ++ (set_attr "mode" "") ++ (set_attr "insn_count" "1")]) ++ ++;; ++;; .................... ++;; ++;; ABSOLUTE VALUE ++;; ++;; .................... ++ ++(define_insn "abs2" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (abs:ANYF (match_operand:ANYF 1 "register_operand" "f")))] ++ "" ++ "fabs.\t%0,%1" ++ [(set_attr "type" "fabs") + (set_attr "mode" "")]) + + ;; +@@ -1290,7 +1168,54 @@ + [(set_attr "type" "clz") + (set_attr "mode" "")]) + ++;; ++;; .................... ++;; ++;; MIN/MAX ++;; ++;; .................... 
++ ++(define_insn "smax3" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (smax:ANYF (match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f")))] ++ "" ++ "fmax.\t%0,%1,%2" ++ [(set_attr "type" "fmove") ++ (set_attr "mode" "")]) ++ ++(define_insn "smin3" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (smin:ANYF (match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f")))] ++ "" ++ "fmin.\t%0,%1,%2" ++ [(set_attr "type" "fmove") ++ (set_attr "mode" "")]) + ++(define_insn "smaxa3" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (if_then_else:ANYF ++ (gt (abs:ANYF (match_operand:ANYF 1 "register_operand" "f")) ++ (abs:ANYF (match_operand:ANYF 2 "register_operand" "f"))) ++ (match_dup 1) ++ (match_dup 2)))] ++ "" ++ "fmaxa.\t%0,%1,%2" ++ [(set_attr "type" "fmove") ++ (set_attr "mode" "")]) ++ ++(define_insn "smina3" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (if_then_else:ANYF ++ (lt (abs:ANYF (match_operand:ANYF 1 "register_operand" "f")) ++ (abs:ANYF (match_operand:ANYF 2 "register_operand" "f"))) ++ (match_dup 1) ++ (match_dup 2)))] ++ "" ++ "fmina.\t%0,%1,%2" ++ [(set_attr "type" "fmove") ++ (set_attr "mode" "")]) + + ;; + ;; .................... +@@ -1299,28 +1224,21 @@ + ;; + ;; .................... + +-(define_insn "negsi2" +- [(set (match_operand:SI 0 "register_operand" "=r") +- (neg:SI (match_operand:SI 1 "register_operand" "r")))] ++(define_insn "neg2" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (neg:GPR (match_operand:GPR 1 "register_operand" "r")))] + "" +-{ +- return "sub.w\t%0,%.,%1"; +-} ++ "sub.\t%0,%.,%1" + [(set_attr "alu_type" "sub") +- (set_attr "mode" "SI")]) +- +-(define_insn "negdi2" +- [(set (match_operand:DI 0 "register_operand" "=r") +- (neg:DI (match_operand:DI 1 "register_operand" "r")))] +- "TARGET_64BIT" +- "sub.d\t%0,%.,%1" +- [(set_attr "alu_type" "sub") +- (set_attr "mode" "DI")]) ++ (set_attr "mode" "")]) + +-;; The "legacy" (as opposed to "2008") form of NEG.fmt is an arithmetic +-;; instruction that treats all NaN inputs as invalid; it does not flip +-;; their sign bit. We therefore can't use that form if the signs of +-;; NaNs matter. ++(define_insn "one_cmpl2" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (not:GPR (match_operand:GPR 1 "register_operand" "r")))] ++ "" ++ "nor\t%0,%.,%1" ++ [(set_attr "alu_type" "not") ++ (set_attr "mode" "")]) + + (define_insn "neg2" + [(set (match_operand:ANYF 0 "register_operand" "=f") +@@ -1329,17 +1247,6 @@ + "fneg.\t%0,%1" + [(set_attr "type" "fneg") + (set_attr "mode" "")]) +- +-(define_insn "one_cmpl2" +- [(set (match_operand:GPR 0 "register_operand" "=r") +- (not:GPR (match_operand:GPR 1 "register_operand" "r")))] +- "" +-{ +- return "nor\t%0,%.,%1"; +-} +- [(set_attr "alu_type" "not") +- (set_attr "compression" "*") +- (set_attr "mode" "")]) + + + ;; +@@ -1350,133 +1257,58 @@ + ;; .................... + ;; + +- +-(define_expand "and3" +- [(set (match_operand:GPR 0 "register_operand") +- (and:GPR (match_operand:GPR 1 "register_operand") +- (match_operand:GPR 2 "and_reg_operand")))]) +- +-;; The middle-end is not allowed to convert ANDing with 0xffff_ffff into a +-;; zero_extendsidi2 because of TARGET_TRULY_NOOP_TRUNCATION, so handle these +-;; here. Note that this variant does not trigger for SI mode because we +-;; require a 64-bit HOST_WIDE_INT and 0xffff_ffff wouldn't be a canonical +-;; sign-extended SImode value. 
+-;; +-;; These are possible combinations for operand 1 and 2. +-;; (r=register, mem=memory, x=match, S=split): +-;; +-;; \ op1 r/EXT r/!EXT mem +-;; op2 +-;; +-;; andi x x +-;; 0xff x x x +-;; 0xffff x x x +-;; 0xffff_ffff x S x +-;; low-bitmask x +-;; register x x +-;; register =op1 +- +-(define_insn "*and3" +- [(set (match_operand:GPR 0 "register_operand" "=r,r,r,r,r,r,r") +- (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "o,o,W,r,r,r,r") +- (match_operand:GPR 2 "and_operand" "Yb,Yh,Yw,K,Yx,Yw,r")))] +- " and_operands_ok (mode, operands[1], operands[2])" +-{ +- int len; +- +- switch (which_alternative) +- { +- case 0: +- operands[1] = gen_lowpart (QImode, operands[1]); +- return "ld.bu\t%0,%1"; +- case 1: +- operands[1] = gen_lowpart (HImode, operands[1]); +- return "ld.hu\t%0,%1"; +- case 2: +- operands[1] = gen_lowpart (SImode, operands[1]); +- if (loongarch_14bit_shifted_offset_address_p (XEXP (operands[1], 0), SImode)) +- return "ldptr.w\t%0,%1\n\tbstrins.d\t%0,$r0,63,32"; +- else if (loongarch_12bit_offset_address_p (XEXP (operands[1], 0), SImode)) +- return "ld.wu\t%0,%1"; +- else +- gcc_unreachable (); +- case 3: +- return "andi\t%0,%1,%x2"; +- case 4: +- len = low_bitmask_len (mode, INTVAL (operands[2])); +- operands[2] = GEN_INT (len-1); +- return "bstrpick.\t%0,%1,%2,0"; +- case 5: +- return "#"; +- case 6: +- return "and\t%0,%1,%2"; +- default: +- gcc_unreachable (); +- } +-} +- [(set_attr "move_type" "load,load,load,andi,pick_ins,shift_shift,logical") +- (set_attr "compression" "*,*,*,*,*,*,*") ++(define_insn "3" ++ [(set (match_operand:GPR 0 "register_operand" "=r,r") ++ (any_bitwise:GPR (match_operand:GPR 1 "register_operand" "r,r") ++ (match_operand:GPR 2 "uns_arith_operand" "r,K")))] ++ "" ++ "%i2\t%0,%1,%2" ++ [(set_attr "type" "logical") + (set_attr "mode" "")]) + +-(define_expand "ior3" +- [(set (match_operand:GPR 0 "register_operand") +- (ior:GPR (match_operand:GPR 1 "register_operand") +- (match_operand:GPR 2 "uns_arith_operand")))] ++(define_insn "and3_extended" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "r") ++ (match_operand:GPR 2 "low_bitmask_operand" "Yx")))] + "" + { +-}) ++ int len; + +-(define_insn "*ior3" +- [(set (match_operand:GPR 0 "register_operand" "=r,r") +- (ior:GPR (match_operand:GPR 1 "register_operand" "r,r") +- (match_operand:GPR 2 "uns_arith_operand" "r,K")))] +- "" +- "@ +- or\t%0,%1,%2 +- ori\t%0,%1,%x2" +- [(set_attr "alu_type" "or") +- (set_attr "compression" "*,*") ++ len = low_bitmask_len (mode, INTVAL (operands[2])); ++ operands[2] = GEN_INT (len-1); ++ return "bstrpick.\t%0,%1,%2,0"; ++} ++ [(set_attr "move_type" "pick_ins") + (set_attr "mode" "")]) + + (define_insn "*iorhi3" + [(set (match_operand:HI 0 "register_operand" "=r,r") +- (ior:HI (match_operand:HI 1 "register_operand" "r,r") +- (match_operand:HI 2 "uns_arith_operand" "K,r")))] ++ (ior:HI (match_operand:HI 1 "register_operand" "%r,r") ++ (match_operand:HI 2 "uns_arith_operand" "r,K")))] + "" +- "@ +- ori\t%0,%1,%x2 +- or\t%0,%1,%2" +- [(set_attr "alu_type" "or") ++ "or%i2\t%0,%1,%2" ++ [(set_attr "type" "logical") + (set_attr "mode" "HI")]) + +-(define_expand "xor3" +- [(set (match_operand:GPR 0 "register_operand") +- (xor:GPR (match_operand:GPR 1 "register_operand") +- (match_operand:GPR 2 "uns_arith_operand")))] +- "" +- "") +- +-(define_insn "*xor3" +- [(set (match_operand:GPR 0 "register_operand" "=r,r") +- (xor:GPR (match_operand:GPR 1 "register_operand" "r,r") +- (match_operand:GPR 2 
"uns_arith_operand" "r,K")))] +- "" +- "@ +- xor\t%0,%1,%2 +- xori\t%0,%1,%x2" +- [(set_attr "alu_type" "xor") +- (set_attr "compression" "*,*") +- (set_attr "mode" "")]) +- +- + (define_insn "*nor3" + [(set (match_operand:GPR 0 "register_operand" "=r") +- (and:GPR (not:GPR (match_operand:GPR 1 "register_operand" "r")) ++ (and:GPR (not:GPR (match_operand:GPR 1 "register_operand" "%r")) + (not:GPR (match_operand:GPR 2 "register_operand" "r"))))] + "" + "nor\t%0,%1,%2" +- [(set_attr "alu_type" "nor") ++ [(set_attr "type" "logical") + (set_attr "mode" "")]) ++ ++(define_insn "n" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (neg_bitwise:GPR ++ (not:GPR (match_operand:GPR 1 "register_operand" "r")) ++ (match_operand:GPR 2 "register_operand" "r")))] ++ "" ++ "n\t%0,%2,%1" ++ [(set_attr "type" "logical") ++ (set_attr "mode" "")]) ++ + + ;; + ;; .................... +@@ -1485,163 +1317,109 @@ + ;; + ;; .................... + +- +- +-(define_insn "truncdfsf2" +- [(set (match_operand:SF 0 "register_operand" "=f") +- (float_truncate:SF (match_operand:DF 1 "register_operand" "f")))] +- "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT" +- "fcvt.s.d\t%0,%1" +- [(set_attr "type" "fcvt") +- (set_attr "cnv_mode" "D2S") +- (set_attr "mode" "SF")]) +- +-;; Integer truncation patterns. Truncating SImode values to smaller +-;; modes is a no-op, as it is for most other GCC ports. Truncating +-;; DImode values to SImode is not a no-op for TARGET_64BIT since we +-;; need to make sure that the lower 32 bits are properly sign-extended +-;; (see TARGET_TRULY_NOOP_TRUNCATION). Truncating DImode values into modes +-;; smaller than SImode is equivalent to two separate truncations: +-;; +-;; A B +-;; DI ---> HI == DI ---> SI ---> HI +-;; DI ---> QI == DI ---> SI ---> QI +-;; +-;; Step A needs a real instruction but step B does not. +- +-(define_insn "truncdisi2" +- [(set (match_operand:SI 0 "nonimmediate_operand" "=r,ZC,m") +- (truncate:SI (match_operand:DI 1 "register_operand" "r,r,r")))] +- "TARGET_64BIT" +- "@ +- slli.w\t%0,%1,0 +- stptr.w\t%1,%0 +- st.w\t%1,%0" +- [(set_attr "move_type" "sll0,store,store") +- (set_attr "mode" "SI")]) +- + (define_insn "truncdi2" +- [(set (match_operand:SHORT 0 "nonimmediate_operand" "=r,m") +- (truncate:SHORT (match_operand:DI 1 "register_operand" "r,r")))] ++ [(set (match_operand:SUBDI 0 "nonimmediate_operand" "=r,m,k") ++ (truncate:SUBDI (match_operand:DI 1 "register_operand" "r,r,r")))] + "TARGET_64BIT" + "@ + slli.w\t%0,%1,0 +- st.\t%1,%0" +- [(set_attr "move_type" "sll0,store") ++ st.\t%1,%0 ++ stx.\t%1,%0" ++ [(set_attr "move_type" "sll0,store,store") + (set_attr "mode" "SI")]) + +-;; Combiner patterns to optimize shift/truncate combinations. 
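A hedged sketch of what the new truncdi2 pattern above handles (illustrative C; the function name is invented): DImode-to-SImode truncation is not a no-op on this port because the low 32 bits must stay sign-extended, so the register alternative is expected to come out as a single slli.w, while memory destinations take the st.w/stx.w alternatives.

  /* Illustrative only: a plausible result is slli.w $r4,$r4,0.  */
  int narrow (long x) { return (int) x; }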
+- +-(define_insn "*ashr_trunc" +- [(set (match_operand:SUBDI 0 "register_operand" "=r") +- (truncate:SUBDI +- (ashiftrt:DI (match_operand:DI 1 "register_operand" "r") +- (match_operand:DI 2 "const_arith_operand" ""))))] +- "TARGET_64BIT && IN_RANGE (INTVAL (operands[2]), 32, 63)" +- "srai.d\t%0,%1,%2" +- [(set_attr "type" "shift") +- (set_attr "mode" "")]) ++(define_insn "truncdfsf2" ++ [(set (match_operand:SF 0 "register_operand" "=f") ++ (float_truncate:SF (match_operand:DF 1 "register_operand" "f")))] ++ "TARGET_DOUBLE_FLOAT" ++ "fcvt.s.d\t%0,%1" ++ [(set_attr "type" "fcvt") ++ (set_attr "cnv_mode" "D2S") ++ (set_attr "mode" "SF")]) + +-(define_insn "*lshr32_trunc" +- [(set (match_operand:SUBDI 0 "register_operand" "=r") +- (truncate:SUBDI +- (lshiftrt:DI (match_operand:DI 1 "register_operand" "r") +- (const_int 32))))] +- "TARGET_64BIT" +- "srai.d\t%0,%1,32" +- [(set_attr "type" "shift") +- (set_attr "mode" "")]) ++;;(define_insn "truncdisi2_extended" ++;; [(set (match_operand:SI 0 "nonimmediate_operand" "=ZC") ++;; (truncate:SI (match_operand:DI 1 "register_operand" "r")))] ++;; "TARGET_64BIT" ++;; "stptr.w\t%1,%0" ++;; [(set_attr "move_type" "store") ++;; (set_attr "mode" "SI")]) + + +- + ;; + ;; .................... + ;; + ;; ZERO EXTENSION + ;; + ;; .................... +- +-;; Extension insns. +- + (define_expand "zero_extendsidi2" + [(set (match_operand:DI 0 "register_operand") +- (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))] ++ (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))] + "TARGET_64BIT") + +-(define_insn "*zero_extendsidi2_dext" +- [(set (match_operand:DI 0 "register_operand" "=r,r,r") +- (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,ZC,W")))] ++(define_insn_and_split "*zero_extendsidi2_internal" ++ [(set (match_operand:DI 0 "register_operand" "=r,r,r,r") ++ (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m,ZC,k")))] + "TARGET_64BIT" + "@ + bstrpick.d\t%0,%1,31,0 +- ldptr.w\t%0,%1\n\tlu32i.d\t%0,0 +- ld.wu\t%0,%1" +- [(set_attr "move_type" "arith,load,load") +- (set_attr "mode" "DI") +- (set_attr "insn_count" "1,2,1")]) +- +-;; See the comment before the *and3 pattern why this is generated by +-;; combine. 
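As a reference point for the *zero_extendsidi2_internal pattern above, a minimal C sketch (not part of the patch; name invented): the register alternative has no cheap sign-preserving move to rely on, so bstrpick.d with bits 31..0 is the expected output, while memory sources go through ld.wu/ldx.wu or the ldptr.w split.

  /* Illustrative: a plausible result is bstrpick.d $r4,$r4,31,0.  */
  unsigned long zext32 (unsigned int x) { return x; }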
+- +-(define_expand "zero_extend2" +- [(set (match_operand:GPR 0 "register_operand") +- (zero_extend:GPR (match_operand:SHORT 1 "nonimmediate_operand")))] +- "" +-{ +-}) +- +-(define_insn "*zero_extend2" +- [(set (match_operand:GPR 0 "register_operand" "=r,r") +- (zero_extend:GPR +- (match_operand:SHORT 1 "nonimmediate_operand" "r,m")))] +- "" +-{ +- switch (which_alternative) +- { +- case 0: +- return "bstrpick.\t%0,%1,,0"; +- case 1: +- return "ld.u\t%0,%1"; +- default: +- gcc_unreachable (); ++ ld.wu\t%0,%1 ++ # ++ ldx.wu\t%0,%1" ++ "&& reload_completed ++ && MEM_P (operands[1]) ++ && (loongarch_14bit_shifted_offset_address_p (XEXP (operands[1], 0), SImode) ++ && !loongarch_12bit_offset_address_p (XEXP (operands[1], 0), SImode)) ++ && !paradoxical_subreg_p (operands[0])" ++ [(set (match_dup 3) (match_dup 1)) ++ (set (match_dup 0) ++ (ior:DI (zero_extend:DI (subreg:SI (match_dup 0) 0)) ++ (match_dup 2)))] ++ { ++ operands[1] = gen_lowpart (SImode, operands[1]); ++ operands[3] = gen_lowpart (SImode, operands[0]); ++ operands[2] = const0_rtx; + } +-} +- [(set_attr "move_type" "pick_ins,load") +- (set_attr "compression" "*,*") +- (set_attr "mode" "")]) +- ++ [(set_attr "move_type" "arith,load,load,load") ++ (set_attr "mode" "DI")]) + +-(define_expand "zero_extendqihi2" +- [(set (match_operand:HI 0 "register_operand") +- (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand")))] ++(define_insn "zero_extend2" ++ [(set (match_operand:GPR 0 "register_operand" "=r,r,r") ++ (zero_extend:GPR ++ (match_operand:SHORT 1 "nonimmediate_operand" "r,m,k")))] + "" +-{ +-}) ++ "@ ++ bstrpick.w\t%0,%1,,0 ++ ld.u\t%0,%1 ++ ldx.u\t%0,%1" ++ [(set_attr "move_type" "pick_ins,load,load") ++ (set_attr "mode" "")]) + +-(define_insn "*zero_extendqihi2" +- [(set (match_operand:HI 0 "register_operand" "=r,r") +- (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "r,m")))] ++(define_insn "zero_extendqihi2" ++ [(set (match_operand:HI 0 "register_operand" "=r,r,r") ++ (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "r,k,m")))] + "" + "@ +- andi\t%0,%1,0x00ff ++ andi\t%0,%1,0xff ++ ldx.bu\t%0,%1 + ld.bu\t%0,%1" +- [(set_attr "move_type" "andi,load") ++ [(set_attr "move_type" "andi,load,load") + (set_attr "mode" "HI")]) + + ;; Combiner patterns to optimize truncate/zero_extend combinations. + + (define_insn "*zero_extend_trunc" + [(set (match_operand:GPR 0 "register_operand" "=r") +- (zero_extend:GPR ++ (zero_extend:GPR + (truncate:SHORT (match_operand:DI 1 "register_operand" "r"))))] + "TARGET_64BIT" +- "bstrpick.\t%0,%1,,0" ++ "bstrpick.w\t%0,%1,,0" + [(set_attr "move_type" "pick_ins") + (set_attr "mode" "")]) + + (define_insn "*zero_extendhi_truncqi" + [(set (match_operand:HI 0 "register_operand" "=r") +- (zero_extend:HI ++ (zero_extend:HI + (truncate:QI (match_operand:DI 1 "register_operand" "r"))))] + "TARGET_64BIT" + "andi\t%0,%1,0xff" +@@ -1655,142 +1433,77 @@ + ;; + ;; .................... + +-;; Extension insns. +-;; Those for integer source operand are ordered widest source type first. +- +-;; When TARGET_64BIT, all SImode integer and accumulator registers +-;; should already be in sign-extended form (see TARGET_TRULY_NOOP_TRUNCATION +-;; and truncdisi2). We can therefore get rid of register->register +-;; instructions if we constrain the source to be in the same register as +-;; the destination. +-;; +-;; Only the pre-reload scheduler sees the type of the register alternatives; +-;; we split them into nothing before the post-reload scheduler runs. 
+-;; These alternatives therefore have type "move" in order to reflect +-;; what happens if the two pre-reload operands cannot be tied, and are +-;; instead allocated two separate GPRs. We don't distinguish between +-;; the GPR and LO cases because we don't usually know during pre-reload +-;; scheduling whether an operand will be LO or not. + (define_insn_and_split "extendsidi2" +- [(set (match_operand:DI 0 "register_operand" "=r,r,r") +- (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "0,ZC,m")))] ++ [(set (match_operand:DI 0 "register_operand" "=r,r,r,r") ++ (sign_extend:DI ++ (match_operand:SI 1 "nonimmediate_operand" "0,ZC,m,k")))] + "TARGET_64BIT" +- "@ +- # +- ldptr.w\t%0,%1 +- ld.w\t%0,%1" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "#"; ++ case 1: ++ { ++ rtx offset = XEXP (operands[1], 0); ++ if (GET_CODE (offset) == PLUS) ++ offset = XEXP (offset, 1); ++ else ++ offset = const0_rtx; ++ if (const_arith_operand (offset, Pmode) || (offset == const0_rtx)) ++ return "ld.w\t%0,%1"; ++ else ++ return "ldptr.w\t%0,%1"; ++ } ++ case 2: ++ return "ld.w\t%0,%1"; ++ case 3: ++ return "ldx.w\t%0,%1"; ++ default: ++ gcc_unreachable (); ++ } ++} + "&& reload_completed && register_operand (operands[1], VOIDmode)" + [(const_int 0)] + { + emit_note (NOTE_INSN_DELETED); + DONE; + } +- [(set_attr "move_type" "move,load,load") ++ [(set_attr "move_type" "move,load,load,load") + (set_attr "mode" "DI")]) + +-(define_expand "extend2" +- [(set (match_operand:GPR 0 "register_operand") +- (sign_extend:GPR (match_operand:SHORT 1 "nonimmediate_operand")))] +- "") +- +- +-(define_insn "*extend2_se" +- [(set (match_operand:GPR 0 "register_operand" "=r,r") +- (sign_extend:GPR +- (match_operand:SHORT 1 "nonimmediate_operand" "r,m")))] ++(define_insn "extend2" ++ [(set (match_operand:GPR 0 "register_operand" "=r,r,r") ++ (sign_extend:GPR ++ (match_operand:SHORT 1 "nonimmediate_operand" "r,m,k")))] + "" + "@ + ext.w.\t%0,%1 +- ld.\t%0,%1" +- [(set_attr "move_type" "signext,load") ++ ld.\t%0,%1 ++ ldx.\t%0,%1" ++ [(set_attr "move_type" "signext,load,load") + (set_attr "mode" "")]) + +-(define_expand "extendqihi2" +- [(set (match_operand:HI 0 "register_operand") +- (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand")))] +- "") +- +-(define_insn "*extendqihi2_seb" +- [(set (match_operand:HI 0 "register_operand" "=r,r") +- (sign_extend:HI +- (match_operand:QI 1 "nonimmediate_operand" "r,m")))] ++(define_insn "extendqihi2" ++ [(set (match_operand:HI 0 "register_operand" "=r,r,r") ++ (sign_extend:HI ++ (match_operand:QI 1 "nonimmediate_operand" "r,m,k")))] + "" + "@ + ext.w.b\t%0,%1 +- ld.b\t%0,%1" +- [(set_attr "move_type" "signext,load") +- (set_attr "mode" "SI")]) +- +-;; Combiner patterns for truncate/sign_extend combinations. The SI versions +-;; use the shift/truncate patterns. 
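A short illustration of the sign-extension patterns above (sketch only; function names invented): because SImode values are kept sign-extended in 64-bit registers on this port, the register case of extendsidi2 splits away to nothing after reload, while the narrower modes use ext.w.h/ext.w.b or a sign-extending load.

  /* Illustrative; comments show plausible, not guaranteed, output.  */
  long from_int   (int x)   { return x; }   /* no instruction after the split */
  long from_short (short x) { return x; }   /* ext.w.h */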
+- +-(define_insn_and_split "*extenddi_truncate" +- [(set (match_operand:DI 0 "register_operand" "=r") +- (sign_extend:DI +- (truncate:SHORT (match_operand:DI 1 "register_operand" "r"))))] +- "TARGET_64BIT" +- "#" +- "&& reload_completed" +- [(set (match_dup 2) +- (ashift:DI (match_dup 1) +- (match_dup 3))) +- (set (match_dup 0) +- (ashiftrt:DI (match_dup 2) +- (match_dup 3)))] +-{ +- operands[2] = gen_lowpart (DImode, operands[0]); +- operands[3] = GEN_INT (BITS_PER_WORD - GET_MODE_BITSIZE (mode)); +-} +- [(set_attr "move_type" "shift_shift") +- (set_attr "mode" "DI")]) +- +-(define_insn_and_split "*extendsi_truncate" +- [(set (match_operand:SI 0 "register_operand" "=r") +- (sign_extend:SI +- (truncate:SHORT (match_operand:DI 1 "register_operand" "r"))))] +- "TARGET_64BIT" +- "#" +- "&& reload_completed" +- [(set (match_dup 2) +- (ashift:DI (match_dup 1) +- (match_dup 3))) +- (set (match_dup 0) +- (truncate:SI (ashiftrt:DI (match_dup 2) +- (match_dup 3))))] +-{ +- operands[2] = gen_lowpart (DImode, operands[0]); +- operands[3] = GEN_INT (BITS_PER_WORD - GET_MODE_BITSIZE (mode)); +-} +- [(set_attr "move_type" "shift_shift") +- (set_attr "mode" "SI")]) +- +-(define_insn_and_split "*extendhi_truncateqi" +- [(set (match_operand:HI 0 "register_operand" "=r") +- (sign_extend:HI +- (truncate:QI (match_operand:DI 1 "register_operand" "r"))))] +- "TARGET_64BIT" +- "#" +- "&& reload_completed" +- [(set (match_dup 2) +- (ashift:DI (match_dup 1) +- (const_int 56))) +- (set (match_dup 0) +- (truncate:HI (ashiftrt:DI (match_dup 2) +- (const_int 56))))] +-{ +- operands[2] = gen_lowpart (DImode, operands[0]); +-} +- [(set_attr "move_type" "shift_shift") ++ ld.b\t%0,%1 ++ ldx.b\t%0,%1" ++ [(set_attr "move_type" "signext,load,load") + (set_attr "mode" "SI")]) + + (define_insn "extendsfdf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (float_extend:DF (match_operand:SF 1 "register_operand" "f")))] +- "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT" ++ "TARGET_DOUBLE_FLOAT" + "fcvt.d.s\t%0,%1" +- [(set_attr "type" "fcvt") +- (set_attr "cnv_mode" "S2D") +- (set_attr "mode" "DF")]) ++ [(set_attr "type" "fcvt") ++ (set_attr "cnv_mode" "S2D") ++ (set_attr "mode" "DF")]) + + ;; + ;; .................... +@@ -1799,104 +1512,60 @@ + ;; + ;; .................... 
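For the float extension pattern above and the ftintrz-based fix_trunc patterns that follow, a hedged C reference (not from the patch; names invented, output shown is plausible rather than guaranteed):

  /* Illustrative conversions on a hard-float LoongArch toolchain.  */
  double widen   (float f)  { return f; }          /* fcvt.d.s */
  long   to_long (double d) { return (long) d; }   /* ftintrz.l.d + movfr2gr.d */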
+ +-(define_expand "fix_truncdfsi2" +- [(set (match_operand:SI 0 "register_operand") +- (fix:SI (match_operand:DF 1 "register_operand")))] +- "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT" +-"" +-) +- +-(define_insn "fix_truncdfsi2_insn" +- [(set (match_operand:SI 0 "register_operand" "=f") +- (fix:SI (match_operand:DF 1 "register_operand" "f")))] +- "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT" +- "ftintrz.w.d %0,%1" +- [(set_attr "type" "fcvt") +- (set_attr "mode" "DF") +- (set_attr "cnv_mode" "D2I")]) +- +- +-(define_expand "fix_truncsfsi2" +- [(set (match_operand:SI 0 "register_operand") +- (fix:SI (match_operand:SF 1 "register_operand")))] +- "TARGET_HARD_FLOAT" +-"" +-) +- +-(define_insn "fix_truncsfsi2_insn" +- [(set (match_operand:SI 0 "register_operand" "=f") +- (fix:SI (match_operand:SF 1 "register_operand" "f")))] +- "TARGET_HARD_FLOAT" +- "ftintrz.w.s %0,%1" +- [(set_attr "type" "fcvt") +- (set_attr "mode" "SF") +- (set_attr "cnv_mode" "S2I")]) +- +- +-(define_insn "fix_truncdfdi2" +- [(set (match_operand:DI 0 "register_operand" "=f") +- (fix:DI (match_operand:DF 1 "register_operand" "f")))] +- "TARGET_HARD_FLOAT && TARGET_FLOAT64 && TARGET_DOUBLE_FLOAT" +- "ftintrz.l.d %0,%1" +- [(set_attr "type" "fcvt") +- (set_attr "mode" "DF") +- (set_attr "cnv_mode" "D2I")]) ++;; conversion of a floating-point value to a integer + ++(define_insn "fix_trunc2" ++ [(set (match_operand:GPR 0 "register_operand" "=f") ++ (fix:GPR (match_operand:ANYF 1 "register_operand" "f")))] ++ "" ++ "ftintrz..\t%0,%1" ++ [(set_attr "type" "fcvt") ++ (set_attr "mode" "")]) + +-(define_insn "fix_truncsfdi2" +- [(set (match_operand:DI 0 "register_operand" "=f") +- (fix:DI (match_operand:SF 1 "register_operand" "f")))] +- "TARGET_HARD_FLOAT && TARGET_FLOAT64 && TARGET_DOUBLE_FLOAT" +- "ftintrz.l.s %0,%1" +- [(set_attr "type" "fcvt") +- (set_attr "mode" "SF") +- (set_attr "cnv_mode" "S2I")]) +- ++;; conversion of an integral (or boolean) value to a floating-point value + + (define_insn "floatsidf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (float:DF (match_operand:SI 1 "register_operand" "f")))] +- "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT" ++ "TARGET_DOUBLE_FLOAT" + "ffint.d.w\t%0,%1" +- [(set_attr "type" "fcvt") +- (set_attr "mode" "DF") ++ [(set_attr "type" "fcvt") ++ (set_attr "mode" "DF") + (set_attr "cnv_mode" "I2D")]) + +- + (define_insn "floatdidf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (float:DF (match_operand:DI 1 "register_operand" "f")))] +- "TARGET_HARD_FLOAT && TARGET_FLOAT64 && TARGET_DOUBLE_FLOAT" ++ "TARGET_DOUBLE_FLOAT" + "ffint.d.l\t%0,%1" +- [(set_attr "type" "fcvt") +- (set_attr "mode" "DF") +- (set_attr "cnv_mode" "I2D")]) +- ++ [(set_attr "type" "fcvt") ++ (set_attr "mode" "DF") ++ (set_attr "cnv_mode" "I2D")]) + + (define_insn "floatsisf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (float:SF (match_operand:SI 1 "register_operand" "f")))] + "TARGET_HARD_FLOAT" + "ffint.s.w\t%0,%1" +- [(set_attr "type" "fcvt") +- (set_attr "mode" "SF") ++ [(set_attr "type" "fcvt") ++ (set_attr "mode" "SF") + (set_attr "cnv_mode" "I2S")]) + +- + (define_insn "floatdisf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (float:SF (match_operand:DI 1 "register_operand" "f")))] +- "TARGET_HARD_FLOAT && TARGET_FLOAT64 && TARGET_DOUBLE_FLOAT" ++ "TARGET_DOUBLE_FLOAT" + "ffint.s.l\t%0,%1" +- [(set_attr "type" "fcvt") +- (set_attr "mode" "SF") ++ [(set_attr "type" "fcvt") ++ (set_attr "mode" "SF") + (set_attr "cnv_mode" "I2S")]) + ++;; Convert a floating-point value to an 
unsigned integer. + + (define_expand "fixuns_truncdfsi2" + [(set (match_operand:SI 0 "register_operand") + (unsigned_fix:SI (match_operand:DF 1 "register_operand")))] +- "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT" ++ "TARGET_DOUBLE_FLOAT" + { + rtx reg1 = gen_reg_rtx (DFmode); + rtx reg2 = gen_reg_rtx (DFmode); +@@ -1908,41 +1577,38 @@ + + real_2expN (&offset, 31, DFmode); + +- if (reg1) /* Turn off complaints about unreached code. */ +- { +- loongarch_emit_move (reg1, const_double_from_real_value (offset, DFmode)); +- do_pending_stack_adjust (); ++ loongarch_emit_move (reg1, ++ const_double_from_real_value (offset, DFmode)); ++ do_pending_stack_adjust (); + +- test = gen_rtx_GE (VOIDmode, operands[1], reg1); +- emit_jump_insn (gen_cbranchdf4 (test, operands[1], reg1, label1)); ++ test = gen_rtx_GE (VOIDmode, operands[1], reg1); ++ emit_jump_insn (gen_cbranchdf4 (test, operands[1], reg1, label1)); + +- emit_insn (gen_fix_truncdfsi2 (operands[0], operands[1])); +- emit_jump_insn (gen_rtx_SET (pc_rtx, +- gen_rtx_LABEL_REF (VOIDmode, label2))); +- emit_barrier (); ++ emit_insn (gen_fix_truncdfsi2 (operands[0], operands[1])); ++ emit_jump_insn (gen_rtx_SET (pc_rtx, ++ gen_rtx_LABEL_REF (VOIDmode, label2))); ++ emit_barrier (); + +- emit_label (label1); +- loongarch_emit_move (reg2, gen_rtx_MINUS (DFmode, operands[1], reg1)); +- loongarch_emit_move (reg3, GEN_INT (trunc_int_for_mode +- (BITMASK_HIGH, SImode))); ++ emit_label (label1); ++ loongarch_emit_move (reg2, gen_rtx_MINUS (DFmode, operands[1], reg1)); ++ loongarch_emit_move (reg3, GEN_INT (trunc_int_for_mode ++ (BITMASK_HIGH, SImode))); + +- emit_insn (gen_fix_truncdfsi2 (operands[0], reg2)); +- emit_insn (gen_iorsi3 (operands[0], operands[0], reg3)); ++ emit_insn (gen_fix_truncdfsi2 (operands[0], reg2)); ++ emit_insn (gen_iorsi3 (operands[0], operands[0], reg3)); + +- emit_label (label2); ++ emit_label (label2); + +- /* Allow REG_NOTES to be set on last insn (labels don't have enough +- fields, and can't be used for REG_NOTES anyway). */ +- emit_use (stack_pointer_rtx); +- DONE; +- } ++ /* Allow REG_NOTES to be set on last insn (labels don't have enough ++ fields, and can't be used for REG_NOTES anyway). */ ++ emit_use (stack_pointer_rtx); ++ DONE; + }) + +- + (define_expand "fixuns_truncdfdi2" + [(set (match_operand:DI 0 "register_operand") + (unsigned_fix:DI (match_operand:DF 1 "register_operand")))] +- "TARGET_HARD_FLOAT && TARGET_64BIT && TARGET_DOUBLE_FLOAT" ++ "TARGET_DOUBLE_FLOAT" + { + rtx reg1 = gen_reg_rtx (DFmode); + rtx reg2 = gen_reg_rtx (DFmode); +@@ -1980,7 +1646,6 @@ + DONE; + }) + +- + (define_expand "fixuns_truncsfsi2" + [(set (match_operand:SI 0 "register_operand") + (unsigned_fix:SI (match_operand:SF 1 "register_operand")))] +@@ -2022,11 +1687,10 @@ + DONE; + }) + +- + (define_expand "fixuns_truncsfdi2" + [(set (match_operand:DI 0 "register_operand") + (unsigned_fix:DI (match_operand:SF 1 "register_operand")))] +- "TARGET_HARD_FLOAT && TARGET_64BIT && TARGET_DOUBLE_FLOAT" ++ "TARGET_DOUBLE_FLOAT" + { + rtx reg1 = gen_reg_rtx (SFmode); + rtx reg2 = gen_reg_rtx (SFmode); +@@ -2067,35 +1731,35 @@ + ;; + ;; .................... + ;; +-;; DATA MOVEMENT ++;; EXTRACT AND INSERT + ;; + ;; .................... 
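The fixuns_truncdfsi2 expander earlier in this hunk open-codes unsigned conversion on top of the signed ftintrz path. Ignoring NaN and out-of-range inputs, its control flow corresponds roughly to the following C sketch (illustrative only; the function name is invented):

  /* Rough model of the expander: values below 2^31 take the plain signed
     conversion; larger values are biased down by 2^31, converted, and the
     top bit is OR-ed back in (BITMASK_HIGH).  */
  unsigned int dtou32 (double x)
  {
    if (x < 2147483648.0)
      return (unsigned int) (int) x;
    return ((unsigned int) (int) (x - 2147483648.0)) | 0x80000000u;
  }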
+ + (define_expand "extzv" +- [(set (match_operand:GPR 0 "register_operand") +- (zero_extract:GPR (match_operand:GPR 1 "register_operand") +- (match_operand 2 "const_int_operand") +- (match_operand 3 "const_int_operand")))] ++ [(set (match_operand:X 0 "register_operand") ++ (zero_extract:X (match_operand:X 1 "register_operand") ++ (match_operand 2 "const_int_operand") ++ (match_operand 3 "const_int_operand")))] + "" + { + if (!loongarch_use_ins_ext_p (operands[1], INTVAL (operands[2]), +- INTVAL (operands[3]))) ++ INTVAL (operands[3]))) + FAIL; + }) + + (define_insn "*extzv" +- [(set (match_operand:GPR 0 "register_operand" "=r") +- (zero_extract:GPR (match_operand:GPR 1 "register_operand" "r") +- (match_operand 2 "const_int_operand" "") +- (match_operand 3 "const_int_operand" "")))] ++ [(set (match_operand:X 0 "register_operand" "=r") ++ (zero_extract:X (match_operand:X 1 "register_operand" "r") ++ (match_operand 2 "const_int_operand" "") ++ (match_operand 3 "const_int_operand" "")))] + "loongarch_use_ins_ext_p (operands[1], INTVAL (operands[2]), +- INTVAL (operands[3]))" ++ INTVAL (operands[3]))" + { +- operands[2] = GEN_INT (INTVAL (operands[2]) + INTVAL (operands[3]) -1 ); ++ operands[2] = GEN_INT (INTVAL (operands[2]) + INTVAL (operands[3]) - 1); + return "bstrpick.\t%0,%1,%2,%3"; + } +- [(set_attr "type" "arith") +- (set_attr "mode" "")]) ++ [(set_attr "type" "arith") ++ (set_attr "mode" "")]) + + (define_expand "insv" + [(set (zero_extract:GPR (match_operand:GPR 0 "register_operand") +@@ -2105,7 +1769,7 @@ + "" + { + if (!loongarch_use_ins_ext_p (operands[0], INTVAL (operands[1]), +- INTVAL (operands[2]))) ++ INTVAL (operands[2]))) + FAIL; + }) + +@@ -2115,26 +1779,20 @@ + (match_operand:SI 2 "const_int_operand" "")) + (match_operand:GPR 3 "reg_or_0_operand" "rJ"))] + "loongarch_use_ins_ext_p (operands[0], INTVAL (operands[1]), +- INTVAL (operands[2]))" ++ INTVAL (operands[2]))" + { +- operands[1] = GEN_INT (INTVAL (operands[1]) + INTVAL (operands[2]) -1 ); ++ operands[1] = GEN_INT (INTVAL (operands[1]) + INTVAL (operands[2]) - 1); + return "bstrins.\t%0,%z3,%1,%2"; + } +- [(set_attr "type" "arith") +- (set_attr "mode" "")]) +- +-;; Allow combine to split complex const_int load sequences, using operand 2 +-;; to store the intermediate results. See move_operand for details. +-(define_split +- [(set (match_operand:GPR 0 "register_operand") +- (match_operand:GPR 1 "splittable_const_int_operand")) +- (clobber (match_operand:GPR 2 "register_operand"))] +- "" +- [(const_int 0)] +-{ +- loongarch_move_integer (operands[2], operands[0], INTVAL (operands[1])); +- DONE; +-}) ++ [(set_attr "type" "arith") ++ (set_attr "mode" "")]) ++ ++;; ++;; .................... ++;; ++;; DATA MOVEMENT ++;; ++;; .................... 
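As a reference for the extzv/insv patterns just above (illustrative C, not from the patch; the struct layout and names are invented): when loongarch_use_ins_ext_p accepts the field position, a bit-field read is expected to become a single bstrpick.d and a bit-field update a bstrins.d on the containing word.

  struct flags { unsigned long lo : 7, mid : 9, rest : 48; };
  unsigned long get_mid (struct flags f)          { return f.mid; }  /* bstrpick.d */
  void set_mid (struct flags *f, unsigned long v) { f->mid = v; }    /* ld.d + bstrins.d + st.d */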
+ + ;; 64-bit integer moves + +@@ -2151,152 +1809,46 @@ + DONE; + }) + +- + (define_insn "*movdi_32bit" +- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,ZC,r,m,*f,*f,*r,*m") +- (match_operand:DI 1 "move_operand" "r,i,ZC,r,m,r,*J*r,*m,*f,*f"))] ++ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,w,*f,*f,*r,*m") ++ (match_operand:DI 1 "move_operand" "r,i,w,r,*J*r,*m,*f,*f"))] + "!TARGET_64BIT + && (register_operand (operands[0], DImode) + || reg_or_0_operand (operands[1], DImode))" + { return loongarch_output_move (operands[0], operands[1]); } +- [(set_attr "move_type" "move,const,load,store,load,store,mgtf,fpload,mftg,fpstore") +- (set (attr "mode") +- (if_then_else (eq_attr "move_type" "imul") +- (const_string "SI") +- (const_string "DI")))]) +- ++ [(set_attr "move_type" "move,const,load,store,mgtf,fpload,mftg,fpstore") ++ (set_attr "mode" "DI")]) + + (define_insn "*movdi_64bit" +- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,ZC,r,m,*f,*f,*r,*m") +- (match_operand:DI 1 "move_operand" "r,Yd,ZC,rJ,m,rJ,*r*J,*m,*f,*f"))] ++ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,w,*f,*f,*r,*m") ++ (match_operand:DI 1 "move_operand" "r,Yd,w,rJ,*r*J,*m,*f,*f"))] + "TARGET_64BIT + && (register_operand (operands[0], DImode) +- || reg_or_0_operand (operands[1], DImode)) +- && !((GET_CODE (operands[1]) == SYMBOL_REF || GET_CODE (operands[1]) == LABEL_REF) +- && symbolic_operand (operands[1], VOIDmode) +- && (loongarch_cmodel_var == LARCH_CMODEL_EXTREME))" ++ || reg_or_0_operand (operands[1], DImode))" + { return loongarch_output_move (operands[0], operands[1]); } +- [(set_attr "move_type" "move,const,load,store,load,store,mgtf,fpload,mftg,fpstore") ++ [(set_attr "move_type" "move,const,load,store,mgtf,fpload,mftg,fpstore") + (set_attr "mode" "DI")]) + +-(define_insn "movdi_extreme" +- [(parallel [(set (match_operand:DI 0 "register_operand" "=r") +- (unspec_volatile:DI [(match_operand:DI 1 "symbolic_operand" "")] +- UNSPECV_MOVE_EXTREME)) +- (use (match_operand:DI 2 "register_operand" "=&r"))])] +- "TARGET_64BIT && (loongarch_cmodel_var == LARCH_CMODEL_EXTREME)" +- { +- if (!loongarch_global_symbol_p (operands[1]) +- || loongarch_symbol_binds_local_p (operands[1])) +- return "la.local\t%0,%2,%1"; +- else +- return "la.global\t%0,%2,%1"; +- } +- [(set_attr "move_type" "const") +- (set_attr "mode" "DI")]) + ;; 32-bit Integer moves + +-;; Unlike most other insns, the move insns can't be split with +-;; different predicates, because register spilling and other parts of +-;; the compiler, have memoized the insn number already. +- +-(define_expand "mov" +- [(set (match_operand:IMOVE32 0 "") +- (match_operand:IMOVE32 1 ""))] +- "" +-{ +- if (loongarch_legitimize_move (mode, operands[0], operands[1])) +- DONE; +-}) +- +-;; The difference between these two is whether or not ints are allowed +-;; in FP registers (off by default, use -mdebugh to enable). 
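As a reference for the movdi alternatives above (illustrative C, not from the patch; the name is invented): a 64-bit constant that the "Yd" move_operand path has to synthesize is plausibly emitted as the usual lu12i.w + ori + lu32i.d + lu52i.d sequence, with shorter forms when parts of the constant are zero or already sign-extended.

  /* Illustrative: a constant needing all four pieces.  */
  long long big_const (void) { return 0x123456789abcdef0LL; }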
+- +-(define_insn "*mov_internal" +- [(set (match_operand:IMOVE32 0 "nonimmediate_operand" "=r,r,r,ZC,r,m,*f,*f,*r,*m,*r,*z") +- (match_operand:IMOVE32 1 "move_operand" "r,Yd,ZC,rJ,m,rJ,*r*J,*m,*f,*f,*z,*r"))] +- "(register_operand (operands[0], mode) +- || reg_or_0_operand (operands[1], mode))" +- { return loongarch_output_move (operands[0], operands[1]); } +- [(set_attr "move_type" "move,const,load,store,load,store,mgtf,fpload,mftg,fpstore,mftg,mgtf") +- (set_attr "compression" "all,*,*,*,*,*,*,*,*,*,*,*") +- (set_attr "mode" "SI")]) +- +- +- +-;; LARCH supports loading and storing a floating point register from +-;; the sum of two general registers. We use two versions for each of +-;; these four instructions: one where the two general registers are +-;; SImode, and one where they are DImode. This is because general +-;; registers will be in SImode when they hold 32-bit values, but, +-;; since the 32-bit values are always sign extended, the [ls][wd]xc1 +-;; instructions will still work correctly. +- +-;; ??? Perhaps it would be better to support these instructions by +-;; modifying TARGET_LEGITIMATE_ADDRESS_P and friends. However, since +-;; these instructions can only be used to load and store floating +-;; point registers, that would probably cause trouble in reload. +- +-(define_insn "*_" +- [(set (match_operand:ANYF 0 "register_operand" "=f") +- (mem:ANYF (plus:P (match_operand:P 1 "register_operand" "r") +- (match_operand:P 2 "register_operand" "r"))))] +- "" +- "\t%0,%1,%2" +- [(set_attr "type" "fpidxload") +- (set_attr "mode" "")]) +- +-(define_insn "*_" +- [(set (mem:ANYF (plus:P (match_operand:P 1 "register_operand" "r") +- (match_operand:P 2 "register_operand" "r"))) +- (match_operand:ANYF 0 "register_operand" "f"))] +- "TARGET_HARD_FLOAT" +- "\t%0,%1,%2" +- [(set_attr "type" "fpidxstore") +- (set_attr "mode" "")]) +- +-;; Loongson index address load and store. +-(define_insn "*_" +- [(set (match_operand:GPR 0 "register_operand" "=r") +- (mem:GPR +- (plus:P (match_operand:P 1 "register_operand" "r") +- (match_operand:P 2 "register_operand" "r"))))] +- "" +- "\t%0,%1,%2" +- [(set_attr "type" "load") +- (set_attr "mode" "")]) +- +-(define_insn "*_" +- [(set (mem:GPR (plus:P (match_operand:P 1 "register_operand" "r") +- (match_operand:P 2 "register_operand" "r"))) +- (match_operand:GPR 0 "register_operand" "r"))] +- "" +- "\t%0,%1,%2" +- [(set_attr "type" "store") +- (set_attr "mode" "")]) +- +-;; SHORT mode sign_extend. 
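The hand-written indexed load/store patterns removed above are superseded in this sync by the "k" (base register plus index register) memory alternatives added to the ordinary move and extension patterns. A hedged illustration of the kind of access that now goes through those alternatives (C sketch; names invented):

  /* Illustrative: a reg+reg address; a plausible result is ldx.b $r4,$r4,$r5.  */
  char pick (char *base, long idx) { return base[idx]; }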
+-(define_insn "*extend__" +- [(set (match_operand:GPR 0 "register_operand" "=r") +- (sign_extend:GPR +- (mem:SHORT +- (plus:P (match_operand:P 1 "register_operand" "r") +- (match_operand:P 2 "register_operand" "r")))))] ++(define_expand "movsi" ++ [(set (match_operand:SI 0 "") ++ (match_operand:SI 1 ""))] + "" +- "\t%0,%1,%2" +- [(set_attr "type" "load") +- (set_attr "mode" "")]) ++{ ++ if (loongarch_legitimize_move (SImode, operands[0], operands[1])) ++ DONE; ++}) + +-(define_insn "*extend_" +- [(set (mem:SHORT (plus:P (match_operand:P 1 "register_operand" "r") +- (match_operand:P 2 "register_operand" "r"))) +- (match_operand:SHORT 0 "register_operand" "r"))] +- "" +- "\t%0,%1,%2" +- [(set_attr "type" "store") ++(define_insn "*movsi_internal" ++ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,w,*f,*f,*r,*m,*r,*z") ++ (match_operand:SI 1 "move_operand" "r,Yd,w,rJ,*r*J,*m,*f,*f,*z,*r"))] ++ "(register_operand (operands[0], SImode) ++ || reg_or_0_operand (operands[1], SImode))" ++ { return loongarch_output_move (operands[0], operands[1]); } ++ [(set_attr "move_type" "move,const,load,store,mgtf,fpload,mftg,fpstore,mftg,mgtf") + (set_attr "mode" "SI")]) + +- + ;; 16-bit Integer moves + + ;; Unlike most other insns, the move insns can't be split with +@@ -2314,13 +1866,12 @@ + }) + + (define_insn "*movhi_internal" +- [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,r,m") +- (match_operand:HI 1 "move_operand" "r,Yd,I,m,rJ"))] ++ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,r,m,r,k") ++ (match_operand:HI 1 "move_operand" "r,Yd,I,m,rJ,k,rJ"))] + "(register_operand (operands[0], HImode) + || reg_or_0_operand (operands[1], HImode))" + { return loongarch_output_move (operands[0], operands[1]); } +- [(set_attr "move_type" "move,const,const,load,store") +- (set_attr "compression" "all,all,*,*,*") ++ [(set_attr "move_type" "move,const,const,load,store,load,store") + (set_attr "mode" "HI")]) + + ;; 8-bit Integer moves +@@ -2340,13 +1891,12 @@ + }) + + (define_insn "*movqi_internal" +- [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,m") +- (match_operand:QI 1 "move_operand" "r,I,m,rJ"))] ++ [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,m,r,k") ++ (match_operand:QI 1 "move_operand" "r,I,m,rJ,k,rJ"))] + "(register_operand (operands[0], QImode) + || reg_or_0_operand (operands[1], QImode))" + { return loongarch_output_move (operands[0], operands[1]); } +- [(set_attr "move_type" "move,const,load,store") +- (set_attr "compression" "all,*,*,*") ++ [(set_attr "move_type" "move,const,load,store,load,store") + (set_attr "mode" "QI")]) + + ;; 32-bit floating point moves +@@ -2361,13 +1911,13 @@ + }) + + (define_insn "*movsf_hardfloat" +- [(set (match_operand:SF 0 "nonimmediate_operand" "=f,f,f,m,m,*f,*r,*r,*r,*m") +- (match_operand:SF 1 "move_operand" "f,G,m,f,G,*r,*f,*G*r,*m,*r"))] ++ [(set (match_operand:SF 0 "nonimmediate_operand" "=f,f,f,m,f,k,m,*f,*r,*r,*r,*m") ++ (match_operand:SF 1 "move_operand" "f,G,m,f,k,f,G,*r,*f,*G*r,*m,*r"))] + "TARGET_HARD_FLOAT + && (register_operand (operands[0], SFmode) + || reg_or_0_operand (operands[1], SFmode))" + { return loongarch_output_move (operands[0], operands[1]); } +- [(set_attr "move_type" "fmove,mgtf,fpload,fpstore,store,mgtf,mftg,move,load,store") ++ [(set_attr "move_type" "fmove,mgtf,fpload,fpstore,fpload,fpstore,store,mgtf,mftg,move,load,store") + (set_attr "mode" "SF")]) + + (define_insn "*movsf_softfloat" +@@ -2380,7 +1930,6 @@ + [(set_attr "move_type" "move,load,store") + (set_attr "mode" "SF")]) + +- + ;; 64-bit 
floating point moves + + (define_expand "movdf" +@@ -2393,13 +1942,13 @@ + }) + + (define_insn "*movdf_hardfloat" +- [(set (match_operand:DF 0 "nonimmediate_operand" "=f,f,f,m,m,*f,*r,*r,*r,*m") +- (match_operand:DF 1 "move_operand" "f,G,m,f,G,*r,*f,*r*G,*m,*r"))] +- "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT ++ [(set (match_operand:DF 0 "nonimmediate_operand" "=f,f,f,m,f,k,m,*f,*r,*r,*r,*m") ++ (match_operand:DF 1 "move_operand" "f,G,m,f,k,f,G,*r,*f,*r*G,*m,*r"))] ++ "TARGET_DOUBLE_FLOAT + && (register_operand (operands[0], DFmode) + || reg_or_0_operand (operands[1], DFmode))" + { return loongarch_output_move (operands[0], operands[1]); } +- [(set_attr "move_type" "fmove,mgtf,fpload,fpstore,store,mgtf,mftg,move,load,store") ++ [(set_attr "move_type" "fmove,mgtf,fpload,fpstore,fpload,fpstore,store,mgtf,mftg,move,load,store") + (set_attr "mode" "DF")]) + + (define_insn "*movdf_softfloat" +@@ -2433,11 +1982,10 @@ + { return loongarch_output_move (operands[0], operands[1]); } + [(set_attr "move_type" "move,const,load,store") + (set (attr "mode") +- (if_then_else (eq_attr "move_type" "imul") ++ (if_then_else (eq_attr "move_type" "imul") + (const_string "SI") + (const_string "TI")))]) + +- + ;; 128-bit floating point moves + + (define_expand "movtf" +@@ -2460,11 +2008,10 @@ + [(set_attr "move_type" "move,load,store,mgtf,mftg,fpload,fpstore") + (set_attr "mode" "TF")]) + +- + (define_split + [(set (match_operand:MOVE64 0 "nonimmediate_operand") + (match_operand:MOVE64 1 "move_operand"))] +- "reload_completed && loongarch_split_move_insn_p (operands[0], operands[1], insn)" ++ "reload_completed && loongarch_split_move_insn_p (operands[0], operands[1])" + [(const_int 0)] + { + loongarch_split_move_insn (operands[0], operands[1], curr_insn); +@@ -2474,7 +2021,7 @@ + (define_split + [(set (match_operand:MOVE128 0 "nonimmediate_operand") + (match_operand:MOVE128 1 "move_operand"))] +- "reload_completed && loongarch_split_move_insn_p (operands[0], operands[1], insn)" ++ "reload_completed && loongarch_split_move_insn_p (operands[0], operands[1])" + [(const_int 0)] + { + loongarch_split_move_insn (operands[0], operands[1], curr_insn); +@@ -2484,7 +2031,7 @@ + ;; Emit a doubleword move in which exactly one of the operands is + ;; a floating-point register. We can't just emit two normal moves + ;; because of the constraints imposed by the FPU register model; +-;; see loongarch_cannot_change_mode_class for details. Instead, we keep ++;; see loongarch_can_change_mode_class for details. Instead, we keep + ;; the FPR whole and use special patterns to refer to each word of + ;; the other operand. + +@@ -2516,6 +2063,108 @@ + DONE; + }) + ++;; Clear one FCC register ++ ++(define_insn "movfcc" ++ [(set (match_operand:FCC 0 "register_operand" "=z") ++ (const_int 0))] ++ "" ++ "movgr2cf\t%0,$r0") ++ ++;; Conditional move instructions. ++ ++(define_insn "*sel_using_" ++ [(set (match_operand:GPR 0 "register_operand" "=r,r") ++ (if_then_else:GPR ++ (equality_op:GPR2 (match_operand:GPR2 1 "register_operand" "r,r") ++ (const_int 0)) ++ (match_operand:GPR 2 "reg_or_0_operand" "r,J") ++ (match_operand:GPR 3 "reg_or_0_operand" "J,r")))] ++ "register_operand (operands[2], mode) ++ != register_operand (operands[3], mode)" ++ "@ ++ \t%0,%2,%1 ++ \t%0,%3,%1" ++ [(set_attr "type" "condmove") ++ (set_attr "mode" "")]) ++ ++;; fsel copies the 3rd argument when the 1st is non-zero and the 2nd ++;; argument if the 1st is zero. This means operand 2 and 3 are ++;; inverted in the instruction. 
++ ++(define_insn "*sel" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (if_then_else:ANYF ++ (equality_op:FCC (match_operand:FCC 1 "register_operand" "z") ++ (const_int 0)) ++ (match_operand:ANYF 2 "reg_or_0_operand" "f") ++ (match_operand:ANYF 3 "reg_or_0_operand" "f")))] ++ "TARGET_HARD_FLOAT" ++ "fsel\t%0,,%1" ++ [(set_attr "type" "condmove") ++ (set_attr "mode" "")]) ++ ++;; These are the main define_expand's used to make conditional moves. ++ ++(define_expand "movcc" ++ [(set (match_operand:GPR 0 "register_operand") ++ (if_then_else:GPR (match_operator 1 "comparison_operator" ++ [(match_operand:GPR 2 "reg_or_0_operand") ++ (match_operand:GPR 3 "reg_or_0_operand")])))] ++ "TARGET_COND_MOVE_INT" ++{ ++ if(loongarch_expand_conditional_move_la464 (operands)) ++ DONE; ++ else ++ FAIL; ++}) ++ ++(define_expand "movcc" ++ [(set (match_operand:ANYF 0 "register_operand") ++ (if_then_else:ANYF (match_operator 1 "comparison_operator" ++ [(match_operand:ANYF 2 "reg_or_0_operand") ++ (match_operand:ANYF 3 "reg_or_0_operand")])))] ++ "TARGET_COND_MOVE_FLOAT" ++{ ++ ++ if(loongarch_expand_conditional_move_la464 (operands)) ++ DONE; ++ else ++ FAIL; ++}) ++ ++(define_insn "lu32i_d" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (ior:DI ++ (zero_extend:DI ++ (subreg:SI (match_operand:DI 1 "register_operand" "0") 0)) ++ (match_operand:DI 2 "const_lu32i_operand" "u")))] ++ "TARGET_64BIT" ++ "lu32i.d\t%0,%X2>>32" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "DI")]) ++ ++(define_insn "lu52i_d" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (ior:DI ++ (and:DI (match_operand:DI 1 "register_operand" "r") ++ (match_operand 2 "lu52i_mask_operand")) ++ (match_operand 3 "const_lu52i_operand" "v")))] ++ "TARGET_64BIT" ++ "lu52i.d\t%0,%1,%X3>>52" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "DI")]) ++ ++;; Convert floating-point numbers to integers ++(define_insn "frint_" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")] ++ UNSPEC_FRINT))] ++ "" ++ "frint.\t%0,%1" ++ [(set_attr "type" "fcvt") ++ (set_attr "mode" "")]) ++ + ;; Load the low word of operand 0 with operand 1. 
+ (define_insn "load_low" + [(set (match_operand:SPLITF 0 "register_operand" "=f,f") +@@ -2559,47 +2208,149 @@ + [(set_attr "move_type" "mftg,fpstore") + (set_attr "mode" "")]) + +-;; Move operand 1 to the high word of operand 0 using movgr2frh, preserving the ++;; Thread-Local Storage ++ ++(define_insn "got_load_tls_gd" ++ [(set (match_operand:P 0 "register_operand" "=r") ++ (unspec:P ++ [(match_operand:P 1 "symbolic_operand" "")] ++ UNSPEC_TLS_GD))] ++ "" ++ "la.tls.gd\t%0,%1" ++ [(set_attr "got" "load") ++ (set_attr "mode" "")]) ++ ++(define_insn "got_load_tls_ld" ++ [(set (match_operand:P 0 "register_operand" "=r") ++ (unspec:P ++ [(match_operand:P 1 "symbolic_operand" "")] ++ UNSPEC_TLS_LD))] ++ "" ++ "la.tls.ld\t%0,%1" ++ [(set_attr "got" "load") ++ (set_attr "mode" "")]) ++ ++(define_insn "got_load_tls_le" ++ [(set (match_operand:P 0 "register_operand" "=r") ++ (unspec:P ++ [(match_operand:P 1 "symbolic_operand" "")] ++ UNSPEC_TLS_LE))] ++ "" ++ "la.tls.le\t%0,%1" ++ [(set_attr "got" "load") ++ (set_attr "mode" "")]) ++ ++(define_insn "got_load_tls_ie" ++ [(set (match_operand:P 0 "register_operand" "=r") ++ (unspec:P ++ [(match_operand:P 1 "symbolic_operand" "")] ++ UNSPEC_TLS_IE))] ++ "" ++ "la.tls.ie\t%0,%1" ++ [(set_attr "got" "load") ++ (set_attr "mode" "")]) ++ ++;; Move operand 1 to the high word of operand 0 using movgr2frh.w, preserving the + ;; value in the low word. + (define_insn "movgr2frh" + [(set (match_operand:SPLITF 0 "register_operand" "=f") + (unspec:SPLITF [(match_operand: 1 "reg_or_0_operand" "rJ") +- (match_operand:SPLITF 2 "register_operand" "0")] +- UNSPEC_MOVGR2FRH))] +- "TARGET_HARD_FLOAT && TARGET_FLOAT64" +- "movgr2frh.w\t%z1,%0" ++ (match_operand:SPLITF 2 "register_operand" "0")] ++ UNSPEC_MOVGR2FRH))] ++ "TARGET_DOUBLE_FLOAT" ++ "movgr2frh.w\t%0,%z1" + [(set_attr "move_type" "mgtf") + (set_attr "mode" "")]) + +-;; Move high word of operand 1 to operand 0 using movfrh2gr. ++(define_insn "movsgr2fr" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (unspec:ANYF [(match_operand:SI 1 "register_operand" "r")] ++ UNSPEC_MOVGR2FR))] ++ "TARGET_DOUBLE_FLOAT" ++ "movgr2fr.w\t%0,%1" ++ ) ++(define_insn "movdgr2fr" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (unspec:ANYF [(match_operand:DI 1 "register_operand" "r")] ++ UNSPEC_MOVGR2FR))] ++ "TARGET_DOUBLE_FLOAT" ++ "movgr2fr.d\t%0,%1" ++ ) ++ ++;; Move high word of operand 1 to operand 0 using movfrh2gr.s. 
+ (define_insn "movfrh2gr" + [(set (match_operand: 0 "register_operand" "=r") + (unspec: [(match_operand:SPLITF 1 "register_operand" "f")] + UNSPEC_MOVFRH2GR))] +- "TARGET_HARD_FLOAT && TARGET_FLOAT64" ++ "TARGET_DOUBLE_FLOAT" + "movfrh2gr.s\t%0,%1" + [(set_attr "move_type" "mftg") + (set_attr "mode" "")]) + ++(define_insn "movsfr2gr" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (unspec:GPR [(match_operand:SF 1 "register_operand" "f")] ++ UNSPEC_MOVFR2GR))] ++ "TARGET_DOUBLE_FLOAT" ++ "movfr2gr.s\t%0,%1" ++ ) ++(define_insn "movdfr2gr" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (unspec:GPR [(match_operand:DF 1 "register_operand" "f")] ++ UNSPEC_MOVFR2GR))] ++ "TARGET_DOUBLE_FLOAT" ++ "movfr2gr.d\t%0,%1" ++ ) ++ ++(define_insn "movfr2fcc" ++ [(set (match_operand:FCC 0 "register_operand" "=z") ++ (unspec:FCC [(match_operand:ANYF 1 "register_operand" "f")] ++ UNSPEC_MOVFR2FCC))] ++ "TARGET_HARD_FLOAT" ++ "movfr2cf\t%0,%1" ++ [(set_attr "mode" "")]) ++ ++(define_insn "movgr2fcc" ++ [(set (match_operand:FCC 0 "register_operand" "=z") ++ (unspec:FCC [(match_operand:GPR 1 "register_operand" "r")] ++ UNSPEC_MOVGR2FCC))] ++ "TARGET_HARD_FLOAT" ++ "movgr2cf\t%0,%1" ++ [(set_attr "mode" "")]) ++ ++(define_insn "movfcc2gr" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (unspec:GPR [(match_operand:FCC 1 "register_operand" "z")] ++ UNSPEC_MOVFCC2GR))] ++ "TARGET_HARD_FLOAT" ++ "movcf2gr\t%0,%1" ++ [ (set_attr "mode" "")]) ++ ++ + ;; Expand in-line code to clear the instruction cache between operand[0] and + ;; operand[1]. + (define_expand "clear_cache" + [(match_operand 0 "pmode_register_operand") + (match_operand 1 "pmode_register_operand")] + "" +- " + { +- emit_insn (gen_ibar (const0_rtx)); ++ emit_insn (gen_loongarch_ibar (const0_rtx)); + DONE; +-}") ++}) + +-(define_insn "ibar" +- [(unspec_volatile:SI [(match_operand 0 "const_uimm15_operand")] UNSPEC_IBAR)] ++(define_insn "loongarch_ibar" ++ [(unspec_volatile:SI ++ [(match_operand 0 "const_uimm15_operand")] ++ UNSPECV_IBAR) ++ (clobber (mem:BLK (scratch)))] + "" + "ibar\t%0") + +-(define_insn "dbar" +- [(unspec_volatile:SI [(match_operand 0 "const_uimm15_operand")] UNSPEC_DBAR)] ++(define_insn "loongarch_dbar" ++ [(unspec_volatile:SI ++ [(match_operand 0 "const_uimm15_operand")] ++ UNSPECV_DBAR) ++ (clobber (mem:BLK (scratch)))] + "" + "dbar\t%0") + +@@ -2607,118 +2358,142 @@ + + ;; Privileged state instruction + +-(define_insn "cpucfg" ++(define_insn "loongarch_cpucfg" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "r")] +- UNSPEC_CPUCFG))] ++ UNSPECV_CPUCFG))] + "" + "cpucfg\t%0,%1" +- [(set_attr "type" "load") +- (set_attr "mode" "SI")]) ++ [(set_attr "type" "load") ++ (set_attr "mode" "SI")]) ++ ++(define_insn "loongarch_syscall" ++ [(unspec_volatile:SI ++ [(match_operand 0 "const_uimm15_operand")] ++ UNSPECV_SYSCALL) ++ (clobber (mem:BLK (scratch)))] ++ "" ++ "syscall\t%0") ++ ++(define_insn "loongarch_break" ++ [(unspec_volatile:SI ++ [(match_operand 0 "const_uimm15_operand")] ++ UNSPECV_BREAK) ++ (clobber (mem:BLK (scratch)))] ++ "" ++ "break\t%0") + +-(define_insn "asrtle_d" +- [(unspec_volatile:DI [(match_operand:DI 0 "register_operand" "r") +- (match_operand:DI 1 "register_operand" "r")] +- UNSPEC_ASRTLE_D)] ++(define_insn "loongarch_asrtle_d" ++ [(unspec_volatile:DI [(match_operand:DI 0 "register_operand" "r") ++ (match_operand:DI 1 "register_operand" "r")] ++ UNSPECV_ASRTLE_D)] + "TARGET_64BIT" + "asrtle.d\t%0,%1" +- [(set_attr 
"type" "load") +- (set_attr "mode" "DI")]) ++ [(set_attr "type" "load") ++ (set_attr "mode" "DI")]) + +-(define_insn "asrtgt_d" +- [(unspec_volatile:DI [(match_operand:DI 0 "register_operand" "r") +- (match_operand:DI 1 "register_operand" "r")] +- UNSPEC_ASRTGT_D)] ++(define_insn "loongarch_asrtgt_d" ++ [(unspec_volatile:DI [(match_operand:DI 0 "register_operand" "r") ++ (match_operand:DI 1 "register_operand" "r")] ++ UNSPECV_ASRTGT_D)] + "TARGET_64BIT" + "asrtgt.d\t%0,%1" +- [(set_attr "type" "load") +- (set_attr "mode" "DI")]) ++ [(set_attr "type" "load") ++ (set_attr "mode" "DI")]) + +-(define_insn "
csrrd" ++(define_insn "loongarch_csrrd_" + [(set (match_operand:GPR 0 "register_operand" "=r") + (unspec_volatile:GPR [(match_operand 1 "const_uimm14_operand")] +- UNSPEC_CSRRD))] ++ UNSPECV_CSRRD)) ++ (clobber (mem:BLK (scratch)))] + "" + "csrrd\t%0,%1" +- [(set_attr "type" "load") +- (set_attr "mode" "")]) ++ [(set_attr "type" "load") ++ (set_attr "mode" "")]) + +-(define_insn "
csrwr" ++(define_insn "loongarch_csrwr_" + [(set (match_operand:GPR 0 "register_operand" "=r") +- (unspec_volatile:GPR +- [(match_operand:GPR 1 "register_operand" "0") +- (match_operand 2 "const_uimm14_operand")] +- UNSPEC_CSRWR))] ++ (unspec_volatile:GPR ++ [(match_operand:GPR 1 "register_operand" "0") ++ (match_operand 2 "const_uimm14_operand")] ++ UNSPECV_CSRWR)) ++ (clobber (mem:BLK (scratch)))] + "" + "csrwr\t%0,%2" +- [(set_attr "type" "store") +- (set_attr "mode" "")]) ++ [(set_attr "type" "store") ++ (set_attr "mode" "")]) + +-(define_insn "
csrxchg" ++(define_insn "loongarch_csrxchg_" + [(set (match_operand:GPR 0 "register_operand" "=r") +- (unspec_volatile:GPR +- [(match_operand:GPR 1 "register_operand" "0") +- (match_operand:GPR 2 "register_operand" "q") +- (match_operand 3 "const_uimm14_operand")] +- UNSPEC_CSRXCHG))] ++ (unspec_volatile:GPR ++ [(match_operand:GPR 1 "register_operand" "0") ++ (match_operand:GPR 2 "register_operand" "q") ++ (match_operand 3 "const_uimm14_operand")] ++ UNSPECV_CSRXCHG)) ++ (clobber (mem:BLK (scratch)))] + "" + "csrxchg\t%0,%2,%3" +- [(set_attr "type" "load") +- (set_attr "mode" "")]) ++ [(set_attr "type" "load") ++ (set_attr "mode" "")]) + +-(define_insn "iocsrrd_" ++(define_insn "loongarch_iocsrrd_" + [(set (match_operand:QHWD 0 "register_operand" "=r") +- (unspec_volatile:QHWD [(match_operand:SI 1 "register_operand" "r")] +- UNSPEC_IOCSRRD))] ++ (unspec_volatile:QHWD [(match_operand:SI 1 "register_operand" "r")] ++ UNSPECV_IOCSRRD)) ++ (clobber (mem:BLK (scratch)))] + "" + "iocsrrd.\t%0,%1" +- [(set_attr "type" "load") +- (set_attr "mode" "")]) ++ [(set_attr "type" "load") ++ (set_attr "mode" "")]) + +-(define_insn "iocsrwr_" ++(define_insn "loongarch_iocsrwr_" + [(unspec_volatile:QHWD [(match_operand:QHWD 0 "register_operand" "r") +- (match_operand:SI 1 "register_operand" "r")] +- UNSPEC_IOCSRWR)] ++ (match_operand:SI 1 "register_operand" "r")] ++ UNSPECV_IOCSRWR) ++ (clobber (mem:BLK (scratch)))] + "" + "iocsrwr.\t%0,%1" +- [(set_attr "type" "load") +- (set_attr "mode" "")]) ++ [(set_attr "type" "load") ++ (set_attr "mode" "")]) + +-(define_insn "
cacop" ++(define_insn "loongarch_cacop_" + [(unspec_volatile:X [(match_operand 0 "const_uimm5_operand") +- (match_operand:X 1 "register_operand" "r") +- (match_operand 2 "const_imm12_operand")] +- UNSPEC_CACOP)] ++ (match_operand:X 1 "register_operand" "r") ++ (match_operand 2 "const_imm12_operand")] ++ UNSPECV_CACOP) ++ (clobber (mem:BLK (scratch)))] + "" + "cacop\t%0,%1,%2" +- [(set_attr "type" "load") +- (set_attr "mode" "")]) ++ [(set_attr "type" "load") ++ (set_attr "mode" "")]) + +-(define_insn "
lddir" ++(define_insn "loongarch_lddir_" + [(unspec_volatile:X [(match_operand:X 0 "register_operand" "r") +- (match_operand:X 1 "register_operand" "r") +- (match_operand 2 "const_uimm5_operand")] +- UNSPEC_LDDIR)] ++ (match_operand:X 1 "register_operand" "r") ++ (match_operand 2 "const_uimm5_operand")] ++ UNSPECV_LDDIR) ++ (clobber (mem:BLK (scratch)))] + "" + "lddir\t%0,%1,%2" +- [(set_attr "type" "load") +- (set_attr "mode" "")]) ++ [(set_attr "type" "load") ++ (set_attr "mode" "")]) + +-(define_insn "
ldpte" ++(define_insn "loongarch_ldpte_" + [(unspec_volatile:X [(match_operand:X 0 "register_operand" "r") + (match_operand 1 "const_uimm5_operand")] +- UNSPEC_LDPTE)] ++ UNSPECV_LDPTE) ++ (clobber (mem:BLK (scratch)))] + "" + "ldpte\t%0,%1" +- [(set_attr "type" "load") +- (set_attr "mode" "")]) ++ [(set_attr "type" "load") ++ (set_attr "mode" "")]) + + + ;; Block moves, see loongarch.c for more details. +-;; Argument 0 is the destination +-;; Argument 1 is the source +-;; Argument 2 is the length +-;; Argument 3 is the alignment ++;; Argument 0 is the destination. ++;; Argument 1 is the source. ++;; Argument 2 is the length. ++;; Argument 3 is the alignment. + + (define_expand "movmemsi" + [(parallel [(set (match_operand:BLK 0 "general_operand") +@@ -2740,30 +2515,19 @@ + ;; + ;; .................... + +-(define_expand "3" +- [(set (match_operand:GPR 0 "register_operand") +- (any_shift:GPR (match_operand:GPR 1 "register_operand") +- (match_operand:SI 2 "arith_operand")))] +- "" +-{ +-}) +- +-(define_insn "*3" ++(define_insn "3" + [(set (match_operand:GPR 0 "register_operand" "=r") + (any_shift:GPR (match_operand:GPR 1 "register_operand" "r") + (match_operand:SI 2 "arith_operand" "rI")))] + "" + { + if (CONST_INT_P (operands[2])) +- { + operands[2] = GEN_INT (INTVAL (operands[2]) + & (GET_MODE_BITSIZE (mode) - 1)); +- return "i.\t%0,%1,%2"; +- } else +- return ".\t%0,%1,%2"; ++ ++ return "%i2.\t%0,%1,%2"; + } + [(set_attr "type" "shift") +- (set_attr "compression" "none") + (set_attr "mode" "")]) + + (define_insn "*si3_extend" +@@ -2774,86 +2538,68 @@ + "TARGET_64BIT" + { + if (CONST_INT_P (operands[2])) +- { + operands[2] = GEN_INT (INTVAL (operands[2]) & 0x1f); +- return "i.w\t%0,%1,%2"; +- } else +- return ".w\t%0,%1,%2"; ++ ++ return "%i2.w\t%0,%1,%2"; + } + [(set_attr "type" "shift") + (set_attr "mode" "SI")]) + +-(define_insn "zero_extend_ashift1" +- [ (set (match_operand:DI 0 "register_operand" "=r") +- (and:DI (ashift:DI (subreg:DI (match_operand:SI 1 "register_operand" "r") 0) +- (match_operand 2 "const_immlsa_operand" "")) +- (match_operand 3 "shift_mask_operand" "")))] +-"" +-"bstrpick.d\t%0,%1,31,0\n\talsl.d\t%0,%0,$r0,%2" +-[(set_attr "type" "arith") +- (set_attr "mode" "DI") +- (set_attr "insn_count" "2")]) +- +-(define_insn "zero_extend_ashift2" +- [ (set (match_operand:DI 0 "register_operand" "=r") +- (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r") +- (match_operand 2 "const_immlsa_operand" "")) +- (match_operand 3 "shift_mask_operand" "")))] +-"" +-"bstrpick.d\t%0,%1,31,0\n\talsl.d\t%0,%0,$r0,%2" +-[(set_attr "type" "arith") +- (set_attr "mode" "DI") +- (set_attr "insn_count" "2")]) +- +-(define_insn "alsl_paired1" +- [(set (match_operand:DI 0 "register_operand" "=&r") +- (plus:DI (and:DI (ashift:DI (subreg:DI (match_operand:SI 1 "register_operand" "r") 0) +- (match_operand 2 "const_immlsa_operand" "")) +- (match_operand 3 "shift_mask_operand" "")) +- (match_operand:DI 4 "register_operand" "r")))] +- "" +- "bstrpick.d\t%0,%1,31,0\n\talsl.d\t%0,%0,%4,%2" +- [(set_attr "type" "arith") +- (set_attr "mode" "DI") +- (set_attr "insn_count" "2")]) +- +-(define_insn "alsl_paired2" +- [(set (match_operand:DI 0 "register_operand" "=&r") +- (plus:DI (match_operand:DI 1 "register_operand" "r") +- (and:DI (ashift:DI (match_operand:DI 2 "register_operand" "r") +- (match_operand 3 "const_immlsa_operand" "")) +- (match_operand 4 "shift_mask_operand" ""))))] +- "" +- "bstrpick.d\t%0,%2,31,0\n\talsl.d\t%0,%0,%1,%3" +- [(set_attr "type" "arith") +- (set_attr "mode" "DI") +- 
(set_attr "insn_count" "2")]) +- +-(define_insn "alsl_" +- [(set (match_operand:GPR 0 "register_operand" "=r") +- (plus:GPR (ashift:GPR (match_operand:GPR 1 "register_operand" "r") +- (match_operand 2 "const_immlsa_operand" "")) +- (match_operand:GPR 3 "register_operand" "r")))] +- "ISA_HAS_LSA" +- "alsl.\t%0,%1,%3,%2" +- [(set_attr "type" "arith") +- (set_attr "mode" "")]) +- + (define_insn "rotr3" ++ [(set (match_operand:GPR 0 "register_operand" "=r,r") ++ (rotatert:GPR (match_operand:GPR 1 "register_operand" "r,r") ++ (match_operand:SI 2 "arith_operand" "r,I")))] ++ "" ++ "rotr%i2.\t%0,%1,%2" ++ [(set_attr "type" "shift,shift") ++ (set_attr "mode" "")]) ++ ++;; The following templates were added to generate "bstrpick.d + alsl.d" ++;; instruction pairs. ++;; It is required that the values of const_immalsl_operand and ++;; immediate_operand must have the following correspondence: ++;; ++;; (immediate_operand >> const_immalsl_operand) == 0xffffffff ++ ++(define_insn "zero_extend_ashift" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r") ++ (match_operand 2 "const_immalsl_operand" "")) ++ (match_operand 3 "immediate_operand" "")))] ++ "TARGET_64BIT ++ && ((INTVAL (operands[3]) >> INTVAL (operands[2])) == 0xffffffff)" ++ "bstrpick.d\t%0,%1,31,0\n\talsl.d\t%0,%0,$r0,%2" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "DI") ++ (set_attr "insn_count" "2")]) ++ ++(define_insn "bstrpick_alsl_paired" ++ [(set (match_operand:DI 0 "register_operand" "=&r") ++ (plus:DI (match_operand:DI 1 "register_operand" "r") ++ (and:DI (ashift:DI (match_operand:DI 2 "register_operand" "r") ++ (match_operand 3 "const_immalsl_operand" "")) ++ (match_operand 4 "immediate_operand" ""))))] ++ "TARGET_64BIT ++ && ((INTVAL (operands[4]) >> INTVAL (operands[3])) == 0xffffffff)" ++ "bstrpick.d\t%0,%2,31,0\n\talsl.d\t%0,%0,%1,%3" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "DI") ++ (set_attr "insn_count" "2")]) ++ ++(define_insn "alsl3" + [(set (match_operand:GPR 0 "register_operand" "=r") +- (rotatert:GPR (match_operand:GPR 1 "register_operand" "r") +- (match_operand:SI 2 "arith_operand" "rI")))] ++ (plus:GPR (ashift:GPR (match_operand:GPR 1 "register_operand" "r") ++ (match_operand 2 "const_immalsl_operand" "")) ++ (match_operand:GPR 3 "register_operand" "r")))] + "" +-{ +- if (CONST_INT_P (operands[2])) +- { +- return "rotri.\t%0,%1,%2"; +- } else +- return "rotr.\t%0,%1,%2"; +-} +- [(set_attr "type" "shift") ++ "alsl.\t%0,%1,%3,%2" ++ [(set_attr "type" "arith") + (set_attr "mode" "")]) + ++ ++ ++;; Reverse the order of bytes of operand 1 and store the result in operand 0. 
++ + (define_insn "bswaphi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (bswap:HI (match_operand:HI 1 "register_operand" "r")))] +@@ -2867,7 +2613,7 @@ + "" + "#" + "" +- [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_WSBH)) ++ [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_REVB_2H)) + (set (match_dup 0) (rotatert:SI (match_dup 0) (const_int 16)))] + "" + [(set_attr "insn_count" "2")]) +@@ -2878,28 +2624,28 @@ + "TARGET_64BIT" + "#" + "" +- [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_DSBH)) +- (set (match_dup 0) (unspec:DI [(match_dup 0)] UNSPEC_DSHD))] ++ [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_REVB_4H)) ++ (set (match_dup 0) (unspec:DI [(match_dup 0)] UNSPEC_REVH_D))] + "" + [(set_attr "insn_count" "2")]) + +-(define_insn "wsbh" ++(define_insn "revb_2h" + [(set (match_operand:SI 0 "register_operand" "=r") +- (unspec:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_WSBH))] ++ (unspec:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_REVB_2H))] + "" + "revb.2h\t%0,%1" + [(set_attr "type" "shift")]) + +-(define_insn "dsbh" ++(define_insn "revb_4h" + [(set (match_operand:DI 0 "register_operand" "=r") +- (unspec:DI [(match_operand:DI 1 "register_operand" "r")] UNSPEC_DSBH))] ++ (unspec:DI [(match_operand:DI 1 "register_operand" "r")] UNSPEC_REVB_4H))] + "TARGET_64BIT" + "revb.4h\t%0,%1" + [(set_attr "type" "shift")]) + +-(define_insn "dshd" ++(define_insn "revh_d" + [(set (match_operand:DI 0 "register_operand" "=r") +- (unspec:DI [(match_operand:DI 1 "register_operand" "r")] UNSPEC_DSHD))] ++ (unspec:DI [(match_operand:DI 1 "register_operand" "r")] UNSPEC_REVH_D))] + "TARGET_64BIT" + "revh.d\t%0,%1" + [(set_attr "type" "shift")]) +@@ -2911,37 +2657,37 @@ + ;; + ;; .................... + +-;; Conditional branches on floating-point equality tests. 
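For the byte-swap patterns above, a small C reference (illustrative, not from the patch): per the bswapsi2/bswapdi2 splitters, plausible sequences are revb.2h followed by rotri.w 16 for a 32-bit swap and revb.4h followed by revh.d for a 64-bit swap.

  unsigned int       swap32 (unsigned int x)       { return __builtin_bswap32 (x); }
  unsigned long long swap64 (unsigned long long x) { return __builtin_bswap64 (x); }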
++;; Conditional branches + +-(define_insn "*branch_fp_fcc" ++(define_insn "*branch_fp_FCCmode" + [(set (pc) +- (if_then_else +- (match_operator 1 "equality_operator" +- [(match_operand:FCC 2 "register_operand" "z") +- (const_int 0)]) +- (label_ref (match_operand 0 "" "")) +- (pc)))] ++ (if_then_else ++ (match_operator 1 "equality_operator" ++ [(match_operand:FCC 2 "register_operand" "z") ++ (const_int 0)]) ++ (label_ref (match_operand 0 "" "")) ++ (pc)))] + "TARGET_HARD_FLOAT" + { + return loongarch_output_conditional_branch (insn, operands, +- LARCH_BRANCH ("b%F1", "%Z2%0"), +- LARCH_BRANCH ("b%W1", "%Z2%0")); ++ LARCH_BRANCH ("b%F1", "%Z2%0"), ++ LARCH_BRANCH ("b%W1", "%Z2%0")); + } + [(set_attr "type" "branch")]) + +-(define_insn "*branch_fp_inverted_fcc" ++(define_insn "*branch_fp_inverted_FCCmode" + [(set (pc) +- (if_then_else +- (match_operator 1 "equality_operator" +- [(match_operand:FCC 2 "register_operand" "z") +- (const_int 0)]) +- (pc) +- (label_ref (match_operand 0 "" ""))))] ++ (if_then_else ++ (match_operator 1 "equality_operator" ++ [(match_operand:FCC 2 "register_operand" "z") ++ (const_int 0)]) ++ (pc) ++ (label_ref (match_operand 0 "" ""))))] + "TARGET_HARD_FLOAT" + { + return loongarch_output_conditional_branch (insn, operands, +- LARCH_BRANCH ("b%W1", "%Z2%0"), +- LARCH_BRANCH ("b%F1", "%Z2%0")); ++ LARCH_BRANCH ("b%W1", "%Z2%0"), ++ LARCH_BRANCH ("b%F1", "%Z2%0")); + } + [(set_attr "type" "branch")]) + +@@ -2951,28 +2697,26 @@ + [(set (pc) + (if_then_else + (match_operator 1 "order_operator" +- [(match_operand:GPR 2 "register_operand" "r,r") +- (match_operand:GPR 3 "reg_or_0_operand" "J,r")]) ++ [(match_operand:X 2 "register_operand" "r,r") ++ (match_operand:X 3 "reg_or_0_operand" "J,r")]) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + { return loongarch_output_order_conditional_branch (insn, operands, false); } + [(set_attr "type" "branch") +- (set_attr "compact_form" "maybe,always") + (set_attr "hazard" "forbidden_slot")]) + + (define_insn "*branch_order_inverted" + [(set (pc) + (if_then_else + (match_operator 1 "order_operator" +- [(match_operand:GPR 2 "register_operand" "r,r") +- (match_operand:GPR 3 "reg_or_0_operand" "J,r")]) ++ [(match_operand:X 2 "register_operand" "r,r") ++ (match_operand:X 3 "reg_or_0_operand" "J,r")]) + (pc) + (label_ref (match_operand 0 "" ""))))] + "" + { return loongarch_output_order_conditional_branch (insn, operands, true); } + [(set_attr "type" "branch") +- (set_attr "compact_form" "maybe,always") + (set_attr "hazard" "forbidden_slot")]) + + ;; Conditional branch on equality comparison. 
+@@ -2981,14 +2725,13 @@ + [(set (pc) + (if_then_else + (match_operator 1 "equality_operator" +- [(match_operand:GPR 2 "register_operand" "r") +- (match_operand:GPR 3 "reg_or_0_operand" "rJ")]) ++ [(match_operand:X 2 "register_operand" "r") ++ (match_operand:X 3 "reg_or_0_operand" "rJ")]) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + { return loongarch_output_equal_conditional_branch (insn, operands, false); } + [(set_attr "type" "branch") +- (set_attr "compact_form" "maybe") + (set_attr "hazard" "forbidden_slot")]) + + +@@ -2996,22 +2739,21 @@ + [(set (pc) + (if_then_else + (match_operator 1 "equality_operator" +- [(match_operand:GPR 2 "register_operand" "r") +- (match_operand:GPR 3 "reg_or_0_operand" "rJ")]) ++ [(match_operand:X 2 "register_operand" "r") ++ (match_operand:X 3 "reg_or_0_operand" "rJ")]) + (pc) + (label_ref (match_operand 0 "" ""))))] + "" + { return loongarch_output_equal_conditional_branch (insn, operands, true); } + [(set_attr "type" "branch") +- (set_attr "compact_form" "maybe") + (set_attr "hazard" "forbidden_slot")]) + + + (define_expand "cbranch4" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" +- [(match_operand:GPR 1 "register_operand") +- (match_operand:GPR 2 "nonmemory_operand")]) ++ [(match_operand:GPR 1 "register_operand") ++ (match_operand:GPR 2 "nonmemory_operand")]) + (label_ref (match_operand 3 "")) + (pc)))] + "" +@@ -3023,8 +2765,8 @@ + (define_expand "cbranch4" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" +- [(match_operand:SCALARF 1 "register_operand") +- (match_operand:SCALARF 2 "register_operand")]) ++ [(match_operand:ANYF 1 "register_operand") ++ (match_operand:ANYF 2 "register_operand")]) + (label_ref (match_operand 3 "")) + (pc)))] + "" +@@ -3062,71 +2804,63 @@ + DONE; + }) + +-(define_insn "*seq_zero_" +- [(set (match_operand:GPR2 0 "register_operand" "=r") +- (eq:GPR2 (match_operand:GPR 1 "register_operand" "r") ++(define_insn "*seq_zero_" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (eq:GPR (match_operand:X 1 "register_operand" "r") + (const_int 0)))] + "" + "sltui\t%0,%1,1" + [(set_attr "type" "slt") +- (set_attr "mode" "")]) ++ (set_attr "mode" "")]) + + +-(define_insn "*sne_zero_" +- [(set (match_operand:GPR2 0 "register_operand" "=r") +- (ne:GPR2 (match_operand:GPR 1 "register_operand" "r") ++(define_insn "*sne_zero_" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (ne:GPR (match_operand:X 1 "register_operand" "r") + (const_int 0)))] + "" + "sltu\t%0,%.,%1" + [(set_attr "type" "slt") +- (set_attr "mode" "")]) ++ (set_attr "mode" "")]) + +-(define_insn "*sgt_" +- [(set (match_operand:GPR2 0 "register_operand" "=r") +- (any_gt:GPR2 (match_operand:GPR 1 "register_operand" "r") +- (match_operand:GPR 2 "reg_or_0_operand" "rJ")))] ++(define_insn "*sgt_" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (any_gt:GPR (match_operand:X 1 "register_operand" "r") ++ (match_operand:X 2 "reg_or_0_operand" "rJ")))] + "" + "slt\t%0,%z2,%1" + [(set_attr "type" "slt") +- (set_attr "mode" "")]) ++ (set_attr "mode" "")]) + +- +-(define_insn "*sge_" +- [(set (match_operand:GPR2 0 "register_operand" "=r") +- (any_ge:GPR2 (match_operand:GPR 1 "register_operand" "r") ++(define_insn "*sge_" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (any_ge:GPR (match_operand:X 1 "register_operand" "r") + (const_int 1)))] + "" + "slti\t%0,%.,%1" + [(set_attr "type" "slt") +- (set_attr "mode" "")]) ++ (set_attr "mode" "")]) + +-(define_insn "*slt_" +- [(set (match_operand:GPR2 0 
"register_operand" "=r") +- (any_lt:GPR2 (match_operand:GPR 1 "register_operand" "r") +- (match_operand:GPR 2 "arith_operand" "rI")))] ++(define_insn "*slt_" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (any_lt:GPR (match_operand:X 1 "register_operand" "r") ++ (match_operand:X 2 "arith_operand" "rI")))] + "" +-{ +- if (CONST_INT_P (operands[2])) +- { +- return "slti\t%0,%1,%2"; +- } else +- return "slt\t%0,%1,%2"; +-} ++ "slt%i2\t%0,%1,%2"; + [(set_attr "type" "slt") +- (set_attr "mode" "")]) ++ (set_attr "mode" "")]) + +- +-(define_insn "*sle_" +- [(set (match_operand:GPR2 0 "register_operand" "=r") +- (any_le:GPR2 (match_operand:GPR 1 "register_operand" "r") +- (match_operand:GPR 2 "sle_operand" "")))] ++(define_insn "*sle_" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (any_le:GPR (match_operand:X 1 "register_operand" "r") ++ (match_operand:X 2 "sle_operand" "")))] + "" + { + operands[2] = GEN_INT (INTVAL (operands[2]) + 1); + return "slti\t%0,%1,%2"; + } + [(set_attr "type" "slt") +- (set_attr "mode" "")]) ++ (set_attr "mode" "")]) + + + ;; +@@ -3136,23 +2870,15 @@ + ;; + ;; .................... + +-(define_insn "s__using_fcc" ++(define_insn "s__using_FCCmode" + [(set (match_operand:FCC 0 "register_operand" "=z") +- (fcond:FCC (match_operand:SCALARF 1 "register_operand" "f") +- (match_operand:SCALARF 2 "register_operand" "f")))] ++ (fcond:FCC (match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f")))] + "" + "fcmp..\t%Z0%1,%2" + [(set_attr "type" "fcmp") + (set_attr "mode" "FCC")]) + +-(define_insn "s__using_fcc" +- [(set (match_operand:FCC 0 "register_operand" "=z") +- (swapped_fcond:FCC (match_operand:SCALARF 1 "register_operand" "f") +- (match_operand:SCALARF 2 "register_operand" "f")))] +- "" +- "fcmp..\t%Z0%2,%1" +- [(set_attr "type" "fcmp") +- (set_attr "mode" "FCC")]) + + ;; + ;; .................... +@@ -3170,24 +2896,20 @@ + (define_insn "*jump_absolute" + [(set (pc) + (label_ref (match_operand 0)))] +- "TARGET_ABSOLUTE_JUMPS" ++ "!flag_pic" + { +- return LARCH_ABSOLUTE_JUMP ("b\t%l0"); ++ return "b\t%l0"; + } +- [(set_attr "type" "branch") +- (set_attr "compact_form" "maybe")]) ++ [(set_attr "type" "branch")]) + + (define_insn "*jump_pic" + [(set (pc) + (label_ref (match_operand 0)))] +- "!TARGET_ABSOLUTE_JUMPS" ++ "flag_pic" + { + return "b\t%0"; + } +- [(set_attr "type" "branch") +- (set_attr "compact_form" "maybe")]) +- +- ++ [(set_attr "type" "branch")]) + + (define_expand "indirect_jump" + [(set (pc) (match_operand 0 "register_operand"))] +@@ -3198,12 +2920,10 @@ + DONE; + }) + +-(define_insn "indirect_jump_" ++(define_insn "indirect_jump" + [(set (pc) (match_operand:P 0 "register_operand" "r"))] + "" +- { +- return "jr\t%0"; +- } ++ "jr\t%0" + [(set_attr "type" "jump") + (set_attr "mode" "none")]) + +@@ -3214,25 +2934,25 @@ + "" + { + if (flag_pic) +- operands[0] = expand_simple_binop (Pmode, PLUS, operands[0], +- gen_rtx_LABEL_REF (Pmode, operands[1]), +- NULL_RTX, 0, OPTAB_DIRECT); ++ operands[0] = expand_simple_binop (Pmode, PLUS, operands[0], ++ gen_rtx_LABEL_REF (Pmode, ++ operands[1]), ++ NULL_RTX, 0, OPTAB_DIRECT); + emit_jump_insn (PMODE_INSN (gen_tablejump, (operands[0], operands[1]))); + DONE; + }) + +-(define_insn "tablejump_" ++(define_insn "tablejump" + [(set (pc) + (match_operand:P 0 "register_operand" "r")) + (use (label_ref (match_operand 1 "" "")))] + "" +- { +- return "jr\t%0"; +- } ++ "jr\t%0" + [(set_attr "type" "jump") + (set_attr "mode" "none")]) + + ++ + ;; + ;; .................... 
+ ;; +@@ -3254,22 +2974,25 @@ + ;; saved or used to pass arguments. + + (define_insn "blockage" +- [(unspec_volatile [(const_int 0)] UNSPEC_BLOCKAGE)] ++ [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)] + "" + "" + [(set_attr "type" "ghost") + (set_attr "mode" "none")]) + +-(define_insn "probe_stack_range_" ++(define_insn "probe_stack_range" + [(set (match_operand:P 0 "register_operand" "=r") + (unspec_volatile:P [(match_operand:P 1 "register_operand" "0") + (match_operand:P 2 "register_operand" "r") +- (match_operand:P 3 "register_operand" "r")] +- UNSPEC_PROBE_STACK_RANGE))] ++ (match_operand:P 3 "register_operand" "r")] ++ UNSPECV_PROBE_STACK_RANGE))] + "" +- { return loongarch_output_probe_stack_range (operands[0], operands[2], operands[3]); } ++{ ++ return loongarch_output_probe_stack_range (operands[0], ++ operands[2], ++ operands[3]); ++} + [(set_attr "type" "unknown") +- (set_attr "can_delay" "no") + (set_attr "mode" "")]) + + (define_expand "epilogue" +@@ -3304,12 +3027,12 @@ + (define_insn "*" + [(any_return)] + "" +- { +- operands[0] = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); +- return "jr\t%0"; +- } +- [(set_attr "type" "jump") +- (set_attr "mode" "none")]) ++{ ++ operands[0] = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); ++ return "jr\t%0"; ++} ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none")]) + + ;; Normal return. + +@@ -3317,46 +3040,18 @@ + [(any_return) + (use (match_operand 0 "pmode_register_operand" ""))] + "" +- { +- return "jr\t%0"; +- } +- [(set_attr "type" "jump") +- (set_attr "mode" "none")]) +- +-;; Exception return. +-(define_insn "loongarch_ertn" +- [(return) +- (unspec_volatile [(const_int 0)] UNSPEC_ERTN)] +- "" +- "ertn" +- [(set_attr "type" "trap") +- (set_attr "mode" "none")]) +- +-;; Disable interrupts. +-(define_insn "loongarch_di" +- [(unspec_volatile [(const_int 0)] UNSPEC_DI)] +- "" +- "di" +- [(set_attr "type" "trap") +- (set_attr "mode" "none")]) +- +-;; Execution hazard barrier. +-(define_insn "loongarch_ehb" +- [(unspec_volatile [(const_int 0)] UNSPEC_EHB)] +- "" +- "ehb" +- [(set_attr "type" "trap") +- (set_attr "mode" "none")]) ++ "jr\t%0" ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none")]) + +-;; Read GPR from previous shadow register set. +-(define_insn "loongarch_rdpgpr_" +- [(set (match_operand:P 0 "register_operand" "=r") +- (unspec_volatile:P [(match_operand:P 1 "register_operand" "r")] +- UNSPEC_RDPGPR))] ++;; Exception return. ++(define_insn "loongarch_ertn" ++ [(return) ++ (unspec_volatile [(const_int 0)] UNSPECV_ERTN)] + "" +- "rdpgpr\t%0,%1" +- [(set_attr "type" "move") +- (set_attr "mode" "")]) ++ "ertn" ++ [(set_attr "type" "trap") ++ (set_attr "mode" "none")]) + + ;; This is used in compiling the unwind routines. + (define_expand "eh_return" +@@ -3366,22 +3061,22 @@ + if (GET_MODE (operands[0]) != word_mode) + operands[0] = convert_to_mode (word_mode, operands[0], 0); + if (TARGET_64BIT) +- emit_insn (gen_eh_set_lr_di (operands[0])); ++ emit_insn (gen_eh_set_ra_di (operands[0])); + else +- emit_insn (gen_eh_set_lr_si (operands[0])); ++ emit_insn (gen_eh_set_ra_si (operands[0])); + DONE; + }) + + ;; Clobber the return address on the stack. We can't expand this + ;; until we know where it will be put in the stack frame. + +-(define_insn "eh_set_lr_si" ++(define_insn "eh_set_ra_si" + [(unspec [(match_operand:SI 0 "register_operand" "r")] UNSPEC_EH_RETURN) + (clobber (match_scratch:SI 1 "=&r"))] + "! 
TARGET_64BIT" + "#") + +-(define_insn "eh_set_lr_di" ++(define_insn "eh_set_ra_di" + [(unspec [(match_operand:DI 0 "register_operand" "r")] UNSPEC_EH_RETURN) + (clobber (match_scratch:DI 1 "=&r"))] + "TARGET_64BIT" +@@ -3406,23 +3101,14 @@ + ;; + ;; .................... + +- + ;; Sibling calls. All these patterns use jump instructions. + +-;; If TARGET_SIBCALLS, call_insn_operand will only accept constant +-;; addresses if a direct jump is acceptable. Since the 'S' constraint +-;; is defined in terms of call_insn_operand, the same is true of the +-;; constraints. +- +-;; When we use an indirect jump, we need a register that will be +-;; preserved by the epilogue. +- + (define_expand "sibcall" + [(parallel [(call (match_operand 0 "") + (match_operand 1 "")) + (use (match_operand 2 "")) ;; next_arg_reg + (use (match_operand 3 ""))])] ;; struct_value_size_rtx +- "TARGET_SIBCALLS" ++ "" + { + rtx target = loongarch_legitimize_call_address (XEXP (operands[0], 0)); + +@@ -3433,172 +3119,170 @@ + (define_insn "sibcall_internal" + [(call (mem:SI (match_operand 0 "call_insn_operand" "j,c,a,t,h")) + (match_operand 1 "" ""))] +- "TARGET_SIBCALLS && SIBLING_CALL_P (insn)" ++ "SIBLING_CALL_P (insn)" + { + switch (which_alternative) + { + case 0: + return "jr\t%0"; + case 1: +- if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) +- return "pcaddu18i\t$r12,(%%pcrel(%0+0x20000))>>18\n\t" +- "jirl\t$r0,$r12,%%pcrel(%0+4)-(%%pcrel(%0+4+0x20000)>>18<<18)"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.local\t$r12,$r13,%0\n\tjr\t$r12"; ++ if (TARGET_CMODEL_LARGE) ++ return "pcaddu18i\t$r12,(%%pcrel(%0+0x20000))>>18\n\t" ++ "jirl\t$r0,$r12,%%pcrel(%0+4)-(%%pcrel(%0+4+0x20000)>>18<<18)"; ++ else if (TARGET_CMODEL_EXTREME) ++ return "la.local\t$r12,$r13,%0\n\tjr\t$r12"; + else +- return "b\t%0"; ++ return "b\t%0"; + case 2: +- if (loongarch_cmodel_var == LARCH_CMODEL_TINY_STATIC) +- return "b\t%0"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.global\t$r12,$r13,%0\n\tjr\t$r12"; ++ if (TARGET_CMODEL_TINY_STATIC) ++ return "b\t%0"; ++ else if (TARGET_CMODEL_EXTREME) ++ return "la.global\t$r12,$r13,%0\n\tjr\t$r12"; + else +- return "la.global\t$r12,%0\n\tjr\t$r12"; ++ return "la.global\t$r12,%0\n\tjr\t$r12"; + case 3: +- if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.global\t$r12,$r13,%0\n\tjr\t$r12"; ++ if (TARGET_CMODEL_EXTREME) ++ return "la.global\t$r12,$r13,%0\n\tjr\t$r12"; + else +- return "la.global\t$r12,%0\n\tjr\t$r12"; ++ return "la.global\t$r12,%0\n\tjr\t$r12"; + case 4: +- if (loongarch_cmodel_var == LARCH_CMODEL_NORMAL || loongarch_cmodel_var == LARCH_CMODEL_TINY) +- return "b\t%%plt(%0)"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) +- return "pcaddu18i\t$r12,(%%plt(%0)+0x20000)>>18\n\t" +- "jirl\t$r0,$r12,%%plt(%0)+4-((%%plt(%0)+(4+0x20000))>>18<<18)"; ++ if (TARGET_CMODEL_NORMAL || TARGET_CMODEL_TINY) ++ return "b\t%%plt(%0)"; ++ else if (TARGET_CMODEL_LARGE) ++ return "pcaddu18i\t$r12,(%%plt(%0)+0x20000)>>18\n\t" ++ "jirl\t$r0,$r12,%%plt(%0)+4-((%%plt(%0)+(4+0x20000))>>18<<18)"; + else +- sorry ("cmodel extreme and tiny static not support plt."); ++ /* Code model "extreme" and "tiny-static" do not support plt. 
*/ ++ gcc_unreachable (); + default: + gcc_unreachable (); + } + } +- [(set_attr "jal" "indirect,direct,direct,direct,direct")]) ++ [(set_attr "jirl" "indirect,direct,direct,direct,direct")]) + + (define_expand "sibcall_value" + [(parallel [(set (match_operand 0 "") + (call (match_operand 1 "") + (match_operand 2 ""))) + (use (match_operand 3 ""))])] ;; next_arg_reg +- "TARGET_SIBCALLS" ++ "" + { + rtx target = loongarch_legitimize_call_address (XEXP (operands[1], 0)); + +- /* Handle return values created by loongarch_return_fpr_pair. */ ++ /* Handle return values created by loongarch_pass_fpr_pair. */ + if (GET_CODE (operands[0]) == PARALLEL && XVECLEN (operands[0], 0) == 2) + { +- emit_call_insn (gen_sibcall_value_multiple_internal (XEXP (XVECEXP (operands[0], 0, 0), 0), +- target, operands[2], XEXP (XVECEXP (operands[0], 0, 1), 0))); ++ rtx arg1 = XEXP (XVECEXP (operands[0],0, 0), 0); ++ rtx arg2 = XEXP (XVECEXP (operands[0],0, 1), 0); ++ ++ emit_call_insn (gen_sibcall_value_multiple_internal (arg1, target, ++ operands[2], ++ arg2)); + } + else + { +- /* Handle return values created by loongarch_return_fpr_single. */ ++ /* Handle return values created by loongarch_return_fpr_single. */ + if (GET_CODE (operands[0]) == PARALLEL && XVECLEN (operands[0], 0) == 1) +- operands[0] = XEXP (XVECEXP (operands[0], 0, 0), 0); +- +- emit_call_insn (gen_sibcall_value_internal (operands[0], target, operands[2])); ++ operands[0] = XEXP (XVECEXP (operands[0], 0, 0), 0); ++ ++ emit_call_insn (gen_sibcall_value_internal (operands[0], target, ++ operands[2])); + } + DONE; + }) + + (define_insn "sibcall_value_internal" + [(set (match_operand 0 "register_operand" "") +- (call (mem:SI (match_operand 1 "call_insn_operand" "j,c,a,t,h")) +- (match_operand 2 "" "")))] +- "TARGET_SIBCALLS && SIBLING_CALL_P (insn)" ++ (call (mem:SI (match_operand 1 "call_insn_operand" "j,c,a,t,h")) ++ (match_operand 2 "" "")))] ++ "SIBLING_CALL_P (insn)" + { + switch (which_alternative) + { + case 0: + return "jr\t%1"; + case 1: +- if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) +- return "pcaddu18i\t$r12,%%pcrel(%1+0x20000)>>18\n\t" +- "jirl\t$r0,$r12,%%pcrel(%1+4)-((%%pcrel(%1+4+0x20000))>>18<<18)"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.local\t$r12,$r13,%1\n\t" +- "jr\t$r12"; ++ if (TARGET_CMODEL_LARGE) ++ return "pcaddu18i\t$r12,%%pcrel(%1+0x20000)>>18\n\t" ++ "jirl\t$r0,$r12,%%pcrel(%1+4)-((%%pcrel(%1+4+0x20000))>>18<<18)"; ++ else if (TARGET_CMODEL_EXTREME) ++ return "la.local\t$r12,$r13,%1\n\tjr\t$r12"; + else +- return "b\t%1"; ++ return "b\t%1"; + case 2: +- if (loongarch_cmodel_var == LARCH_CMODEL_TINY_STATIC) +- return "b\t%1"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.global\t$r12,$r13,%1\n\t" +- "jr\t$r12"; ++ if (TARGET_CMODEL_TINY_STATIC) ++ return "b\t%1"; ++ else if (TARGET_CMODEL_EXTREME) ++ return "la.global\t$r12,$r13,%1\n\tjr\t$r12"; + else +- return "la.global\t$r12,%1\n\t" +- "jr\t$r12"; ++ return "la.global\t$r12,%1\n\tjr\t$r12"; + case 3: +- if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.global\t$r12,$r13,%1\n\t" +- "jr\t$r12"; ++ if (TARGET_CMODEL_EXTREME) ++ return "la.global\t$r12,$r13,%1\n\tjr\t$r12"; + else +- return "la.global\t$r12,%1\n\t" +- "jr\t$r12"; ++ return "la.global\t$r12,%1\n\tjr\t$r12"; + case 4: +- if (loongarch_cmodel_var == LARCH_CMODEL_NORMAL || loongarch_cmodel_var == LARCH_CMODEL_TINY) +- return " b\t%%plt(%1)"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) +- return 
"pcaddu18i\t$r12,(%%plt(%1)+0x20000)>>18\n\t" +- "jirl\t$r0,$r12,%%plt(%1)+4-((%%plt(%1)+(4+0x20000))>>18<<18)"; ++ if (TARGET_CMODEL_NORMAL || TARGET_CMODEL_TINY) ++ return " b\t%%plt(%1)"; ++ else if (TARGET_CMODEL_LARGE) ++ return "pcaddu18i\t$r12,(%%plt(%1)+0x20000)>>18\n\t" ++ "jirl\t$r0,$r12,%%plt(%1)+4-((%%plt(%1)+(4+0x20000))>>18<<18)"; + else +- sorry ("loongarch cmodel extreme and tiny-static not support plt."); ++ /* Code model "extreme" and "tiny-static" do not support plt. */ ++ gcc_unreachable (); + default: + gcc_unreachable (); + } + } +- [(set_attr "jal" "indirect,direct,direct,direct,direct")]) ++ [(set_attr "jirl" "indirect,direct,direct,direct,direct")]) + + (define_insn "sibcall_value_multiple_internal" + [(set (match_operand 0 "register_operand" "") +- (call (mem:SI (match_operand 1 "call_insn_operand" "j,c,a,t,h")) +- (match_operand 2 "" ""))) ++ (call (mem:SI (match_operand 1 "call_insn_operand" "j,c,a,t,h")) ++ (match_operand 2 "" ""))) + (set (match_operand 3 "register_operand" "") + (call (mem:SI (match_dup 1)) + (match_dup 2)))] +- "TARGET_SIBCALLS && SIBLING_CALL_P (insn)" ++ "SIBLING_CALL_P (insn)" + { + switch (which_alternative) + { + case 0: + return "jr\t%1"; + case 1: +- if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) +- return "pcaddu18i\t$r12,%%pcrel(%1+0x20000)>>18\n\t" +- "jirl\t$r0,$r12,%%pcrel(%1+4)-(%%pcrel(%1+4+0x20000)>>18<<18)"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.local\t$r12,$r13,%1\n\t" +- "jr\t$r12"; ++ if (TARGET_CMODEL_LARGE) ++ return "pcaddu18i\t$r12,%%pcrel(%1+0x20000)>>18\n\t" ++ "jirl\t$r0,$r12,%%pcrel(%1+4)-(%%pcrel(%1+4+0x20000)>>18<<18)"; ++ else if (TARGET_CMODEL_EXTREME) ++ return "la.local\t$r12,$r13,%1\n\tjr\t$r12"; + else +- return "b\t%1"; ++ return "b\t%1"; + case 2: +- if (loongarch_cmodel_var == LARCH_CMODEL_TINY_STATIC) +- return "b\t%1"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.global\t$r12,$r13,%1\n\t" +- "jr\t$r12"; ++ if (TARGET_CMODEL_TINY_STATIC) ++ return "b\t%1"; ++ else if (TARGET_CMODEL_EXTREME) ++ return "la.global\t$r12,$r13,%1\n\tjr\t$r12"; + else +- return "la.global\t$r12,%1\n\t" +- "jr\t$r12"; ++ return "la.global\t$r12,%1\n\tjr\t$r12"; + case 3: +- if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.global\t$r12,$r13,%1\n\t" +- "jr\t$r12"; ++ if (TARGET_CMODEL_EXTREME) ++ return "la.global\t$r12,$r13,%1\n\tjr\t$r12"; + else +- return "la.global\t$r12,%1\n\t" +- "jr\t$r12"; ++ return "la.global\t$r12,%1\n\tjr\t$r12"; + case 4: +- if (loongarch_cmodel_var == LARCH_CMODEL_NORMAL || loongarch_cmodel_var == LARCH_CMODEL_TINY) +- return "b\t%%plt(%1)"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) +- return "pcaddu18i\t$r12,(%%plt(%1)+0x20000)>>18\n\t" +- "jirl\t$r0,$r12,%%plt(%1)+4-((%%plt(%1)+(4+0x20000))>>18<<18)"; ++ if (TARGET_CMODEL_NORMAL || TARGET_CMODEL_TINY) ++ return "b\t%%plt(%1)"; ++ else if (TARGET_CMODEL_LARGE) ++ return "pcaddu18i\t$r12,(%%plt(%1)+0x20000)>>18\n\t" ++ "jirl\t$r0,$r12,%%plt(%1)+4-((%%plt(%1)+(4+0x20000))>>18<<18)"; + else +- sorry ("loongarch cmodel extreme and tiny-static not support plt."); ++ /* Code model "extreme" and "tiny-static" do not support plt. 
*/ ++ gcc_unreachable (); + default: + gcc_unreachable (); + } + } +- [(set_attr "jal" "indirect,direct,direct,direct,direct")]) ++ [(set_attr "jirl" "indirect,direct,direct,direct,direct")]) + + (define_expand "call" + [(parallel [(call (match_operand 0 "") +@@ -3612,22 +3296,6 @@ + emit_call_insn (gen_call_internal (target, operands[1])); + DONE; + }) +-;; In the last case, we can generate the individual instructions with +-;; a define_split. There are several things to be wary of: +-;; +-;; - We can't expose the load of $gp before reload. If we did, +-;; it might get removed as dead, but reload can introduce new +-;; uses of $gp by rematerializing constants. +-;; +-;; - We shouldn't restore $gp after calls that never return. +-;; It isn't valid to insert instructions between a noreturn +-;; call and the following barrier. +-;; +-;; - The splitter deliberately changes the liveness of $gp. The unsplit +-;; instruction preserves $gp and so have no effect on its liveness. +-;; But once we generate the separate insns, it becomes obvious that +-;; $gp is not live on entry to the call. +-;; + + (define_insn "call_internal" + [(call (mem:SI (match_operand 0 "call_insn_operand" "e,c,a,t,h")) +@@ -3640,46 +3308,41 @@ + case 0: + return "jirl\t$r1,%0,0"; + case 1: +- if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) +- return "pcaddu18i\t$r1,%%pcrel(%0+0x20000)>>18\n\t" +- "jirl\t$r1,$r1,%%pcrel(%0+4)-(%%pcrel(%0+4+0x20000)>>18<<18)"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.local\t$r1,$r12,%0\n\t" +- "jirl\t$r1,$r1,0"; ++ if (TARGET_CMODEL_LARGE) ++ return "pcaddu18i\t$r1,%%pcrel(%0+0x20000)>>18\n\t" ++ "jirl\t$r1,$r1,%%pcrel(%0+4)-(%%pcrel(%0+4+0x20000)>>18<<18)"; ++ else if (TARGET_CMODEL_EXTREME) ++ return "la.local\t$r1,$r12,%0\n\tjirl\t$r1,$r1,0"; + else +- return "bl\t%0"; ++ return "bl\t%0"; + case 2: +- if (loongarch_cmodel_var == LARCH_CMODEL_TINY_STATIC) +- return "bl\t%0"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.global\t$r1,$r12,%0\n\t" +- "jirl\t$r1,$r1,0"; ++ if (TARGET_CMODEL_TINY_STATIC) ++ return "bl\t%0"; ++ else if (TARGET_CMODEL_EXTREME) ++ return "la.global\t$r1,$r12,%0\n\tjirl\t$r1,$r1,0"; + else +- return "la.global\t$r1,%0\n\t" +- "jirl\t$r1,$r1,0"; ++ return "la.global\t$r1,%0\n\tjirl\t$r1,$r1,0"; + case 3: +- if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.global\t$r1,$r12,%0\n\t" +- "jirl\t$r1,$r1,0"; ++ if (TARGET_CMODEL_EXTREME) ++ return "la.global\t$r1,$r12,%0\n\tjirl\t$r1,$r1,0"; + else +- return "la.global\t$r1,%0\n\t" +- "jirl\t$r1,$r1,0"; ++ return "la.global\t$r1,%0\n\tjirl\t$r1,$r1,0"; + case 4: +- if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) +- return "pcaddu18i\t$r1,(%%plt(%0)+0x20000)>>18\n\t" +- "jirl\t$r1,$r1,%%plt(%0)+4-((%%plt(%0)+(4+0x20000))>>18<<18)"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_NORMAL || loongarch_cmodel_var == LARCH_CMODEL_TINY) +- return "bl\t%%plt(%0)"; ++ if (TARGET_CMODEL_LARGE) ++ return "pcaddu18i\t$r1,(%%plt(%0)+0x20000)>>18\n\t" ++ "jirl\t$r1,$r1,%%plt(%0)+4-((%%plt(%0)+(4+0x20000))>>18<<18)"; ++ else if (TARGET_CMODEL_NORMAL || TARGET_CMODEL_TINY) ++ return "bl\t%%plt(%0)"; + else +- sorry ("cmodel extreme and tiny-static not support plt."); ++ /* Code model "extreme" and "tiny-static" do not support plt. 
*/ ++ gcc_unreachable (); + default: + gcc_unreachable (); + } + } +- [(set_attr "jal" "indirect,direct,direct,direct,direct") ++ [(set_attr "jirl" "indirect,direct,direct,direct,direct") + (set_attr "insn_count" "1,2,3,3,2")]) + +- + (define_expand "call_value" + [(parallel [(set (match_operand 0 "") + (call (match_operand 1 "") +@@ -3688,26 +3351,31 @@ + "" + { + rtx target = loongarch_legitimize_call_address (XEXP (operands[1], 0)); +- /* Handle return values created by loongarch_return_fpr_pair. */ ++ /* Handle return values created by loongarch_pass_fpr_pair. */ + if (GET_CODE (operands[0]) == PARALLEL && XVECLEN (operands[0], 0) == 2) +- emit_call_insn (gen_call_value_multiple_internal (XEXP (XVECEXP (operands[0], 0, 0), 0), +- target, operands[2], XEXP (XVECEXP (operands[0], 0, 1), 0))); ++ { ++ rtx arg1 = XEXP (XVECEXP (operands[0], 0, 0), 0); ++ rtx arg2 = XEXP (XVECEXP (operands[0], 0, 1), 0); ++ ++ emit_call_insn (gen_call_value_multiple_internal (arg1, target, ++ operands[2], arg2)); ++ } + else + { +- /* Handle return values created by loongarch_return_fpr_single. */ ++ /* Handle return values created by loongarch_return_fpr_single. */ + if (GET_CODE (operands[0]) == PARALLEL && XVECLEN (operands[0], 0) == 1) +- operands[0] = XEXP (XVECEXP (operands[0], 0, 0), 0); +- +- emit_call_insn (gen_call_value_internal (operands[0], target, operands[2])); ++ operands[0] = XEXP (XVECEXP (operands[0], 0, 0), 0); ++ ++ emit_call_insn (gen_call_value_internal (operands[0], target, ++ operands[2])); + } + DONE; + }) + +-;; See comment for call_internal. + (define_insn "call_value_internal" + [(set (match_operand 0 "register_operand" "") +- (call (mem:SI (match_operand 1 "call_insn_operand" "e,c,a,t,h")) +- (match_operand 2 "" ""))) ++ (call (mem:SI (match_operand 1 "call_insn_operand" "e,c,a,t,h")) ++ (match_operand 2 "" ""))) + (clobber (reg:SI RETURN_ADDR_REGNUM))] + "" + { +@@ -3716,50 +3384,45 @@ + case 0: + return "jirl\t$r1,%1,0"; + case 1: +- if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) +- return "pcaddu18i\t$r1,%%pcrel(%1+0x20000)>>18\n\t" +- "jirl\t$r1,$r1,%%pcrel(%1+4)-(%%pcrel(%1+4+0x20000)>>18<<18)"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.local\t$r1,$r12,%1\n\t" +- "jirl\t$r1,$r1,0"; ++ if (TARGET_CMODEL_LARGE) ++ return "pcaddu18i\t$r1,%%pcrel(%1+0x20000)>>18\n\t" ++ "jirl\t$r1,$r1,%%pcrel(%1+4)-(%%pcrel(%1+4+0x20000)>>18<<18)"; ++ else if (TARGET_CMODEL_EXTREME) ++ return "la.local\t$r1,$r12,%1\n\tjirl\t$r1,$r1,0"; + else +- return "bl\t%1"; ++ return "bl\t%1"; + case 2: +- if (loongarch_cmodel_var == LARCH_CMODEL_TINY_STATIC) +- return "bl\t%1"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.global\t$r1,$r12,%1\n\t" +- "jirl\t$r1,$r1,0"; ++ if (TARGET_CMODEL_TINY_STATIC) ++ return "bl\t%1"; ++ else if (TARGET_CMODEL_EXTREME) ++ return "la.global\t$r1,$r12,%1\n\tjirl\t$r1,$r1,0"; + else +- return "la.global\t$r1,%1\n\t" +- "jirl\t$r1,$r1,0"; ++ return "la.global\t$r1,%1\n\tjirl\t$r1,$r1,0"; + case 3: +- if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.global\t$r1,$r12,%1\n\t" +- "jirl\t$r1,$r1,0"; ++ if (TARGET_CMODEL_EXTREME) ++ return "la.global\t$r1,$r12,%1\n\tjirl\t$r1,$r1,0"; + else +- return "la.global\t$r1,%1\n\t" +- "jirl\t$r1,$r1,0"; ++ return "la.global\t$r1,%1\n\tjirl\t$r1,$r1,0"; + case 4: +- if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) +- return "pcaddu18i\t$r1,(%%plt(%1)+0x20000)>>18\n\t" +- "jirl\t$r1,$r1,%%plt(%1)+4-((%%plt(%1)+(4+0x20000))>>18<<18)"; +- else if (loongarch_cmodel_var 
== LARCH_CMODEL_NORMAL || loongarch_cmodel_var == LARCH_CMODEL_TINY) +- return "bl\t%%plt(%1)"; ++ if (TARGET_CMODEL_LARGE) ++ return "pcaddu18i\t$r1,(%%plt(%1)+0x20000)>>18\n\t" ++ "jirl\t$r1,$r1,%%plt(%1)+4-((%%plt(%1)+(4+0x20000))>>18<<18)"; ++ else if (TARGET_CMODEL_NORMAL || TARGET_CMODEL_TINY) ++ return "bl\t%%plt(%1)"; + else +- sorry ("loongarch cmodel extreme and tiny-static not support plt."); ++ /* Code model "extreme" and "tiny-static" do not support plt. */ ++ gcc_unreachable (); + default: + gcc_unreachable (); + } + } +- [(set_attr "jal" "indirect,direct,direct,direct,direct") ++ [(set_attr "jirl" "indirect,direct,direct,direct,direct") + (set_attr "insn_count" "1,2,3,3,2")]) + +-;; See comment for call_internal. + (define_insn "call_value_multiple_internal" + [(set (match_operand 0 "register_operand" "") +- (call (mem:SI (match_operand 1 "call_insn_operand" "e,c,a,t,h")) +- (match_operand 2 "" ""))) ++ (call (mem:SI (match_operand 1 "call_insn_operand" "e,c,a,t,h")) ++ (match_operand 2 "" ""))) + (set (match_operand 3 "register_operand" "") + (call (mem:SI (match_dup 1)) + (match_dup 2))) +@@ -3771,48 +3434,43 @@ + case 0: + return "jirl\t$r1,%1,0"; + case 1: +- if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) +- return "pcaddu18i\t$r1,%%pcrel(%1+0x20000)>>18\n\t" +- "jirl\t$r1,$r1,%%pcrel(%1+4)-(%%pcrel(%1+4+0x20000)>>18<<18)"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.local\t$r1,$r12,%1\n\t" +- "jirl\t$r1,$r1,0"; ++ if (TARGET_CMODEL_LARGE) ++ return "pcaddu18i\t$r1,%%pcrel(%1+0x20000)>>18\n\t" ++ "jirl\t$r1,$r1,%%pcrel(%1+4)-(%%pcrel(%1+4+0x20000)>>18<<18)"; ++ else if (TARGET_CMODEL_EXTREME) ++ return "la.local\t$r1,$r12,%1\n\tjirl\t$r1,$r1,0"; + else +- return "bl\t%1"; ++ return "bl\t%1"; + case 2: +- if (loongarch_cmodel_var == LARCH_CMODEL_TINY_STATIC) +- return "bl\t%1"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.global\t$r1,$r12,%1\n\t" +- "jirl\t$r1,$r1,0 "; ++ if (TARGET_CMODEL_TINY_STATIC) ++ return "bl\t%1"; ++ else if (TARGET_CMODEL_EXTREME) ++ return "la.global\t$r1,$r12,%1\n\tjirl\t$r1,$r1,0 "; + else +- return "la.global\t$r1,%1\n\t" +- "jirl\t$r1,$r1,0"; ++ return "la.global\t$r1,%1\n\tjirl\t$r1,$r1,0"; + case 3: +- if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) +- return "la.global\t$r1,$r12,%1\n\t" +- "jirl\t$r1,$r1,0"; ++ if (TARGET_CMODEL_EXTREME) ++ return "la.global\t$r1,$r12,%1\n\tjirl\t$r1,$r1,0"; + else +- return "la.global\t$r1,%1\n\t" +- "jirl\t$r1,$r1,0"; ++ return "la.global\t$r1,%1\n\tjirl\t$r1,$r1,0"; + case 4: +- if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) +- return "pcaddu18i\t$r1,(%%plt(%1)+0x20000)>>18\n\t" +- "jirl\t$r1,$r1,%%plt(%1)+4-((%%plt(%1)+(4+0x20000))>>18<<18)"; +- else if (loongarch_cmodel_var == LARCH_CMODEL_NORMAL || loongarch_cmodel_var == LARCH_CMODEL_TINY) +- return "bl\t%%plt(%1)"; ++ if (TARGET_CMODEL_LARGE) ++ return "pcaddu18i\t$r1,(%%plt(%1)+0x20000)>>18\n\t" ++ "jirl\t$r1,$r1,%%plt(%1)+4-((%%plt(%1)+(4+0x20000))>>18<<18)"; ++ else if (TARGET_CMODEL_NORMAL || TARGET_CMODEL_TINY) ++ return "bl\t%%plt(%1)"; + else +- sorry ("loongarch cmodel extreme and tiny-static not support plt."); ++ /* Code model "extreme" and "tiny-static" do not support plt. */ ++ gcc_unreachable (); + default: + gcc_unreachable (); + } + } +- [(set_attr "jal" "indirect,direct,direct,direct,direct") ++ [(set_attr "jirl" "indirect,direct,direct,direct,direct") + (set_attr "insn_count" "1,2,3,3,2")]) + + + ;; Call subroutine returning any type. 
+- + (define_expand "untyped_call" + [(parallel [(call (match_operand 0 "") + (const_int 0)) +@@ -3842,105 +3500,109 @@ + ;; .................... + ;; + ++(define_insn "prefetch" ++ [(prefetch (match_operand 0 "address_operand" "p") ++ (match_operand 1 "const_int_operand" "n") ++ (match_operand 2 "const_int_operand" "n"))] ++ "" ++ { ++ operands[1] = loongarch_prefetch_cookie (operands[1], operands[2]); ++ return "preld\t%1,%a0"; ++ } ++ [(set_attr "type" "prefetch")]) + + (define_insn "*prefetch_indexed_" +- [(prefetch (plus:P (match_operand:P 0 "register_operand" "r") +- (match_operand:P 1 "register_operand" "r")) +- (match_operand 2 "const_int_operand" "n") +- (match_operand 3 "const_int_operand" "n"))] +- "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT" +-{ +- operands[2] = loongarch_prefetch_cookie (operands[2], operands[3]); +- return "prefx\t%2,%1(%0)"; +-} ++ [(prefetch (plus:P (match_operand 0 "register_operand" "r") ++ (match_operand 1 "register_operand" "r")) ++ (match_operand 2 "const_int_operand" "n") ++ (match_operand 3 "const_int_operand" "n"))] ++ "" ++ { ++ operands[2] = loongarch_prefetch_cookie (operands[2], operands[3]); ++ return "preldx\t%2,%1,%0"; ++ } + [(set_attr "type" "prefetchx")]) + + (define_insn "nop" + [(const_int 0)] + "" + "nop" +- [(set_attr "type" "nop") +- (set_attr "mode" "none")]) +- +-;; Like nop, but commented out when outside a .set noreorder block. +-(define_insn "hazard_nop" +- [(const_int 1)] +- "" +- { +- return "#nop"; +- } +- [(set_attr "type" "nop")]) ++ [(set_attr "type" "nop") ++ (set_attr "mode" "none")]) + +-;; The `.insn' pseudo-op. +-(define_insn "insn_pseudo" +- [(unspec_volatile [(const_int 0)] UNSPEC_INSN_PSEUDO)] +- "" +- ".insn" +- [(set_attr "mode" "none") +- (set_attr "insn_count" "0")]) +- +-;; Conditional move instructions. ++;; __builtin_loongarch_movfcsr2gr: move the FCSR into operand 0. ++(define_insn "loongarch_movfcsr2gr" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec_volatile:SI [(match_operand 1 "const_uimm5_operand")] ++ UNSPECV_MOVFCSR2GR))] ++ "TARGET_HARD_FLOAT" ++ "movfcsr2gr\t%0,$r%1") + +-(define_insn "*sel_using_" +- [(set (match_operand:GPR 0 "register_operand" "=r,r") +- (if_then_else:GPR +- (equality_op:GPR2 (match_operand:GPR2 1 "register_operand" "r,r") +- (const_int 0)) +- (match_operand:GPR 2 "reg_or_0_operand" "r,J") +- (match_operand:GPR 3 "reg_or_0_operand" "J,r")))] +- "register_operand (operands[2], mode) +- != register_operand (operands[3], mode)" +- "@ +- \t%0,%2,%1 +- \t%0,%3,%1" +- [(set_attr "type" "condmove") +- (set_attr "mode" "")]) ++;; __builtin_loongarch_movgr2fcsr: move operand 0 into the FCSR. ++(define_insn "loongarch_movgr2fcsr" ++ [(unspec_volatile [(match_operand 0 "const_uimm5_operand") ++ (match_operand:SI 1 "register_operand" "r")] ++ UNSPECV_MOVGR2FCSR)] ++ "TARGET_HARD_FLOAT" ++ "movgr2fcsr\t$r%0,%1") + +-;; sel.fmt copies the 3rd argument when the 1st is non-zero and the 2nd +-;; argument if the 1st is zero. This means operand 2 and 3 are +-;; inverted in the instruction. 
++(define_insn "fclass_" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")] ++ UNSPEC_FCLASS))] ++ "TARGET_HARD_FLOAT" ++ "fclass.\t%0,%1" ++ [(set_attr "type" "unknown") ++ (set_attr "mode" "")]) + +-(define_insn "*sel" +- [(set (match_operand:SCALARF 0 "register_operand" "=f") +- (if_then_else:SCALARF +- (ne:FCC (match_operand:FCC 1 "register_operand" "z") +- (const_int 0)) +- (match_operand:SCALARF 2 "reg_or_0_operand" "f") +- (match_operand:SCALARF 3 "reg_or_0_operand" "f")))] ++(define_insn "bytepick_w" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r") ++ (match_operand:SI 3 "const_0_to_3_operand" "n")] ++ UNSPEC_BYTEPICK_W))] + "" +- "fsel\t%0,%3,%2,%1" +- [(set_attr "type" "condmove") +- (set_attr "mode" "")]) ++ "bytepick.w\t%0,%1,%2,%z3" ++ [(set_attr "mode" "SI")]) + +-;; These are the main define_expand's used to make conditional moves. ++(define_insn "bytepick_d" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec:DI [(match_operand:DI 1 "register_operand" "r") ++ (match_operand:DI 2 "register_operand" "r") ++ (match_operand:DI 3 "const_0_to_7_operand" "n")] ++ UNSPEC_BYTEPICK_D))] ++ "" ++ "bytepick.d\t%0,%1,%2,%z3" ++ [(set_attr "mode" "DI")]) + +-(define_expand "movcc" +- [(set (match_operand:GPR 0 "register_operand") +- (if_then_else:GPR (match_operator 1 "comparison_operator" +- [(match_operand:GPR 2 "reg_or_0_operand") +- (match_operand:GPR 3 "reg_or_0_operand")])))] +- "TARGET_COND_MOVE_INT" +-{ +- if (!INTEGRAL_MODE_P (GET_MODE (XEXP (operands[1], 0)))) +- FAIL; ++(define_insn "bitrev_4b" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "r")] ++ UNSPEC_BITREV_4B))] ++ "" ++ "bitrev.4b\t%0,%1" ++ [(set_attr "type" "unknown") ++ (set_attr "mode" "SI")]) + +- loongarch_expand_conditional_move (operands); +- DONE; +-}) ++(define_insn "bitrev_8b" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec:DI [(match_operand:DI 1 "register_operand" "r")] ++ UNSPEC_BITREV_8B))] ++ "" ++ "bitrev.8b\t%0,%1" ++ [(set_attr "type" "unknown") ++ (set_attr "mode" "DI")]) + +-(define_expand "movcc" +- [(set (match_operand:SCALARF 0 "register_operand") +- (if_then_else:SCALARF (match_operator 1 "comparison_operator" +- [(match_operand:SCALARF 2 "reg_or_0_operand") +- (match_operand:SCALARF 3 "reg_or_0_operand")])))] +- "TARGET_COND_MOVE_FLOAT" +-{ +- if (!FLOAT_MODE_P (GET_MODE (XEXP (operands[1], 0)))) +- FAIL; ++(define_insn "stack_tie" ++ [(set (mem:BLK (scratch)) ++ (unspec:BLK [(match_operand:X 0 "register_operand" "r") ++ (match_operand:X 1 "register_operand" "r")] ++ UNSPEC_TIE))] ++ "" ++ "" ++ [(set_attr "length" "0") ++ (set_attr "type" "ghost")]) + +- loongarch_expand_conditional_move (operands); +- DONE; +-}) + + (define_split + [(match_operand 0 "small_data_pattern")] +@@ -3948,97 +3610,30 @@ + [(match_dup 0)] + { operands[0] = loongarch_rewrite_small_data (operands[0]); }) + +-;; Thread-Local Storage +- +-(define_insn "got_load_tls_gd" +- [(set (match_operand:P 0 "register_operand" "=r") +- (unspec:P +- [(match_operand:P 1 "symbolic_operand" "")] +- UNSPEC_TLS_GD))] +- "" +- "la.tls.gd\t%0,%1" +- [(set_attr "got" "load") +- (set_attr "mode" "")]) +- +-(define_insn "got_load_tls_ld" +- [(set (match_operand:P 0 "register_operand" "=r") +- (unspec:P +- [(match_operand:P 1 "symbolic_operand" "")] +- UNSPEC_TLS_LD))] +- "" +- 
"la.tls.ld\t%0,%1" +- [(set_attr "got" "load") +- (set_attr "mode" "")]) +- +-(define_insn "got_load_tls_le" +- [(set (match_operand:P 0 "register_operand" "=r") +- (unspec:P +- [(match_operand:P 1 "symbolic_operand" "")] +- UNSPEC_TLS_LE))] +- "" +- "la.tls.le\t%0,%1" +- [(set_attr "got" "load") +- (set_attr "mode" "")]) +- +-(define_insn "got_load_tls_ie" +- [(set (match_operand:P 0 "register_operand" "=r") +- (unspec:P +- [(match_operand:P 1 "symbolic_operand" "")] +- UNSPEC_TLS_IE))] +- "" +- "la.tls.ie\t%0,%1" +- [(set_attr "got" "load") +- (set_attr "mode" "")]) +- +-(define_insn "loongarch_movfcsr2gr" +- [(set (match_operand:SI 0 "register_operand" "=r") +- (unspec_volatile:SI [(match_operand 1 "const_uimm5_operand")] UNSPEC_MOVFCSR2GR))] +- "TARGET_HARD_FLOAT" +- "movfcsr2gr\t%0,$r%1") +- +-(define_insn "loongarch_movgr2fcsr" +- [(unspec_volatile [(match_operand 0 "const_uimm5_operand") +- (match_operand:SI 1 "register_operand" "r")] +- UNSPEC_MOVGR2FCSR)] +- "TARGET_HARD_FLOAT" +- "movgr2fcsr\t$r%0,%1") +- + + ;; Match paired HI/SI/SF/DFmode load/stores. + (define_insn "*join2_load_store" +- [(set (match_operand:JOIN_MODE 0 "nonimmediate_operand" "=r,f,m,m,r,ZC") ++ [(set (match_operand:JOIN_MODE 0 "nonimmediate_operand" ++ "=&r,f,m,m,&r,ZC") + (match_operand:JOIN_MODE 1 "nonimmediate_operand" "m,m,r,f,ZC,r")) +- (set (match_operand:JOIN_MODE 2 "nonimmediate_operand" "=r,f,m,m,r,ZC") ++ (set (match_operand:JOIN_MODE 2 "nonimmediate_operand" ++ "=r,f,m,m,r,ZC") + (match_operand:JOIN_MODE 3 "nonimmediate_operand" "m,m,r,f,ZC,r"))] + "reload_completed" + { +- bool load_p = (which_alternative == 0 || which_alternative == 1); +- /* Reg-renaming pass reuses base register if it is dead after bonded loads. +- Hardware does not bond those loads, even when they are consecutive. +- However, order of the loads need to be checked for correctness. */ +- if (!load_p || !reg_overlap_mentioned_p (operands[0], operands[1])) +- { +- output_asm_insn (loongarch_output_move (operands[0], operands[1]), +- operands); +- output_asm_insn (loongarch_output_move (operands[2], operands[3]), +- &operands[2]); +- } +- else +- { +- output_asm_insn (loongarch_output_move (operands[2], operands[3]), +- &operands[2]); +- output_asm_insn (loongarch_output_move (operands[0], operands[1]), +- operands); +- } ++ /* The load destination does not overlap the source. */ ++ gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])); ++ output_asm_insn (loongarch_output_move (operands[0], operands[1]), ++ operands); ++ output_asm_insn (loongarch_output_move (operands[2], operands[3]), ++ &operands[2]); + return ""; + } +- [(set_attr "move_type" "load,fpload,store,fpstore,load,store") ++ [(set_attr "move_type" ++ "load,fpload,store,fpstore,load,store") + (set_attr "insn_count" "2,2,2,2,2,2")]) + +-;; 2 HI/SI/SF/DF loads are joined. +-;; P5600 does not support bonding of two LBs, hence QI mode is not included. +-;; The loads must be non-volatile as they might be reordered at the time of asm +-;; generation. ++;; 2 HI/SI/SF/DF loads are bonded. + (define_peephole2 + [(set (match_operand:JOIN_MODE 0 "register_operand") + (match_operand:JOIN_MODE 1 "non_volatile_mem_operand")) +@@ -4051,8 +3646,7 @@ + (match_dup 3))])] + "") + +-;; 2 HI/SI/SF/DF stores are joined. +-;; P5600 does not support bonding of two SBs, hence QI mode is not included. ++;; 2 HI/SI/SF/DF stores are bonded. 
+ (define_peephole2 + [(set (match_operand:JOIN_MODE 0 "memory_operand") + (match_operand:JOIN_MODE 1 "register_operand")) +@@ -4067,25 +3661,16 @@ + + ;; Match paired HImode loads. + (define_insn "*join2_loadhi" +- [(set (match_operand:SI 0 "register_operand" "=r") ++ [(set (match_operand:SI 0 "register_operand" "=&r") + (any_extend:SI (match_operand:HI 1 "non_volatile_mem_operand" "m"))) + (set (match_operand:SI 2 "register_operand" "=r") + (any_extend:SI (match_operand:HI 3 "non_volatile_mem_operand" "m")))] + "reload_completed" + { +- /* Reg-renaming pass reuses base register if it is dead after bonded loads. +- Hardware does not bond those loads, even when they are consecutive. +- However, order of the loads need to be checked for correctness. */ +- if (!reg_overlap_mentioned_p (operands[0], operands[1])) +- { +- output_asm_insn ("ld.h\t%0,%1", operands); +- output_asm_insn ("ld.h\t%2,%3", operands); +- } +- else +- { +- output_asm_insn ("ld.h\t%2,%3", operands); +- output_asm_insn ("ld.h\t%0,%1", operands); +- } ++ /* The load destination does not overlap the source. */ ++ gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])); ++ output_asm_insn ("ld.h\t%0,%1", operands); ++ output_asm_insn ("ld.h\t%2,%3", operands); + + return ""; + } +@@ -4093,7 +3678,7 @@ + (set_attr "insn_count" "2")]) + + +-;; 2 HI loads are joined. ++;; 2 HI loads are bonded. + (define_peephole2 + [(set (match_operand:SI 0 "register_operand") + (any_extend:SI (match_operand:HI 1 "non_volatile_mem_operand"))) +@@ -4107,153 +3692,10 @@ + "") + + +-;; Logical AND NOT. +-(define_insn "loongson_gsandn" +- [(set (match_operand:GPR 0 "register_operand" "=r") +- (and:GPR +- (not:GPR (match_operand:GPR 1 "register_operand" "r")) +- (match_operand:GPR 2 "register_operand" "r")))] +- "" +- "andn\t%0,%2,%1" +- [(set_attr "type" "logical")]) +- +-;; Logical AND NOT. 
+-(define_insn "loongson_gsorn" +- [(set (match_operand:GPR 0 "register_operand" "=r") +- (ior:GPR +- (not:GPR (match_operand:GPR 1 "register_operand" "r")) +- (match_operand:GPR 2 "register_operand" "r")))] +- "" +- "orn\t%0,%2,%1" +- [(set_attr "type" "logical")]) +- +-(define_insn "smax3" +- [(set (match_operand:SCALARF 0 "register_operand" "=f") +- (smax:SCALARF (match_operand:SCALARF 1 "register_operand" "f") +- (match_operand:SCALARF 2 "register_operand" "f")))] +- "TARGET_HARD_FLOAT" +- "fmax.\t%0,%1,%2" +- [(set_attr "type" "fmove") +- (set_attr "mode" "")]) +- +-(define_insn "smin3" +- [(set (match_operand:SCALARF 0 "register_operand" "=f") +- (smin:SCALARF (match_operand:SCALARF 1 "register_operand" "f") +- (match_operand:SCALARF 2 "register_operand" "f")))] +- "TARGET_HARD_FLOAT" +- "fmin.\t%0,%1,%2" +- [(set_attr "type" "fmove") +- (set_attr "mode" "")]) +- +-(define_insn "smaxa3" +- [(set (match_operand:SCALARF 0 "register_operand" "=f") +- (if_then_else:SCALARF +- (gt (abs:SCALARF (match_operand:SCALARF 1 "register_operand" "f")) +- (abs:SCALARF (match_operand:SCALARF 2 "register_operand" "f"))) +- (match_dup 1) +- (match_dup 2)))] +- "TARGET_HARD_FLOAT" +- "fmaxa.\t%0,%1,%2" +- [(set_attr "type" "fmove") +- (set_attr "mode" "")]) +- +-(define_insn "smina3" +- [(set (match_operand:SCALARF 0 "register_operand" "=f") +- (if_then_else:SCALARF +- (lt (abs:SCALARF (match_operand:SCALARF 1 "register_operand" "f")) +- (abs:SCALARF (match_operand:SCALARF 2 "register_operand" "f"))) +- (match_dup 1) +- (match_dup 2)))] +- "TARGET_HARD_FLOAT" +- "fmina.\t%0,%1,%2" +- [(set_attr "type" "fmove") +- (set_attr "mode" "")]) +- +-(define_insn "frint_" +- [(set (match_operand:SCALARF 0 "register_operand" "=f") +- (unspec:SCALARF [(match_operand:SCALARF 1 "register_operand" "f")] +- UNSPEC_FRINT))] +- "" +- "frint.\t%0,%1" +- [(set_attr "type" "fcvt") +- (set_attr "mode" "")]) +- +-(define_insn "fclass_" +- [(set (match_operand:SCALARF 0 "register_operand" "=f") +- (unspec:SCALARF [(match_operand:SCALARF 1 "register_operand" "f")] +- UNSPEC_FCLASS))] +- "" +- "fclass.\t%0,%1" +- [(set_attr "type" "unknown") +- (set_attr "mode" "")]) +- +-(define_insn "bytepick_w" +- [(set (match_operand:SI 0 "register_operand" "=r") +- (unspec:SI [(match_operand:SI 1 "register_operand" "r") +- (match_operand:SI 2 "register_operand" "r") +- (match_operand:SI 3 "const_0_to_3_operand" "n")] +- UNSPEC_BYTEPICK_W))] +- "" +- "bytepick.w\t%0,%1,%2,%z3" +- [(set_attr "type" "dspalu") +- (set_attr "mode" "SI")]) +- +-(define_insn "bytepick_d" +- [(set (match_operand:DI 0 "register_operand" "=r") +- (unspec:DI [(match_operand:DI 1 "register_operand" "r") +- (match_operand:DI 2 "register_operand" "r") +- (match_operand:DI 3 "const_0_to_7_operand" "n")] +- UNSPEC_BYTEPICK_D))] +- "" +- "bytepick.d\t%0,%1,%2,%z3" +- [(set_attr "type" "dspalu") +- (set_attr "mode" "DI")]) +- +-(define_insn "bitrev_4b" +- [(set (match_operand:SI 0 "register_operand" "=r") +- (unspec:SI [(match_operand:SI 1 "register_operand" "r")] +- UNSPEC_BITREV_4B))] +- "" +- "bitrev.4b\t%0,%1" +- [(set_attr "type" "unknown") +- (set_attr "mode" "SI")]) +- +-(define_insn "bitrev_8b" +- [(set (match_operand:DI 0 "register_operand" "=r") +- (unspec:DI [(match_operand:DI 1 "register_operand" "r")] +- UNSPEC_BITREV_8B))] +- "" +- "bitrev.8b\t%0,%1" +- [(set_attr "type" "unknown") +- (set_attr "mode" "DI")]) +- +- +- +-(define_insn "lu32i_d" +- [(set (match_operand:DI 0 "register_operand" "=r") +- (ior:DI +- (zero_extend:DI +- (subreg:SI (match_operand:DI 1 
"register_operand" "0") 0)) +- (match_operand:DI 2 "const_lu32i_operand" "u")))] +- "TARGET_64BIT" +- "lu32i.d\t%0,%X2>>32" +- [(set_attr "type" "arith") +- (set_attr "mode" "DI")]) +- +-(define_insn "lu52i_d" +- [(set (match_operand:DI 0 "register_operand" "=r") +- (ior:DI +- (and:DI (match_operand:DI 1 "register_operand" "r") +- (match_operand 2 "lu52i_mask_operand")) +- (match_operand 3 "const_lu52i_operand" "v")))] +- "TARGET_64BIT" +- "lu52i.d\t%0,%1,%X3>>52" +- [(set_attr "type" "arith") +- (set_attr "mode" "DI")]) + + (define_mode_iterator QHSD [QI HI SI DI]) + +-(define_insn "crc_w__w" ++(define_insn "loongarch_crc_w__w" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:QHSD 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")] +@@ -4263,7 +3705,7 @@ + [(set_attr "type" "unknown") + (set_attr "mode" "")]) + +-(define_insn "crcc_w__w" ++(define_insn "loongarch_crcc_w__w" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:QHSD 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")] +@@ -4277,6 +3719,9 @@ + + (include "sync.md") + ++(include "generic.md") ++(include "la464.md") ++ + ; The LoongArch SX Instructions. + (include "lsx.md") + +@@ -4286,35 +3731,6 @@ + ; The LoongArch ASX Instructions. + (include "lasx.md") + +-;; Is copying of this instruction disallowed? +-(define_attr "cannot_copy" "no,yes" (const_string "no")) +- +-(define_insn "stack_tie" +- [(set (mem:BLK (scratch)) +- (unspec:BLK [(match_operand:X 0 "register_operand" "r") +- (match_operand:X 1 "register_operand" "r")] +- UNSPEC_TIE))] +- "" +- "" +- [(set_attr "length" "0")] +-) +- +-(define_insn "gpr_save" +- [(unspec_volatile [(match_operand 0 "const_int_operand")] UNSPECV_GPR_SAVE) +- (clobber (reg:SI T0_REGNUM)) +- (clobber (reg:SI T1_REGNUM))] +- "" +- { return loongarch_output_gpr_save (INTVAL (operands[0])); }) +- +-(define_insn "gpr_restore" +- [(unspec_volatile [(match_operand 0 "const_int_operand")] UNSPECV_GPR_RESTORE)] +- "" +- "tail\t__loongarch_restore_%0") +- +-(define_insn "gpr_restore_return" +- [(return) +- (use (match_operand 0 "pmode_register_operand" "")) +- (const_int 0)] +- "" +- "") +- ++(define_c_enum "unspec" [ ++ UNSPEC_ADDRESS_FIRST ++]) +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index 660de3674..075a2d6c7 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -1,6 +1,14 @@ +- ++; Generated by "genstr" from the template "loongarch.opt.in" ++; and definitions from "loongarch-strings". ++; ++; Please do not edit this file directly. ++; It will be automatically updated during a gcc build ++; if you change "loongarch.opt.in" or "loongarch-strings". ++; ++; Generated by "genstr" from the template "loongarch.opt.in" ++; and definitions from "loongarch-strings". + ; +-; Copyright (C) 2005-2018 Free Software Foundation, Inc. ++; Copyright (C) 2020-2022 Free Software Foundation, Inc. + ; + ; This file is part of GCC. + ; +@@ -17,155 +25,225 @@ + ; You should have received a copy of the GNU General Public License + ; along with GCC; see the file COPYING3. If not see + ; . ++; + + HeaderInclude + config/loongarch/loongarch-opts.h + +-mabi= +-Target RejectNegative Joined Enum(loongarch_abi) Var(loongarch_abi) Init(LARCH_ABI_DEFAULT) +--mabi=ABI Generate code that conforms to the given ABI. 
++HeaderInclude ++config/loongarch/loongarch-str.h + ++TargetVariable ++unsigned int recip_mask = 0 ++ ++; ISA related options ++;; Base ISA + Enum +-Name(loongarch_abi) Type(int) +-Known Loongarch ABIs (for use with the -mabi= option): ++Name(isa_base) Type(int) ++Basic ISAs of LoongArch: + + EnumValue +-Enum(loongarch_abi) String(lp32) Value(ABILP32) ++Enum(isa_base) String(la64) Value(ISA_BASE_LA64V100) ++ ++;; ISA extensions / adjustments ++Enum ++Name(isa_ext_fpu) Type(int) ++FPU types of LoongArch: + + EnumValue +-Enum(loongarch_abi) String(lpx32) Value(ABILPX32) ++Enum(isa_ext_fpu) String(none) Value(ISA_EXT_NONE) + + EnumValue +-Enum(loongarch_abi) String(lp64) Value(ABILP64) ++Enum(isa_ext_fpu) String(32) Value(ISA_EXT_FPU32) + +-march= +-Target RejectNegative Joined Var(loongarch_arch_option) ToLower Enum(loongarch_arch_opt_value) +--march=ISA Generate code for the given ISA. ++EnumValue ++Enum(isa_ext_fpu) String(64) Value(ISA_EXT_FPU64) + +-mbranch-cost= +-Target RejectNegative Joined UInteger Var(loongarch_branch_cost) +--mbranch-cost=COST Set the cost of branches to roughly COST instructions. ++mfpu= ++Target RejectNegative Joined ToLower Enum(isa_ext_fpu) Var(la_opt_fpu) Init(M_OPT_UNSET) ++-mfpu=FPU Generate code for the given FPU. + +-mcheck-zero-division +-Target Report Mask(CHECK_ZERO_DIV) +-Trap on integer divide by zero. ++mfpu=0 ++Target RejectNegative Alias(mfpu=,none) ++ ++msoft-float ++Target Driver Defer Var(la_deferred_options) RejectNegative Negative(msingle-float) ++Prevent the use of all hardware floating-point instructions. ++ ++msingle-float ++Target Driver Defer Var(la_deferred_options) RejectNegative Negative(mdouble-float) ++Restrict the use of hardware floating-point instructions to 32-bit operations. + + mdouble-float +-Target Report RejectNegative InverseMask(SINGLE_FLOAT, DOUBLE_FLOAT) ++Target Driver Defer Var(la_deferred_options) RejectNegative Negative(msoft-float) + Allow hardware floating-point instructions to cover both 32-bit and 64-bit operations. + +-mflush-func= +-Target RejectNegative Joined Var(loongarch_cache_flush_func) Init(CACHE_FLUSH_FUNC) +--mflush-func=FUNC Use FUNC to flush the cache before calling stack trampolines. ++Enum ++Name(isa_ext_simd) Type(int) ++SIMD extension levels of LoongArch: ++ ++EnumValue ++Enum(isa_ext_simd) String(none) Value(ISA_EXT_NONE) ++ ++EnumValue ++Enum(isa_ext_simd) String(lsx) Value(ISA_EXT_SIMD_LSX) + +-Mask(64BIT) ++EnumValue ++Enum(isa_ext_simd) String(lasx) Value(ISA_EXT_SIMD_LASX) + +-Mask(FLOAT64) ++msimd= ++Target RejectNegative Joined ToLower Enum(isa_ext_simd) Var(la_opt_simd) Init(M_OPT_UNSET) ++-msimd=SIMD Generate code for the given SIMD extension. + +-mhard-float +-Target Report RejectNegative InverseMask(SOFT_FLOAT_ABI, HARD_FLOAT_ABI) +-Allow the use of hardware floating-point ABI and instructions. ++mlsx ++Target Driver Defer Var(la_deferred_options) ++Enable LoongArch SIMD Extension (LSX, 128-bit). + +-mlong-calls +-Target Report Var(TARGET_LONG_CALLS) +-Use indirect calls. ++mlasx ++Target Driver Defer Var(la_deferred_options) ++Enable LoongArch Advanced SIMD Extension (LASX, 256-bit). + +-mmemcpy +-Target Report Mask(MEMCPY) +-Don't optimize block moves. ++;; Base target models (implies ISA & tune parameters) ++Enum ++Name(cpu_type) Type(int) ++LoongArch CPU types: + +-mno-float +-Target Report RejectNegative Var(TARGET_NO_FLOAT) Condition(TARGET_SUPPORTS_NO_FLOAT) +-Prevent the use of all floating-point operations. 
++EnumValue ++Enum(cpu_type) String(native) Value(CPU_NATIVE) + +-mno-flush-func +-Target RejectNegative +-Do not use a cache-flushing function before calling stack trampolines. ++EnumValue ++Enum(cpu_type) String(abi-default) Value(CPU_ABI_DEFAULT) + +-mrelax-pic-calls +-Target Report Mask(RELAX_PIC_CALLS) +-Try to allow the linker to turn PIC calls into direct calls. ++EnumValue ++Enum(cpu_type) String(loongarch64) Value(CPU_LOONGARCH64) + +-mshared +-Target Report Var(TARGET_SHARED) Init(1) +-When generating -mabicalls code, make the code suitable for use in shared libraries. ++EnumValue ++Enum(cpu_type) String(la664) Value(CPU_LA664) + +-msingle-float +-Target Report RejectNegative Mask(SINGLE_FLOAT) +-Restrict the use of hardware floating-point instructions to 32-bit operations. ++EnumValue ++Enum(cpu_type) String(la464) Value(CPU_LA464) + +-msoft-float +-Target Report RejectNegative Mask(SOFT_FLOAT_ABI) +-Prevent the use of all hardware floating-point instructions. ++EnumValue ++Enum(cpu_type) String(la264) Value(CPU_LA264) ++ ++EnumValue ++Enum(cpu_type) String(la364) Value(CPU_LA364) + +-mlra +-Target Report Var(loongarch_lra_flag) Init(1) Save +-Use LRA instead of reload. ++march= ++Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET) ++-march=PROCESSOR Generate code for the given PROCESSOR ISA. + + mtune= +-Target RejectNegative Joined Var(loongarch_tune_option) ToLower Enum(loongarch_arch_opt_value) +--mtune=PROCESSOR Optimize the output for PROCESSOR. ++Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_tune) Init(M_OPT_UNSET) ++-mtune=PROCESSOR Generate optimized code for PROCESSOR. + +-mframe-header-opt +-Target Report Var(flag_frame_header_optimization) Optimization +-Optimize frame header. + +-noasmopt +-Driver ++; ABI related options ++; (ISA constraints on ABI are handled dynamically) + +-mstrict-align +-Target Report Mask(STRICT_ALIGN) Save +-Do not generate unaligned memory accesses. ++;; Base ABI ++Enum ++Name(abi_base) Type(int) ++Base ABI types for LoongArch: + +-mlsx +-Target Report Mask(LSX) +-Use LoongArch SX Extension instructions. ++EnumValue ++Enum(abi_base) String(lp64d) Value(ABI_BASE_LP64D) + +-mlasx +-Target Report Var(TARGET_LASX) +-Use LoongArch ASX Extension instructions. ++EnumValue ++Enum(abi_base) String(lp64f) Value(ABI_BASE_LP64F) + +-malign-llsc-target +-Target Report Var(TARGET_ALIGN_LLSC_TARGET) +-Target align llsc target. ++EnumValue ++Enum(abi_base) String(lp64s) Value(ABI_BASE_LP64S) + +-mmax-inline-memcpy-size= +-Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init(1024) +--mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024. ++mabi= ++Target RejectNegative Joined ToLower Enum(abi_base) Var(la_opt_abi_base) Init(M_OPT_UNSET) ++-mabi=BASEABI Generate code that conforms to the given BASEABI. ++ ++;; Legacy option: -mabi=lp64 ++mabi=lp64 ++Target RejectNegative Mask(LP64) ++-mabi=lp64 Legacy option that enables the lp64 integer ABI. ++ ++;; ABI Extension ++Variable ++int la_opt_abi_ext = M_OPT_UNSET ++ ++mbranch-cost= ++Target RejectNegative Joined UInteger Var(loongarch_branch_cost) ++-mbranch-cost=COST Set the cost of branches to roughly COST instructions. + + mvecarg + Target Report Var(TARGET_VECARG) Init(1) + Target pass vect arg uses vector register. + ++mmemvec-cost= ++Target RejectNegative Joined UInteger Var(loongarch_vector_access_cost) IntegerRange(1, 5) ++mmemvec-cost=COST Set the cost of vector memory access instructions. 
++ ++mveclibabi= ++Target RejectNegative Joined Var(loongarch_veclibabi_name) ++Vector library ABI to use. ++ ++mstackrealign ++Target Var(loongarch_stack_realign) Init(1) ++Realign stack in prologue. ++ ++mforce-drap ++Target Var(loongarch_force_drap) Init(0) ++Always use Dynamic Realigned Argument Pointer (DRAP) to realign stack. ++ ++mcheck-zero-division ++Target Mask(CHECK_ZERO_DIV) ++Trap on integer divide by zero. ++ + mcond-move-int +-Target Report Var(TARGET_COND_MOVE_INT) Init(1) ++Target Var(TARGET_COND_MOVE_INT) Init(1) + Conditional moves for integral are enabled. + + mcond-move-float +-Target Report Var(TARGET_COND_MOVE_FLOAT) Init(1) ++Target Var(TARGET_COND_MOVE_FLOAT) Init(1) + Conditional moves for float are enabled. + +-; The code model option names for -mcmodel. ++mmemcpy ++Target Mask(MEMCPY) ++Prevent optimizing block moves, which is also the default behavior of -Os. + ++mstrict-align ++Target Var(TARGET_STRICT_ALIGN) Init(0) ++Do not generate unaligned memory accesses. ++ ++mmax-inline-memcpy-size= ++Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init(1024) ++-mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024. ++ ++mrecip ++Target Report RejectNegative Var(loongarch_recip) ++Generate reciprocals instead of divss and sqrtss. ++ ++mrecip= ++Target Report RejectNegative Joined Var(loongarch_recip_name) ++Control generation of reciprocal estimates. ++ ++; The code model option names for -mcmodel. + Enum +-Name(cmodel) Type(enum loongarch_code_model) ++Name(cmodel) Type(int) + The code model option names for -mcmodel: + + EnumValue +-Enum(cmodel) String(normal) Value(LARCH_CMODEL_NORMAL) ++Enum(cmodel) String(normal) Value(CMODEL_NORMAL) + + EnumValue +-Enum(cmodel) String(tiny) Value(LARCH_CMODEL_TINY) ++Enum(cmodel) String(tiny) Value(CMODEL_TINY) + + EnumValue +-Enum(cmodel) String(tiny-static) Value(LARCH_CMODEL_TINY_STATIC) ++Enum(cmodel) String(tiny-static) Value(CMODEL_TINY_STATIC) + + EnumValue +-Enum(cmodel) String(large) Value(LARCH_CMODEL_LARGE) ++Enum(cmodel) String(large) Value(CMODEL_LARGE) + + EnumValue +-Enum(cmodel) String(extreme) Value(LARCH_CMODEL_EXTREME) ++Enum(cmodel) String(extreme) Value(CMODEL_EXTREME) + + mcmodel= +-Target RejectNegative Joined Enum(cmodel) Var(loongarch_cmodel_var) Init(LARCH_CMODEL_NORMAL) Save ++Target RejectNegative Joined Enum(cmodel) Var(la_opt_cmodel) Init(M_OPT_UNSET) + Specify the code model. +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 1f7034366..2b1d6f109 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -168,6 +168,9 @@ + ;; As ILSX but excludes V16QI. + (define_mode_iterator ILSX_DWH [V2DI V4SI V8HI]) + ++;; As LSX but excludes V16QI. ++(define_mode_iterator LSX_DWH [V2DF V4SF V2DI V4SI V8HI]) ++ + ;; As ILSX but excludes V2DI. 
+ (define_mode_iterator ILSX_WHB [V4SI V8HI V16QI]) + +@@ -291,6 +294,10 @@ + (V2DI "d") + (V4SI "s")]) + ++(define_mode_attr flsxfrint ++ [(V2DF "d") ++ (V4SF "s")]) ++ + (define_mode_attr ilsxfmt + [(V2DF "l") + (V4SF "w")]) +@@ -327,6 +334,38 @@ + (V4SI "uimm5") + (V2DI "uimm6")]) + ++ ++(define_int_iterator FRINT_S [UNSPEC_LSX_VFRINTRP_S ++ UNSPEC_LSX_VFRINTRZ_S ++ UNSPEC_LSX_VFRINT ++ UNSPEC_LSX_VFRINTRM_S]) ++ ++(define_int_iterator FRINT_D [UNSPEC_LSX_VFRINTRP_D ++ UNSPEC_LSX_VFRINTRZ_D ++ UNSPEC_LSX_VFRINT ++ UNSPEC_LSX_VFRINTRM_D]) ++ ++(define_int_attr frint_pattern_s ++ [(UNSPEC_LSX_VFRINTRP_S "ceil") ++ (UNSPEC_LSX_VFRINTRZ_S "btrunc") ++ (UNSPEC_LSX_VFRINT "rint") ++ (UNSPEC_LSX_VFRINTRM_S "floor")]) ++ ++(define_int_attr frint_pattern_d ++ [(UNSPEC_LSX_VFRINTRP_D "ceil") ++ (UNSPEC_LSX_VFRINTRZ_D "btrunc") ++ (UNSPEC_LSX_VFRINT "rint") ++ (UNSPEC_LSX_VFRINTRM_D "floor")]) ++ ++(define_int_attr frint_suffix ++ [(UNSPEC_LSX_VFRINTRP_S "rp") ++ (UNSPEC_LSX_VFRINTRP_D "rp") ++ (UNSPEC_LSX_VFRINTRZ_S "rz") ++ (UNSPEC_LSX_VFRINTRZ_D "rz") ++ (UNSPEC_LSX_VFRINT "") ++ (UNSPEC_LSX_VFRINTRM_S "rm") ++ (UNSPEC_LSX_VFRINTRM_D "rm")]) ++ + (define_expand "vec_init" + [(match_operand:LSX 0 "register_operand") + (match_operand:LSX 1 "")] +@@ -513,12 +552,12 @@ + DONE; + }) + +-(define_insn "lsx_vinsgr2vr_" +- [(set (match_operand:LSX 0 "register_operand" "=f") +- (vec_merge:LSX +- (vec_duplicate:LSX ++(define_insn "lsx_vinsgr2vr_" ++ [(set (match_operand:ILSX 0 "register_operand" "=f") ++ (vec_merge:ILSX ++ (vec_duplicate:ILSX + (match_operand: 1 "reg_or_0_operand" "rJ")) +- (match_operand:LSX 2 "register_operand" "0") ++ (match_operand:ILSX 2 "register_operand" "0") + (match_operand 3 "const__operand" "")))] + "ISA_HAS_LSX" + { +@@ -688,11 +727,23 @@ + DONE; + }) + +-(define_insn "lsx_vshuf_" +- [(set (match_operand:ILSX_DWH 0 "register_operand" "=f") +- (unspec:ILSX_DWH [(match_operand:ILSX_DWH 1 "register_operand" "0") +- (match_operand:ILSX_DWH 2 "register_operand" "f") +- (match_operand:ILSX_DWH 3 "register_operand" "f")] ++(define_expand "vec_perm" ++ [(match_operand:LSX 0 "register_operand") ++ (match_operand:LSX 1 "register_operand") ++ (match_operand:LSX 2 "register_operand") ++ (match_operand:LSX 3 "register_operand")] ++ "ISA_HAS_LSX" ++{ ++ loongarch_expand_vec_perm (operands[0], operands[1], ++ operands[2], operands[3]); ++ DONE; ++}) ++ ++(define_insn "lsx_vshuf_" ++ [(set (match_operand:LSX_DWH 0 "register_operand" "=f") ++ (unspec:LSX_DWH [(match_operand:LSX_DWH 1 "register_operand" "0") ++ (match_operand:LSX_DWH 2 "register_operand" "f") ++ (match_operand:LSX_DWH 3 "register_operand" "f")] + UNSPEC_LSX_VSHUF))] + "ISA_HAS_LSX" + "vshuf.\t%w0,%w2,%w3" +@@ -731,7 +782,7 @@ + [(set (match_operand:LSX 0 "nonimmediate_operand") + (match_operand:LSX 1 "move_operand"))] + "reload_completed && ISA_HAS_LSX +- && loongarch_split_move_insn_p (operands[0], operands[1], insn)" ++ && loongarch_split_move_insn_p (operands[0], operands[1])" + [(const_int 0)] + { + loongarch_split_move_insn (operands[0], operands[1], curr_insn); +@@ -996,7 +1047,25 @@ + [(set_attr "type" "simd_fmul") + (set_attr "mode" "")]) + +-(define_insn "div3" ++(define_expand "div3" ++ [(set (match_operand:FLSX 0 "register_operand") ++ (div:FLSX (match_operand:FLSX 1 "register_operand") ++ (match_operand:FLSX 2 "register_operand")))] ++ "ISA_HAS_LSX" ++{ ++ if (mode == V4SFmode ++ && TARGET_RECIP_VEC_DIV ++ && optimize_insn_for_speed_p () ++ && flag_finite_math_only && !flag_trapping_math ++ && 
flag_unsafe_math_optimizations) ++ { ++ loongarch_emit_swdivsf (operands[0], operands[1], ++ operands[2], V4SFmode); ++ DONE; ++ } ++}) ++ ++(define_insn "*div3" + [(set (match_operand:FLSX 0 "register_operand" "=f") + (div:FLSX (match_operand:FLSX 1 "register_operand" "f") + (match_operand:FLSX 2 "register_operand" "f")))] +@@ -1025,7 +1094,23 @@ + [(set_attr "type" "simd_fmadd") + (set_attr "mode" "")]) + +-(define_insn "sqrt2" ++(define_expand "sqrt2" ++ [(set (match_operand:FLSX 0 "register_operand") ++ (sqrt:FLSX (match_operand:FLSX 1 "register_operand")))] ++ "ISA_HAS_LSX" ++{ ++ if (mode == V4SFmode ++ && TARGET_RECIP_VEC_SQRT ++ && flag_unsafe_math_optimizations ++ && optimize_insn_for_speed_p () ++ && flag_finite_math_only && !flag_trapping_math) ++ { ++ loongarch_emit_swrsqrtsf (operands[0], operands[1], V4SFmode, 0); ++ DONE; ++ } ++}) ++ ++(define_insn "*sqrt2" + [(set (match_operand:FLSX 0 "register_operand" "=f") + (sqrt:FLSX (match_operand:FLSX 1 "register_operand" "f")))] + "ISA_HAS_LSX" +@@ -1362,8 +1447,8 @@ + (V2DF "V4SI")]) + + (define_insn "lsx_vreplgr2vr_" +- [(set (match_operand:LSX 0 "register_operand" "=f,f") +- (vec_duplicate:LSX ++ [(set (match_operand:ILSX 0 "register_operand" "=f,f") ++ (vec_duplicate:ILSX + (match_operand: 1 "reg_or_0_operand" "r,J")))] + "ISA_HAS_LSX" + { +@@ -1389,7 +1474,7 @@ + DONE; + }) + +-(define_insn "lsx_vflogb_" ++(define_insn "logb2" + [(set (match_operand:FLSX 0 "register_operand" "=f") + (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] + UNSPEC_LSX_VFLOGB))] +@@ -1449,6 +1534,15 @@ + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) + ++(define_insn "lsx_vfrecipe_" ++ [(set (match_operand:FLSX 0 "register_operand" "=f") ++ (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] ++ UNSPEC_RECIPE))] ++ "ISA_HAS_LSX && flag_unsafe_math_optimizations && TARGET_RECIP_VEC_DIV" ++ "vfrecipe.\t%w0,%w1" ++ [(set_attr "type" "simd_fdiv") ++ (set_attr "mode" "")]) ++ + (define_insn "lsx_vfrint_" + [(set (match_operand:FLSX 0 "register_operand" "=f") + (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] +@@ -1467,6 +1561,42 @@ + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) + ++(define_insn "lsx_vfrsqrte_" ++ [(set (match_operand:FLSX 0 "register_operand" "=f") ++ (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] ++ UNSPEC_RSQRTE))] ++ "ISA_HAS_LSX && flag_unsafe_math_optimizations && TARGET_RECIP_VEC_SQRT" ++ "vfrsqrte.\t%w0,%w1" ++ [(set_attr "type" "simd_fdiv") ++ (set_attr "mode" "")]) ++ ++(define_expand "rsqrt2" ++ [(set (match_operand:FLSX 0 "register_operand" "=f") ++ (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] ++ UNSPEC_LSX_VFRSQRT))] ++ "ISA_HAS_LSX" ++{ ++ if (mode == V4SFmode ++ && TARGET_RECIP_VEC_RSQRT ++ && flag_unsafe_math_optimizations ++ && optimize_insn_for_speed_p () ++ && flag_finite_math_only && !flag_trapping_math) ++ { ++ loongarch_emit_swrsqrtsf (operands[0], operands[1], V4SFmode, 1); ++ DONE; ++ } ++}) ++ ++(define_insn "*rsqrt2" ++ [(set (match_operand:FLSX 0 "register_operand" "=f") ++ (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] ++ UNSPEC_LSX_VFRSQRT))] ++ "ISA_HAS_LSX" ++ "vfrsqrt.\t%w0,%w1" ++ [(set_attr "type" "simd_fdiv") ++ (set_attr "mode" "")]) ++ ++ + (define_insn "lsx_vftint_s__" + [(set (match_operand: 0 "register_operand" "=f") + (unspec: [(match_operand:FLSX 1 "register_operand" "f")] +@@ -2172,8 +2302,8 @@ + + (define_insn "lsx_vreplvei__scalar" + [(set (match_operand:FLSX 0 "register_operand" "=f") +- (unspec:FLSX 
[(match_operand: 1 "register_operand" "f")] +- UNSPEC_LSX_VREPLVEI))] ++ (vec_duplicate:FLSX ++ (match_operand: 1 "register_operand" "f")))] + "ISA_HAS_LSX" + "vreplvei.\t%w0,%w1,0" + [(set_attr "type" "simd_splat") +@@ -2285,8 +2415,7 @@ + "vset.\t%Z3%w1\n\tbcnez\t%Z3%0"); + } + [(set_attr "type" "simd_branch") +- (set_attr "mode" "") +- (set_attr "compact_form" "never")]) ++ (set_attr "mode" "")]) + + (define_insn "lsx__v_" + [(set (pc) (if_then_else +@@ -2304,8 +2433,7 @@ + "vset.v\t%Z3%w1\n\tbcnez\t%Z3%0"); + } + [(set_attr "type" "simd_branch") +- (set_attr "mode" "TI") +- (set_attr "compact_form" "never")]) ++ (set_attr "mode" "TI")]) + + ;; vec_concate + (define_expand "vec_concatv2di" +@@ -2923,8 +3051,8 @@ + (set_attr "mode" "V4SF")]) + + (define_insn "lsx_vfrintrne_s" +- [(set (match_operand:V4SI 0 "register_operand" "=f") +- (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "f")] ++ [(set (match_operand:V4SF 0 "register_operand" "=f") ++ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "f")] + UNSPEC_LSX_VFRINTRNE_S))] + "ISA_HAS_LSX" + "vfrintrne.s\t%w0,%w1" +@@ -2932,8 +3060,8 @@ + (set_attr "mode" "V4SF")]) + + (define_insn "lsx_vfrintrne_d" +- [(set (match_operand:V2DI 0 "register_operand" "=f") +- (unspec:V2DI [(match_operand:V2DF 1 "register_operand" "f")] ++ [(set (match_operand:V2DF 0 "register_operand" "=f") ++ (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "f")] + UNSPEC_LSX_VFRINTRNE_D))] + "ISA_HAS_LSX" + "vfrintrne.d\t%w0,%w1" +@@ -2941,8 +3069,8 @@ + (set_attr "mode" "V2DF")]) + + (define_insn "lsx_vfrintrz_s" +- [(set (match_operand:V4SI 0 "register_operand" "=f") +- (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "f")] ++ [(set (match_operand:V4SF 0 "register_operand" "=f") ++ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "f")] + UNSPEC_LSX_VFRINTRZ_S))] + "ISA_HAS_LSX" + "vfrintrz.s\t%w0,%w1" +@@ -2950,8 +3078,8 @@ + (set_attr "mode" "V4SF")]) + + (define_insn "lsx_vfrintrz_d" +- [(set (match_operand:V2DI 0 "register_operand" "=f") +- (unspec:V2DI [(match_operand:V2DF 1 "register_operand" "f")] ++ [(set (match_operand:V2DF 0 "register_operand" "=f") ++ (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "f")] + UNSPEC_LSX_VFRINTRZ_D))] + "ISA_HAS_LSX" + "vfrintrz.d\t%w0,%w1" +@@ -2959,8 +3087,8 @@ + (set_attr "mode" "V2DF")]) + + (define_insn "lsx_vfrintrp_s" +- [(set (match_operand:V4SI 0 "register_operand" "=f") +- (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "f")] ++ [(set (match_operand:V4SF 0 "register_operand" "=f") ++ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "f")] + UNSPEC_LSX_VFRINTRP_S))] + "ISA_HAS_LSX" + "vfrintrp.s\t%w0,%w1" +@@ -2968,8 +3096,8 @@ + (set_attr "mode" "V4SF")]) + + (define_insn "lsx_vfrintrp_d" +- [(set (match_operand:V2DI 0 "register_operand" "=f") +- (unspec:V2DI [(match_operand:V2DF 1 "register_operand" "f")] ++ [(set (match_operand:V2DF 0 "register_operand" "=f") ++ (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "f")] + UNSPEC_LSX_VFRINTRP_D))] + "ISA_HAS_LSX" + "vfrintrp.d\t%w0,%w1" +@@ -2977,8 +3105,8 @@ + (set_attr "mode" "V2DF")]) + + (define_insn "lsx_vfrintrm_s" +- [(set (match_operand:V4SI 0 "register_operand" "=f") +- (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "f")] ++ [(set (match_operand:V4SF 0 "register_operand" "=f") ++ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "f")] + UNSPEC_LSX_VFRINTRM_S))] + "ISA_HAS_LSX" + "vfrintrm.s\t%w0,%w1" +@@ -2986,14 +3114,44 @@ + (set_attr "mode" "V4SF")]) + + (define_insn "lsx_vfrintrm_d" +- 
[(set (match_operand:V2DI 0 "register_operand" "=f") +- (unspec:V2DI [(match_operand:V2DF 1 "register_operand" "f")] ++ [(set (match_operand:V2DF 0 "register_operand" "=f") ++ (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "f")] + UNSPEC_LSX_VFRINTRM_D))] + "ISA_HAS_LSX" + "vfrintrm.d\t%w0,%w1" + [(set_attr "type" "simd_shift") + (set_attr "mode" "V2DF")]) + ++;; Vector versions of the floating-point frint patterns. ++;; Expands to btrunc, ceil, floor, rint. ++(define_insn "v4sf2" ++ [(set (match_operand:V4SF 0 "register_operand" "=f") ++ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "f")] ++ FRINT_S))] ++ "ISA_HAS_LSX" ++ "vfrint.s\t%w0,%w1" ++ [(set_attr "type" "simd_shift") ++ (set_attr "mode" "V4SF")]) ++ ++(define_insn "v2df2" ++ [(set (match_operand:V2DF 0 "register_operand" "=f") ++ (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "f")] ++ FRINT_D))] ++ "ISA_HAS_LSX" ++ "vfrint.d\t%w0,%w1" ++ [(set_attr "type" "simd_shift") ++ (set_attr "mode" "V2DF")]) ++ ++;; Expands to round. ++(define_insn "round2" ++ [(set (match_operand:FLSX 0 "register_operand" "=f") ++ (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] ++ UNSPEC_LSX_VFRINT))] ++ "ISA_HAS_LSX" ++ "vfrint.\t%w0,%w1" ++ [(set_attr "type" "simd_shift") ++ (set_attr "mode" "")]) ++ + ;; Offset load and broadcast + (define_expand "lsx_vldrepl_" + [(match_operand:LSX 0 "register_operand") +@@ -3019,6 +3177,18 @@ + (set_attr "mode" "") + (set_attr "length" "4")]) + ++(define_insn "lsx_vldrepl__insn_0" ++ [(set (match_operand:LSX 0 "register_operand" "=f") ++ (vec_duplicate:LSX ++ (mem: (match_operand:DI 1 "register_operand" "r"))))] ++ "ISA_HAS_LSX" ++{ ++ return "vldrepl.\t%w0,%1,0"; ++} ++ [(set_attr "type" "simd_load") ++ (set_attr "mode" "") ++ (set_attr "length" "4")]) ++ + ;; Offset store by sel + (define_expand "lsx_vstelm_" + [(match_operand:LSX 0 "register_operand") +@@ -3047,6 +3217,20 @@ + (set_attr "mode" "") + (set_attr "length" "4")]) + ++;; Offset is "0" ++(define_insn "lsx_vstelm__insn_0" ++ [(set (mem: (match_operand:DI 0 "register_operand" "r")) ++ (vec_select: ++ (match_operand:LSX 1 "register_operand" "f") ++ (parallel [(match_operand:SI 2 "const__operand")])))] ++ "ISA_HAS_LSX" ++{ ++ return "vstelm.\t%w1,%0,0,%2"; ++} ++ [(set_attr "type" "simd_store") ++ (set_attr "mode" "") ++ (set_attr "length" "4")]) ++ + (define_expand "lsx_vld" + [(match_operand:V16QI 0 "register_operand") + (match_operand 1 "pmode_register_operand") +@@ -3179,3 +3363,101 @@ + } + [(set_attr "type" "simd_fcmp") + (set_attr "mode" "FCC")]) ++ ++;; Vector reduction operation ++(define_expand "reduc_plus_scal_v2di" ++ [(match_operand:DI 0 "register_operand") ++ (match_operand:V2DI 1 "register_operand")] ++ "ISA_HAS_LSX" ++{ ++ rtx tmp = gen_reg_rtx (V2DImode); ++ emit_insn (gen_lsx_vhaddw_q_d (tmp, operands[1], operands[1])); ++ emit_insn (gen_vec_extractv2didi (operands[0], tmp, const0_rtx)); ++ DONE; ++}) ++ ++(define_expand "reduc_plus_scal_v4si" ++ [(match_operand:SI 0 "register_operand") ++ (match_operand:V4SI 1 "register_operand")] ++ "ISA_HAS_LSX" ++{ ++ rtx tmp = gen_reg_rtx (V2DImode); ++ rtx tmp1 = gen_reg_rtx (V2DImode); ++ emit_insn (gen_lsx_vhaddw_d_w (tmp, operands[1], operands[1])); ++ emit_insn (gen_lsx_vhaddw_q_d (tmp1, tmp, tmp)); ++ emit_insn (gen_vec_extractv4sisi (operands[0], gen_lowpart(V4SImode,tmp1), const0_rtx)); ++ DONE; ++}) ++ ++(define_expand "reduc_plus_scal_" ++ [(match_operand: 0 "register_operand") ++ (match_operand:FLSX 1 "register_operand")] ++ "ISA_HAS_LSX" ++{ ++ rtx tmp 
= gen_reg_rtx (mode); ++ loongarch_expand_vector_reduc (gen_add3, tmp, operands[1]); ++ emit_insn (gen_vec_extract (operands[0], tmp, ++ const0_rtx)); ++ DONE; ++}) ++ ++(define_expand "reduc__scal_" ++ [(any_bitwise: ++ (match_operand: 0 "register_operand") ++ (match_operand:ILSX 1 "register_operand"))] ++ "ISA_HAS_LSX" ++{ ++ rtx tmp = gen_reg_rtx (mode); ++ loongarch_expand_vector_reduc (gen_3, tmp, operands[1]); ++ emit_insn (gen_vec_extract (operands[0], tmp, ++ const0_rtx)); ++ DONE; ++}) ++ ++(define_expand "reduc_smax_scal_" ++ [(match_operand: 0 "register_operand") ++ (match_operand:LSX 1 "register_operand")] ++ "ISA_HAS_LSX" ++{ ++ rtx tmp = gen_reg_rtx (mode); ++ loongarch_expand_vector_reduc (gen_smax3, tmp, operands[1]); ++ emit_insn (gen_vec_extract (operands[0], tmp, ++ const0_rtx)); ++ DONE; ++}) ++ ++(define_expand "reduc_smin_scal_" ++ [(match_operand: 0 "register_operand") ++ (match_operand:LSX 1 "register_operand")] ++ "ISA_HAS_LSX" ++{ ++ rtx tmp = gen_reg_rtx (mode); ++ loongarch_expand_vector_reduc (gen_smin3, tmp, operands[1]); ++ emit_insn (gen_vec_extract (operands[0], tmp, ++ const0_rtx)); ++ DONE; ++}) ++ ++(define_expand "reduc_umax_scal_" ++ [(match_operand: 0 "register_operand") ++ (match_operand:ILSX 1 "register_operand")] ++ "ISA_HAS_LSX" ++{ ++ rtx tmp = gen_reg_rtx (mode); ++ loongarch_expand_vector_reduc (gen_umax3, tmp, operands[1]); ++ emit_insn (gen_vec_extract (operands[0], tmp, ++ const0_rtx)); ++ DONE; ++}) ++ ++(define_expand "reduc_umin_scal_" ++ [(match_operand: 0 "register_operand") ++ (match_operand:ILSX 1 "register_operand")] ++ "ISA_HAS_LSX" ++{ ++ rtx tmp = gen_reg_rtx (mode); ++ loongarch_expand_vector_reduc (gen_umin3, tmp, operands[1]); ++ emit_insn (gen_vec_extract (operands[0], tmp, ++ const0_rtx)); ++ DONE; ++}) +diff --git a/gcc/config/loongarch/lsxintrin.h b/gcc/config/loongarch/lsxintrin.h +index fe3043e3d..2d1598536 100644 +--- a/gcc/config/loongarch/lsxintrin.h ++++ b/gcc/config/loongarch/lsxintrin.h +@@ -3291,65 +3291,65 @@ __m128i __lsx_vftintrneh_l_s(__m128 _1) + /* Assembly instruction format: vd, vj. */ + /* Data types in instruction templates: V4SI, V4SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m128i __lsx_vfrintrne_s(__m128 _1) ++__m128 __lsx_vfrintrne_s(__m128 _1) + { +- return (__m128i)__builtin_lsx_vfrintrne_s((v4f32)_1); ++ return (__m128)__builtin_lsx_vfrintrne_s((v4f32)_1); + } + + /* Assembly instruction format: vd, vj. */ + /* Data types in instruction templates: V2DI, V2DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m128i __lsx_vfrintrne_d(__m128d _1) ++__m128d __lsx_vfrintrne_d(__m128d _1) + { +- return (__m128i)__builtin_lsx_vfrintrne_d((v2f64)_1); ++ return (__m128d)__builtin_lsx_vfrintrne_d((v2f64)_1); + } + + /* Assembly instruction format: vd, vj. */ + /* Data types in instruction templates: V4SI, V4SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m128i __lsx_vfrintrz_s(__m128 _1) ++__m128 __lsx_vfrintrz_s(__m128 _1) + { +- return (__m128i)__builtin_lsx_vfrintrz_s((v4f32)_1); ++ return (__m128)__builtin_lsx_vfrintrz_s((v4f32)_1); + } + + /* Assembly instruction format: vd, vj. */ + /* Data types in instruction templates: V2DI, V2DF. 
*/ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m128i __lsx_vfrintrz_d(__m128d _1) ++__m128d __lsx_vfrintrz_d(__m128d _1) + { +- return (__m128i)__builtin_lsx_vfrintrz_d((v2f64)_1); ++ return (__m128d)__builtin_lsx_vfrintrz_d((v2f64)_1); + } + + /* Assembly instruction format: vd, vj. */ + /* Data types in instruction templates: V4SI, V4SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m128i __lsx_vfrintrp_s(__m128 _1) ++__m128 __lsx_vfrintrp_s(__m128 _1) + { +- return (__m128i)__builtin_lsx_vfrintrp_s((v4f32)_1); ++ return (__m128)__builtin_lsx_vfrintrp_s((v4f32)_1); + } + + /* Assembly instruction format: vd, vj. */ + /* Data types in instruction templates: V2DI, V2DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m128i __lsx_vfrintrp_d(__m128d _1) ++__m128d __lsx_vfrintrp_d(__m128d _1) + { +- return (__m128i)__builtin_lsx_vfrintrp_d((v2f64)_1); ++ return (__m128d)__builtin_lsx_vfrintrp_d((v2f64)_1); + } + + /* Assembly instruction format: vd, vj. */ + /* Data types in instruction templates: V4SI, V4SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m128i __lsx_vfrintrm_s(__m128 _1) ++__m128 __lsx_vfrintrm_s(__m128 _1) + { +- return (__m128i)__builtin_lsx_vfrintrm_s((v4f32)_1); ++ return (__m128)__builtin_lsx_vfrintrm_s((v4f32)_1); + } + + /* Assembly instruction format: vd, vj. */ + /* Data types in instruction templates: V2DI, V2DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m128i __lsx_vfrintrm_d(__m128d _1) ++__m128d __lsx_vfrintrm_d(__m128d _1) + { +- return (__m128i)__builtin_lsx_vfrintrm_d((v2f64)_1); ++ return (__m128d)__builtin_lsx_vfrintrm_d((v2f64)_1); + } + + /* Assembly instruction format: vd, rj, si8, idx. */ +@@ -4154,19 +4154,19 @@ __m128i __lsx_vsub_q(__m128i _1, __m128i _2) + + /* Assembly instruction format: vd, rj, si12. */ + /* Data types in instruction templates: V16QI, CVPOINTER, SI. */ +-#define __lsx_vldrepl_b(/*void **/ _1, /*si12*/ _2) ((__m128i)__builtin_lsx_vldrepl_b((void *)(_1), (_2))) ++#define __lsx_vldrepl_b(/*void **/ _1, /*si12*/ _2) ((__m128i)__builtin_lsx_vldrepl_b((void const *)(_1), (_2))) + + /* Assembly instruction format: vd, rj, si11. */ + /* Data types in instruction templates: V8HI, CVPOINTER, SI. */ +-#define __lsx_vldrepl_h(/*void **/ _1, /*si11*/ _2) ((__m128i)__builtin_lsx_vldrepl_h((void *)(_1), (_2))) ++#define __lsx_vldrepl_h(/*void **/ _1, /*si11*/ _2) ((__m128i)__builtin_lsx_vldrepl_h((void const *)(_1), (_2))) + + /* Assembly instruction format: vd, rj, si10. */ + /* Data types in instruction templates: V4SI, CVPOINTER, SI. */ +-#define __lsx_vldrepl_w(/*void **/ _1, /*si10*/ _2) ((__m128i)__builtin_lsx_vldrepl_w((void *)(_1), (_2))) ++#define __lsx_vldrepl_w(/*void **/ _1, /*si10*/ _2) ((__m128i)__builtin_lsx_vldrepl_w((void const *)(_1), (_2))) + + /* Assembly instruction format: vd, rj, si9. */ + /* Data types in instruction templates: V2DI, CVPOINTER, SI. */ +-#define __lsx_vldrepl_d(/*void **/ _1, /*si9*/ _2) ((__m128i)__builtin_lsx_vldrepl_d((void *)(_1), (_2))) ++#define __lsx_vldrepl_d(/*void **/ _1, /*si9*/ _2) ((__m128i)__builtin_lsx_vldrepl_d((void const *)(_1), (_2))) + + /* Assembly instruction format: vd, vj. */ + /* Data types in instruction templates: V16QI, V16QI. */ +@@ -4470,7 +4470,7 @@ __m128i __lsx_vextl_q_d(__m128i _1) + + /* Assembly instruction format: vd, rj, si12. 
*/ + /* Data types in instruction templates: V16QI, CVPOINTER, SI. */ +-#define __lsx_vld(/*void **/ _1, /*si12*/ _2) ((__m128i)__builtin_lsx_vld((void *)(_1), (_2))) ++#define __lsx_vld(/*void **/ _1, /*si12*/ _2) ((__m128i)__builtin_lsx_vld((void const *)(_1), (_2))) + + /* Assembly instruction format: vd, rj, si12. */ + /* Data types in instruction templates: VOID, V16QI, CVPOINTER, SI. */ +@@ -4547,9 +4547,9 @@ __m128i __lsx_vshuf_b(__m128i _1, __m128i _2, __m128i _3) + /* Assembly instruction format: vd, rj, rk. */ + /* Data types in instruction templates: V16QI, CVPOINTER, DI. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +-__m128i __lsx_vldx(void * _1, long int _2) ++__m128i __lsx_vldx(void const * _1, long int _2) + { +- return (__m128i)__builtin_lsx_vldx((void *)_1, (long int)_2); ++ return (__m128i)__builtin_lsx_vldx((void const *)_1, (long int)_2); + } + + /* Assembly instruction format: vd, rj, rk. */ +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index 20638559d..daacaf003 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -1,5 +1,7 @@ +-;; Predicate definitions for LARCH. +-;; Copyright (C) 2004-2018 Free Software Foundation, Inc. ++;; Predicate definitions for LoongArch target. ++;; Copyright (C) 2020-2022 Free Software Foundation, Inc. ++;; Contributed by Loongson Co. Ltd. ++;; Based on MIPS target for GNU compiler. + ;; + ;; This file is part of GCC. + ;; +@@ -19,7 +21,7 @@ + + (define_predicate "const_uns_arith_operand" + (and (match_code "const_int") +- (match_test "SMALL_OPERAND_UNSIGNED (INTVAL (op))"))) ++ (match_test "IMM12_OPERAND_UNSIGNED (INTVAL (op))"))) + + (define_predicate "uns_arith_operand" + (ior (match_operand 0 "const_uns_arith_operand") +@@ -45,7 +47,7 @@ + (ior (match_operand 0 "const_arith_operand") + (match_operand 0 "register_operand"))) + +-(define_predicate "const_immlsa_operand" ++(define_predicate "const_immalsl_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 1, 4)"))) + +@@ -69,9 +71,6 @@ + (and (match_code "const_int") + (match_test "UIMM6_OPERAND (INTVAL (op))"))) + +-(define_predicate "const_uimm7_operand" +- (and (match_code "const_int") +- (match_test "IN_RANGE (INTVAL (op), 0, 127)"))) + + (define_predicate "const_uimm8_operand" + (and (match_code "const_int") +@@ -85,10 +84,6 @@ + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 32767)"))) + +-(define_predicate "const_imm5_operand" +- (and (match_code "const_int") +- (match_test "IN_RANGE (INTVAL (op), -16, 15)"))) +- + (define_predicate "const_imm10_operand" + (and (match_code "const_int") + (match_test "IMM10_OPERAND (INTVAL (op))"))) +@@ -101,10 +96,6 @@ + (and (match_code "const_int") + (match_test "IMM13_OPERAND (INTVAL (op))"))) + +-(define_predicate "reg_imm10_operand" +- (ior (match_operand 0 "const_imm10_operand") +- (match_operand 0 "register_operand"))) +- + (define_predicate "aq8b_operand" + (and (match_code "const_int") + (match_test "loongarch_signed_immediate_p (INTVAL (op), 8, 0)"))) +@@ -137,6 +128,7 @@ + (and (match_code "const_int") + (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 3)"))) + ++ + (define_predicate "aq12b_operand" + (and (match_code "const_int") + (match_test "loongarch_signed_immediate_p (INTVAL (op), 12, 0)"))) +@@ -155,7 +147,7 @@ + + (define_predicate "sle_operand" + (and (match_code "const_int") +- (match_test "SMALL_OPERAND (INTVAL (op) + 1)"))) ++ (match_test 
"IMM12_OPERAND (INTVAL (op) + 1)"))) + + (define_predicate "sleu_operand" + (and (match_operand 0 "sle_operand") +@@ -223,179 +215,40 @@ + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 7)"))) + +-(define_predicate "const_4_to_7_operand" ++(define_predicate "const_4_to_7_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 4, 7)"))) +- ++ + (define_predicate "const_8_to_15_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 7)"))) +- +-(define_predicate "const_16_to_31_operand" +- (and (match_code "const_int") +- (match_test "IN_RANGE (INTVAL (op), 0, 7)"))) +- +-(define_predicate "qi_mask_operand" +- (and (match_code "const_int") +- (match_test "UINTVAL (op) == 0xff"))) + +-(define_predicate "hi_mask_operand" ++(define_predicate "const_8_to_11_operand" + (and (match_code "const_int") +- (match_test "UINTVAL (op) == 0xffff"))) ++ (match_test "IN_RANGE (INTVAL (op), 8, 11)"))) + +-(define_predicate "lu52i_mask_operand" ++(define_predicate "const_12_to_15_operand" + (and (match_code "const_int") +- (match_test "UINTVAL (op) == 0xfffffffffffff"))) ++ (match_test "IN_RANGE (INTVAL (op), 12, 15)"))) + +-(define_predicate "shift_mask_operand" ++(define_predicate "const_16_to_31_operand" + (and (match_code "const_int") +- (ior (match_test "UINTVAL (op) == 0x3fffffffc") +- (match_test "UINTVAL (op) == 0x1fffffffe") +- (match_test "UINTVAL (op) == 0x7fffffff8") +- (match_test "UINTVAL (op) == 0xffffffff0")))) +- +- ++ (match_test "IN_RANGE (INTVAL (op), 0, 7)"))) + +-(define_predicate "si_mask_operand" ++(define_predicate "lu52i_mask_operand" + (and (match_code "const_int") +- (match_test "UINTVAL (op) == 0xffffffff"))) +- +-(define_predicate "and_load_operand" +- (ior (match_operand 0 "qi_mask_operand") +- (match_operand 0 "hi_mask_operand") +- (match_operand 0 "si_mask_operand"))) ++ (match_test "UINTVAL (op) == 0xfffffffffffff"))) + + (define_predicate "low_bitmask_operand" + (and (match_code "const_int") + (match_test "low_bitmask_len (mode, INTVAL (op)) > 12"))) + +-(define_predicate "and_reg_operand" +- (ior (match_operand 0 "register_operand") +- (match_operand 0 "const_uns_arith_operand") +- (match_operand 0 "low_bitmask_operand") +- (match_operand 0 "si_mask_operand"))) +- +-(define_predicate "and_operand" +- (ior (match_operand 0 "and_load_operand") +- (match_operand 0 "and_reg_operand"))) +- +-(define_predicate "d_operand" +- (and (match_code "reg") +- (match_test "GP_REG_P (REGNO (op))"))) +- +-(define_predicate "lwsp_swsp_operand" +- (and (match_code "mem") +- (match_test "lwsp_swsp_address_p (XEXP (op, 0), mode)"))) +- +-(define_predicate "db4_operand" +- (and (match_code "const_int") +- (match_test "loongarch_unsigned_immediate_p (INTVAL (op) + 1, 4, 0)"))) +- +-(define_predicate "db7_operand" +- (and (match_code "const_int") +- (match_test "loongarch_unsigned_immediate_p (INTVAL (op) + 1, 7, 0)"))) +- +-(define_predicate "db8_operand" +- (and (match_code "const_int") +- (match_test "loongarch_unsigned_immediate_p (INTVAL (op) + 1, 8, 0)"))) +- +-(define_predicate "ib3_operand" +- (and (match_code "const_int") +- (match_test "loongarch_unsigned_immediate_p (INTVAL (op) - 1, 3, 0)"))) +- +-(define_predicate "sb4_operand" +- (and (match_code "const_int") +- (match_test "loongarch_signed_immediate_p (INTVAL (op), 4, 0)"))) +- +-(define_predicate "sb5_operand" +- (and (match_code "const_int") +- (match_test "loongarch_signed_immediate_p (INTVAL (op), 5, 0)"))) +- +-(define_predicate "sb8_operand" +- (and 
(match_code "const_int") +- (match_test "loongarch_signed_immediate_p (INTVAL (op), 8, 0)"))) +- +-(define_predicate "sd8_operand" +- (and (match_code "const_int") +- (match_test "loongarch_signed_immediate_p (INTVAL (op), 8, 3)"))) +- +-(define_predicate "ub4_operand" +- (and (match_code "const_int") +- (match_test "loongarch_unsigned_immediate_p (INTVAL (op), 4, 0)"))) +- +-(define_predicate "ub8_operand" +- (and (match_code "const_int") +- (match_test "loongarch_unsigned_immediate_p (INTVAL (op), 8, 0)"))) +- +-(define_predicate "uh4_operand" +- (and (match_code "const_int") +- (match_test "loongarch_unsigned_immediate_p (INTVAL (op), 4, 1)"))) +- +-(define_predicate "uw4_operand" +- (and (match_code "const_int") +- (match_test "loongarch_unsigned_immediate_p (INTVAL (op), 4, 2)"))) +- +-(define_predicate "uw5_operand" +- (and (match_code "const_int") +- (match_test "loongarch_unsigned_immediate_p (INTVAL (op), 5, 2)"))) +- +-(define_predicate "uw6_operand" +- (and (match_code "const_int") +- (match_test "loongarch_unsigned_immediate_p (INTVAL (op), 6, 2)"))) +- +-(define_predicate "uw8_operand" +- (and (match_code "const_int") +- (match_test "loongarch_unsigned_immediate_p (INTVAL (op), 8, 2)"))) +- +-(define_predicate "addiur2_operand" +- (and (match_code "const_int") +- (ior (match_test "INTVAL (op) == -1") +- (match_test "INTVAL (op) == 1") +- (match_test "INTVAL (op) == 4") +- (match_test "INTVAL (op) == 8") +- (match_test "INTVAL (op) == 12") +- (match_test "INTVAL (op) == 16") +- (match_test "INTVAL (op) == 20") +- (match_test "INTVAL (op) == 24")))) +- +-(define_predicate "addiusp_operand" +- (and (match_code "const_int") +- (ior (match_test "(IN_RANGE (INTVAL (op), 2, 257))") +- (match_test "(IN_RANGE (INTVAL (op), -258, -3))")))) +- +-(define_predicate "andi16_operand" +- (and (match_code "const_int") +- (ior (match_test "IN_RANGE (INTVAL (op), 1, 4)") +- (match_test "IN_RANGE (INTVAL (op), 7, 8)") +- (match_test "IN_RANGE (INTVAL (op), 15, 16)") +- (match_test "IN_RANGE (INTVAL (op), 31, 32)") +- (match_test "IN_RANGE (INTVAL (op), 63, 64)") +- (match_test "INTVAL (op) == 255") +- (match_test "INTVAL (op) == 32768") +- (match_test "INTVAL (op) == 65535")))) +- +-(define_predicate "movep_src_register" +- (and (match_code "reg") +- (ior (match_test ("IN_RANGE (REGNO (op), 2, 3)")) +- (match_test ("IN_RANGE (REGNO (op), 16, 20)"))))) +- +-(define_predicate "movep_src_operand" +- (ior (match_operand 0 "const_0_operand") +- (match_operand 0 "movep_src_register"))) +- +-(define_predicate "fcc_reload_operand" +- (and (match_code "reg,subreg") +- (match_test "ST_REG_P (true_regnum (op))"))) +- +-(define_predicate "muldiv_target_operand" +- (match_operand 0 "register_operand")) +- + (define_predicate "const_call_insn_operand" + (match_code "const,symbol_ref,label_ref") + { + enum loongarch_symbol_type symbol_type; + +- if (!loongarch_symbolic_constant_p (op, SYMBOL_CONTEXT_CALL, &symbol_type)) ++ if (!loongarch_symbolic_constant_p (op, &symbol_type)) + return false; + + switch (symbol_type) +@@ -403,9 +256,9 @@ + case SYMBOL_GOT_DISP: + /* Without explicit relocs, there is no special syntax for + loading the address of a call destination into a register. +- Using "la $25,foo; jal $25" would prevent the lazy binding +- of "foo", so keep the address of global symbols with the +- jal macro. */ ++ Using "la.global JIRL_REGS,foo; jirl JIRL_REGS" would prevent the lazy ++ binding of "foo", so keep the address of global symbols with the jirl ++ macro. 
*/ + return 1; + + default: +@@ -420,7 +273,7 @@ + (define_predicate "is_const_call_local_symbol" + (and (match_operand 0 "const_call_insn_operand") + (ior (match_test "loongarch_global_symbol_p (op) == 0") +- (match_test "loongarch_symbol_binds_local_p (op) != 0")) ++ (match_test "loongarch_symbol_binds_local_p (op) != 0")) + (match_test "CONSTANT_P (op)"))) + + (define_predicate "is_const_call_weak_symbol" +@@ -446,7 +299,6 @@ + (define_predicate "splittable_const_int_operand" + (match_code "const_int") + { +- + /* Don't handle multi-word moves this way; we don't want to introduce + the individual word-mode moves until after reload. */ + if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) +@@ -454,9 +306,8 @@ + + /* Otherwise check whether the constant can be loaded in a single + instruction. */ +-// return !LUI_INT (op) && !SMALL_INT (op) && !SMALL_INT_UNSIGNED (op); +- return !LUI_INT (op) && !SMALL_INT (op) && !SMALL_INT_UNSIGNED (op) +- && !LU52I_INT (op); ++ return !LU12I_INT (op) && !IMM12_INT (op) && !IMM12_INT_UNSIGNED (op) ++ && !LU52I_INT (op); + }) + + (define_predicate "move_operand" +@@ -504,73 +355,34 @@ + case CONST: + case SYMBOL_REF: + case LABEL_REF: +- return (loongarch_symbolic_constant_p (op, SYMBOL_CONTEXT_LEA, &symbol_type)); ++ return (loongarch_symbolic_constant_p (op, &symbol_type)); + default: + return true; + } + }) + +-(define_predicate "consttable_operand" +- (match_test "CONSTANT_P (op)")) +- + (define_predicate "symbolic_operand" + (match_code "const,symbol_ref,label_ref") + { + enum loongarch_symbol_type type; +- return loongarch_symbolic_constant_p (op, SYMBOL_CONTEXT_LEA, &type); +-}) +- +-(define_predicate "force_to_mem_operand" +- (match_code "const,symbol_ref,label_ref") +-{ +- enum loongarch_symbol_type symbol_type; +- return (loongarch_symbolic_constant_p (op, SYMBOL_CONTEXT_LEA, &symbol_type) +- && loongarch_use_pcrel_pool_p[(int) symbol_type]); +-}) +- +-(define_predicate "got_disp_operand" +- (match_code "const,symbol_ref,label_ref") +-{ +- enum loongarch_symbol_type type; +- return (loongarch_symbolic_constant_p (op, SYMBOL_CONTEXT_LEA, &type) +- && type == SYMBOL_GOT_DISP); ++ return loongarch_symbolic_constant_p (op, &type); + }) + +-(define_predicate "symbol_ref_operand" +- (match_code "symbol_ref")) +- +-(define_predicate "stack_operand" +- (and (match_code "mem") +- (match_test "loongarch_stack_address_p (XEXP (op, 0), GET_MODE (op))"))) +- +- +- + (define_predicate "equality_operator" + (match_code "eq,ne")) + +-(define_predicate "extend_operator" +- (match_code "zero_extend,sign_extend")) +- +-(define_predicate "trap_comparison_operator" +- (match_code "eq,ne,lt,ltu,ge,geu")) +- + (define_predicate "order_operator" + (match_code "lt,ltu,le,leu,ge,geu,gt,gtu")) + + ;; For NE, cstore uses sltu instructions in which the first operand is $0. + + (define_predicate "loongarch_cstore_operator" +- (ior (match_code "eq,gt,gtu,ge,geu,lt,ltu,le,leu") +- (match_code "ne"))) ++ (match_code "ne,eq,gt,gtu,ge,geu,lt,ltu,le,leu")) + + (define_predicate "small_data_pattern" + (and (match_code "set,parallel,unspec,unspec_volatile,prefetch") + (match_test "loongarch_small_data_pattern_p (op)"))) + +-(define_predicate "mem_noofs_operand" +- (and (match_code "mem") +- (match_code "reg" "0"))) +- + ;; Return 1 if the operand is in non-volatile memory. 
+ (define_predicate "non_volatile_mem_operand" + (and (match_operand 0 "memory_operand") +@@ -606,12 +418,6 @@ + return loongarch_const_vector_same_int_p (op, mode, 0, 63); + }) + +-(define_predicate "const_vector_same_uimm8_operand" +- (match_code "const_vector") +-{ +- return loongarch_const_vector_same_int_p (op, mode, 0, 255); +-}) +- + (define_predicate "par_const_vector_shf_set_operand" + (match_code "parallel") + { +diff --git a/gcc/config/loongarch/rtems.h b/gcc/config/loongarch/rtems.h +deleted file mode 100644 +index bbb70b040..000000000 +--- a/gcc/config/loongarch/rtems.h ++++ /dev/null +@@ -1,39 +0,0 @@ +-/* Definitions for rtems targeting a LARCH using ELF. +- Copyright (C) 1996-2018 Free Software Foundation, Inc. +- Contributed by Joel Sherrill (joel@OARcorp.com). +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- . */ +- +-/* Specify predefined symbols in preprocessor. */ +- +-#define TARGET_OS_CPP_BUILTINS() \ +-do { \ +- builtin_define ("__rtems__"); \ +- builtin_define ("__USE_INIT_FINI__"); \ +- builtin_assert ("system=rtems"); \ +-} while (0) +- +-/* No sdata. +- * The RTEMS BSPs expect -G0 +- */ +-#undef LARCH_DEFAULT_GVALUE +-#define LARCH_DEFAULT_GVALUE 0 +diff --git a/gcc/config/loongarch/sde.opt b/gcc/config/loongarch/sde.opt +deleted file mode 100644 +index 321217d51..000000000 +--- a/gcc/config/loongarch/sde.opt ++++ /dev/null +@@ -1,28 +0,0 @@ +-; LARCH SDE options. +-; +-; Copyright (C) 2010-2018 Free Software Foundation, Inc. +-; +-; This file is part of GCC. +-; +-; GCC is free software; you can redistribute it and/or modify it under +-; the terms of the GNU General Public License as published by the Free +-; Software Foundation; either version 3, or (at your option) any later +-; version. +-; +-; GCC is distributed in the hope that it will be useful, but WITHOUT +-; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +-; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +-; License for more details. +-; +-; You should have received a copy of the GNU General Public License +-; along with GCC; see the file COPYING3. If not see +-; . +- +-; -mcode-xonly is a traditional alias for -mcode-readable=pcrel and +-; -mno-data-in-code is a traditional alias for -mcode-readable=no. +- +-mno-data-in-code +-Target RejectNegative Alias(mcode-readable=, no) +- +-mcode-xonly +-Target RejectNegative Alias(mcode-readable=, pcrel) +diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md +index 5a16c4fa3..abc401339 100644 +--- a/gcc/config/loongarch/sync.md ++++ b/gcc/config/loongarch/sync.md +@@ -1,7 +1,7 @@ +-;; Machine description for LARCH atomic operations. 
+-;; Copyright (C) 2011-2018 Free Software Foundation, Inc. +-;; Contributed by Andrew Waterman (andrew@sifive.com). +-;; Based on LARCH target for GNU compiler. ++;; Machine description for LoongArch atomic operations. ++;; Copyright (C) 2020-2022 Free Software Foundation, Inc. ++;; Contributed by Loongson Co. Ltd. ++;; Based on MIPS and RISC-V target for GNU compiler. + + ;; This file is part of GCC. + +@@ -29,6 +29,7 @@ + UNSPEC_COMPARE_AND_SWAP_NAND + UNSPEC_SYNC_OLD_OP + UNSPEC_SYNC_EXCHANGE ++ UNSPEC_ATOMIC_LOAD + UNSPEC_ATOMIC_STORE + UNSPEC_MEMORY_BARRIER + ]) +@@ -37,21 +38,25 @@ + (define_code_attr atomic_optab + [(plus "add") (ior "or") (xor "xor") (and "and")]) + ++(define_mode_iterator AMO_BHWD [(QI "TARGET_uARCH_LA664") ++ (HI "TARGET_uARCH_LA664") ++ SI DI]) ++ + ;; This attribute gives the format suffix for atomic memory operations. +-(define_mode_attr amo [(SI "w") (DI "d")]) ++(define_mode_attr amo [(QI "b") (HI "h") (SI "w") (DI "d")]) ++ ++;; expands to the name of the atomic operand that implements a ++;; particular code. ++(define_code_attr amop [(ior "or") (xor "xor") (and "and") (plus "add")]) + +-;; expands to the name of the atomic operand that implements a particular code. +-(define_code_attr amop [(ior "or") +- (xor "xor") +- (and "and") +- (plus "add")]) + ;; Memory barriers. + + (define_expand "mem_thread_fence" + [(match_operand:SI 0 "const_int_operand" "")] ;; model + "" + { +- if (INTVAL (operands[0]) != MEMMODEL_RELAXED) ++ enum memmodel model = memmodel_from_int (INTVAL (operands[0])); ++ if (!is_mm_relaxed (model)) + { + rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (mem) = 1; +@@ -60,37 +65,79 @@ + DONE; + }) + +-;; Until the LARCH memory model (hence its mapping from C++) is finalized, ++;; Until the LoongArch memory model (hence its mapping from C++) is finalized, + ;; conservatively emit a full FENCE. + (define_insn "mem_thread_fence_1" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER)) + (match_operand:SI 1 "const_int_operand" "")] ;; model + "" +- "dbar\t0") ++{ ++ enum memmodel model = memmodel_from_int (INTVAL (operands[1])); ++ if (is_mm_consume (model)) ++ return "dbar\t0x700"; ++ else if (is_mm_acquire (model)) ++ return "dbar\t0x14"; ++ else ++ return "dbar\t0x10"; ++}) + + ;; Atomic memory operations. + ++(define_insn "atomic_load" ++ [(set (match_operand:QHWD 0 "register_operand" "=r") ++ (unspec_volatile:QHWD ++ [(match_operand:QHWD 1 "memory_operand" "+m") ++ (match_operand:SI 2 "const_int_operand")] ;; model ++ UNSPEC_ATOMIC_LOAD))] ++ "" ++{ ++ enum memmodel model = memmodel_from_int (INTVAL (operands[2])); ++ if (is_mm_relaxed (model) || is_mm_release (model)) ++ return "ld.\t%0,%1"; ++ if (is_mm_consume (model)) ++ return "ld.\t%0,%1\n\tdbar\t0x700"; ++ else ++ return "ld.\t%0,%1\n\tdbar\t0x14"; ++}) ++ + ;; Implement atomic stores with amoswap. Fall back to fences for atomic loads. 
+ (define_insn "atomic_store" +- [(set (match_operand:GPR 0 "memory_operand" "+ZB") +- (unspec_volatile:GPR +- [(match_operand:GPR 1 "reg_or_0_operand" "rJ") ++ [(set (match_operand:QHWD 0 "memory_operand" "+m") ++ (unspec_volatile:QHWD ++ [(match_operand:QHWD 1 "reg_or_0_operand" "rJ") + (match_operand:SI 2 "const_int_operand")] ;; model + UNSPEC_ATOMIC_STORE))] + "" +- "amswap%A2.\t$zero,%z1,%0" ++{ ++ enum memmodel model = memmodel_from_int (INTVAL (operands[2])); ++ if (is_mm_relaxed (model) || is_mm_acquire (model) || is_mm_consume (model)) ++ return "st.\t%z1,%0"; ++ else ++ return "dbar\t0x12\n\tst.\t%z1,%0"; ++} + [(set (attr "length") (const_int 8))]) + + (define_insn "atomic_" + [(set (match_operand:GPR 0 "memory_operand" "+ZB") + (unspec_volatile:GPR + [(any_atomic:GPR (match_dup 0) +- (match_operand:GPR 1 "reg_or_0_operand" "rJ")) ++ (match_operand:GPR 1 "reg_or_0_operand" "rJ")) + (match_operand:SI 2 "const_int_operand")] ;; model + UNSPEC_SYNC_OLD_OP))] + "" +- "am%A2.\t$zero,%z1,%0" ++ "%J2\n\tam%A2.\t$zero,%z1,%0\n\t%K2" ++ [(set (attr "length") (const_int 8))]) ++ ++(define_insn "atomic_add" ++ [(set (match_operand:SHORT 0 "memory_operand" "+ZB") ++ (unspec_volatile:SHORT ++ [(plus:SHORT (match_dup 0) ++ (match_operand:SHORT 1 "reg_or_0_operand" "rJ")) ++ (match_operand:SI 2 "const_int_operand")] ;; model ++ UNSPEC_SYNC_OLD_OP))] ++ "TARGET_uARCH_LA664" ++ "%J2\n\tamadd%A2.\t$zero,%z1,%0\n\t%K2" + [(set (attr "length") (const_int 8))]) + + (define_insn "atomic_fetch_" +@@ -99,11 +146,11 @@ + (set (match_dup 1) + (unspec_volatile:GPR + [(any_atomic:GPR (match_dup 1) +- (match_operand:GPR 2 "reg_or_0_operand" "rJ")) ++ (match_operand:GPR 2 "reg_or_0_operand" "rJ")) + (match_operand:SI 3 "const_int_operand")] ;; model + UNSPEC_SYNC_OLD_OP))] + "" +- "am%A3.\t%0,%z2,%1" ++ "%J3\n\tam%A3.\t%0,%z2,%1\n\t%K3" + [(set (attr "length") (const_int 8))]) + + (define_insn "atomic_exchange" +@@ -115,35 +162,90 @@ + (set (match_dup 1) + (match_operand:GPR 2 "register_operand" "r"))] + "" +- "amswap%A3.\t%0,%z2,%1" ++ "%J3\n\tamswap%A3.\t%0,%z2,%1\n\t%K3" ++ [(set (attr "length") (const_int 8))]) ++ ++(define_insn "atomic_exchange_1" ++ [(set (match_operand:SHORT 0 "register_operand" "=&r") ++ (unspec_volatile:SHORT ++ [(match_operand:SHORT 1 "memory_operand" "+ZB") ++ (match_operand:SI 3 "const_int_operand")] ;; model ++ UNSPEC_SYNC_EXCHANGE)) ++ (set (match_dup 1) ++ (match_operand:SHORT 2 "register_operand" "r"))] ++ "" ++ "%J3\n\tamswap%A3.\t%0,%z2,%1\n\t%K3" + [(set (attr "length") (const_int 8))]) + + (define_insn "atomic_cas_value_strong" + [(set (match_operand:GPR 0 "register_operand" "=&r") +- (match_operand:GPR 1 "memory_operand" "+ZC")) ++ (match_operand:GPR 1 "memory_operand" "+ZB")) + (set (match_dup 1) + (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") + (match_operand:GPR 3 "reg_or_0_operand" "rJ") +- (match_operand:SI 4 "const_int_operand") ;; mod_s +- (match_operand:SI 5 "const_int_operand")] ;; mod_f ++ (match_operand:SI 4 "const_int_operand")] ;; mod_s + UNSPEC_COMPARE_AND_SWAP)) +- (clobber (match_scratch:GPR 6 "=&r"))] ++ (clobber (match_scratch:GPR 5 "=&r"))] + "" + { +- return "%G5\n\t" +- "1:\n\t" +- "ll.\t%0,%1\n\t" +- "bne\t%0,%z2,2f\n\t" +- "or%i3\t%6,$zero,%3\n\t" +- "sc.\t%6,%1\n\t" +- "beq\t$zero,%6,1b\n\t" +- "b\t3f\n\t" +- "2:\n\t" +- "dbar\t0x700\n\t" +- "3:\n\t"; ++ if (TARGET_uARCH_LA664) ++ { ++ enum memmodel model = memmodel_from_int (INTVAL (operands[4])); ++ output_asm_insn ("1:",operands); ++ ++ if (is_mm_relaxed (model) || 
is_mm_consume (model) || is_mm_release (model)) ++ output_asm_insn ("ll.\t%0,%1", operands); ++ else ++ output_asm_insn ("llacq.\t%0,%1", operands); ++ ++ output_asm_insn ("bne\t%0,%z2,2f", operands); ++ output_asm_insn ("or%i3\t%5,$zero,%3", operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model)) ++ output_asm_insn ("sc.\t%5,%1", operands); ++ else ++ output_asm_insn ("screl.\t%5,%1", operands); ++ ++ output_asm_insn ("beq\t$zero,%5,1b", operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_release (model)) ++ { ++ output_asm_insn ("b\t3f", operands); ++ output_asm_insn ("2:", operands); ++ output_asm_insn ("dbar\t0x700", operands); ++ output_asm_insn ("3:", operands); ++ } ++ else ++ output_asm_insn ("2:", operands); ++ return ""; ++ } ++ else ++ return "%G4\\n\\t" ++ "1:\\n\\t" ++ "ll.\\t%0,%1\\n\\t" ++ "bne\\t%0,%z2,2f\\n\\t" ++ "or%i3\\t%5,$zero,%3\\n\\t" ++ "sc.\\t%5,%1\\n\\t" ++ "beq\\t$zero,%5,1b\\n\\t" ++ "b\\t3f\\n\\t" ++ "2:\\n\\t" ++ "dbar\\t0x700\\n\\t" ++ "3:\\n\\t"; + } + [(set (attr "length") (const_int 32))]) + ++(define_insn "atomic_cas_value_strong_3a6000" ++ [(set (match_operand:AMO_BHWD 0 "register_operand" "=&r") ++ (match_operand:AMO_BHWD 1 "memory_operand" "+ZB")) ++ (set (match_dup 1) ++ (unspec_volatile:AMO_BHWD [(match_operand:AMO_BHWD 2 "reg_or_0_operand" "rJ") ++ (match_operand:AMO_BHWD 3 "reg_or_0_operand" "rJ") ++ (match_operand:SI 4 "const_int_operand")] ;; mod_s ++ UNSPEC_COMPARE_AND_SWAP))] ++ "TARGET_uARCH_LA664" ++ "ori\t%0,%z2,0\n\t%J4\n\tamcas%A4.\t%0,%z3,%1\n\t%K4" ++ [(set (attr "length") (const_int 32))]) ++ + (define_expand "atomic_compare_and_swap" + [(match_operand:SI 0 "register_operand" "") ;; bool output + (match_operand:GPR 1 "register_operand" "") ;; val output +@@ -155,9 +257,29 @@ + (match_operand:SI 7 "const_int_operand" "")] ;; mod_f + "" + { +- emit_insn (gen_atomic_cas_value_strong (operands[1], operands[2], +- operands[3], operands[4], +- operands[6], operands[7])); ++ rtx mod_s, mod_f; ++ ++ mod_s = operands[6]; ++ mod_f = operands[7]; ++ ++ /* Normally the succ memory model must be stronger than fail, but in the ++ unlikely event of fail being ACQUIRE and succ being RELEASE we need to ++ promote succ to ACQ_REL so that we don't lose the acquire semantics. */ ++ ++ if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f))) ++ && is_mm_release (memmodel_from_int (INTVAL (mod_s)))) ++ mod_s = GEN_INT (MEMMODEL_ACQ_REL); ++ ++ operands[6] = mod_s; ++ ++ if (TARGET_uARCH_LA664) ++ emit_insn (gen_atomic_cas_value_strong_3a6000 (operands[1], operands[2], ++ operands[3], operands[4], ++ operands[6])); ++ else ++ emit_insn (gen_atomic_cas_value_strong (operands[1], operands[2], ++ operands[3], operands[4], ++ operands[6])); + + rtx compare = operands[1]; + if (operands[3] != const0_rtx) +@@ -174,7 +296,8 @@ + compare = reg; + } + +- emit_insn (gen_rtx_SET (operands[0], gen_rtx_EQ (SImode, compare, const0_rtx))); ++ emit_insn (gen_rtx_SET (operands[0], ++ gen_rtx_EQ (SImode, compare, const0_rtx))); + DONE; + }) + +@@ -185,7 +308,7 @@ + "" + { + /* We have no QImode atomics, so use the address LSBs to form a mask, +- then use an aligned SImode atomic. */ ++ then use an aligned SImode atomic. 
*/ + rtx result = operands[0]; + rtx mem = operands[1]; + rtx model = operands[2]; +@@ -221,11 +344,9 @@ + DONE; + }) + +- +- + (define_insn "atomic_cas_value_cmp_and_7_" + [(set (match_operand:GPR 0 "register_operand" "=&r") +- (match_operand:GPR 1 "memory_operand" "+ZC")) ++ (match_operand:GPR 1 "memory_operand" "+ZB")) + (set (match_dup 1) + (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") + (match_operand:GPR 3 "reg_or_0_operand" "rJ") +@@ -236,23 +357,56 @@ + (clobber (match_scratch:GPR 7 "=&r"))] + "" + { +- return "%G6\n\t" +- "1:\n\t" +- "ll.\t%0,%1\n\t" +- "and\t%7,%0,%2\n\t" +- "bne\t%7,%z4,2f\n\t" +- "and\t%7,%0,%z3\n\t" +- "or%i5\t%7,%7,%5\n\t" +- "sc.\t%7,%1\n\t" +- "beq\t$zero,%7,1b\n\t" +- "b\t3f\n\t" +- "2:\n\t" +- "dbar\t0x700\n\t" +- "3:\n\t"; ++ if (TARGET_uARCH_LA664) ++ { ++ enum memmodel model = memmodel_from_int (INTVAL (operands[6])); ++ output_asm_insn ("1:",operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_release (model)) ++ output_asm_insn ("ll.\t%0,%1", operands); ++ else ++ output_asm_insn ("llacq.\t%0,%1", operands); ++ ++ output_asm_insn ("and\t%7,%0,%2", operands); ++ output_asm_insn ("bne\t%7,%z4,2f", operands); ++ output_asm_insn ("and\t%7,%0,%z3", operands); ++ output_asm_insn ("or%i5\t%7,%7,%5", operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model)) ++ output_asm_insn ("sc.\t%7,%1", operands); ++ else ++ output_asm_insn ("screl.\t%7,%1", operands); ++ ++ output_asm_insn ("beq\t$zero,%7,1b", operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_release (model)) ++ { ++ output_asm_insn ("b\t3f", operands); ++ output_asm_insn ("2:", operands); ++ output_asm_insn ("dbar\t0x700", operands); ++ output_asm_insn ("3:", operands); ++ } ++ else ++ output_asm_insn ("2:", operands); ++ return ""; ++ } ++ else ++ return "%G6\\n\\t" ++ "1:\\n\\t" ++ "ll.\\t%0,%1\\n\\t" ++ "and\\t%7,%0,%2\\n\\t" ++ "bne\\t%7,%z4,2f\\n\\t" ++ "and\\t%7,%0,%z3\\n\\t" ++ "or%i5\\t%7,%7,%5\\n\\t" ++ "sc.\\t%7,%1\\n\\t" ++ "beq\\t$zero,%7,1b\\n\\t" ++ "b\\t3f\\n\\t" ++ "2:\\n\\t" ++ "dbar\\t0x700\\n\\t" ++ "3:\\n\\t"; + } + [(set (attr "length") (const_int 40))]) + +- + (define_expand "atomic_compare_and_swap" + [(match_operand:SI 0 "register_operand" "") ;; bool output + (match_operand:SHORT 1 "register_operand" "") ;; val output +@@ -264,43 +418,59 @@ + (match_operand:SI 7 "const_int_operand" "")] ;; mod_f + "" + { +- union loongarch_gen_fn_ptrs generator; +- generator.fn_7 = gen_atomic_cas_value_cmp_and_7_si; +- loongarch_expand_atomic_qihi (generator, +- operands[1], +- operands[2], +- operands[3], +- operands[4], +- operands[7]); ++ rtx mod_s, mod_f; + +- rtx compare = operands[1]; +- if (operands[3] != const0_rtx) +- { +- machine_mode mode = GET_MODE (operands[3]); +- rtx op1 = convert_modes (SImode, mode, operands[1], true); +- rtx op3 = convert_modes (SImode, mode, operands[3], true); +- rtx difference = gen_rtx_MINUS (SImode, op1, op3); +- compare = gen_reg_rtx (SImode); +- emit_insn (gen_rtx_SET (compare, difference)); +- } ++ mod_s = operands[6]; ++ mod_f = operands[7]; + +- if (word_mode != mode) ++ /* Normally the succ memory model must be stronger than fail, but in the ++ unlikely event of fail being ACQUIRE and succ being RELEASE we need to ++ promote succ to ACQ_REL so that we don't lose the acquire semantics. 
*/ ++ ++ if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f))) ++ && is_mm_release (memmodel_from_int (INTVAL (mod_s)))) ++ mod_s = GEN_INT (MEMMODEL_ACQ_REL); ++ ++ operands[6] = mod_s; ++ ++ if (TARGET_uARCH_LA664) ++ emit_insn (gen_atomic_cas_value_strong_3a6000 (operands[1], operands[2], ++ operands[3], operands[4], ++ operands[6])); ++ else + { +- rtx reg = gen_reg_rtx (word_mode); +- emit_insn (gen_rtx_SET (reg, gen_rtx_SIGN_EXTEND (word_mode, compare))); +- compare = reg; ++ union loongarch_gen_fn_ptrs generator; ++ generator.fn_7 = gen_atomic_cas_value_cmp_and_7_si; ++ loongarch_expand_atomic_qihi (generator, operands[1], operands[2], ++ operands[3], operands[4], operands[6]); + } + +- emit_insn (gen_rtx_SET (operands[0], gen_rtx_EQ (SImode, compare, const0_rtx))); ++ rtx compare = operands[1]; ++ if (operands[3] != const0_rtx) ++ { ++ machine_mode mode = GET_MODE (operands[3]); ++ rtx op1 = convert_modes (SImode, mode, operands[1], true); ++ rtx op3 = convert_modes (SImode, mode, operands[3], true); ++ rtx difference = gen_rtx_MINUS (SImode, op1, op3); ++ compare = gen_reg_rtx (SImode); ++ emit_insn (gen_rtx_SET (compare, difference)); ++ } ++ ++ if (word_mode != mode) ++ { ++ rtx reg = gen_reg_rtx (word_mode); ++ emit_insn (gen_rtx_SET (reg, gen_rtx_SIGN_EXTEND (word_mode, compare))); ++ compare = reg; ++ } ++ ++ emit_insn (gen_rtx_SET (operands[0], ++ gen_rtx_EQ (SImode, compare, const0_rtx))); + DONE; + }) + +- +- +- + (define_insn "atomic_cas_value_add_7_" + [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res +- (match_operand:GPR 1 "memory_operand" "+ZC")) ++ (match_operand:GPR 1 "memory_operand" "+ZB")) + (set (match_dup 1) + (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask + (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask +@@ -312,24 +482,46 @@ + (clobber (match_scratch:GPR 8 "=&r"))] + "" + { +- return "%G6\n\t" +- "1:\n\t" +- "ll.\t%0,%1\n\t" +- "and\t%7,%0,%3\n\t" +- "add.w\t%8,%0,%z5\n\t" +- "and\t%8,%8,%z2\n\t" +- "or%i8\t%7,%7,%8\n\t" +- "sc.\t%7,%1\n\t" +- "beq\t$zero,%7,1b"; ++ if (TARGET_uARCH_LA664) ++ { ++ enum memmodel model = memmodel_from_int (INTVAL (operands[6])); ++ output_asm_insn ("1:",operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_release (model)) ++ output_asm_insn ("ll.\t%0,%1", operands); ++ else ++ output_asm_insn ("llacq.\t%0,%1", operands); ++ ++ output_asm_insn ("and\t%7,%0,%3", operands); ++ output_asm_insn ("add.w\t%8,%0,%z5", operands); ++ output_asm_insn ("and\t%8,%8,%z2", operands); ++ output_asm_insn ("or%i8\t%7,%7,%8", operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model)) ++ output_asm_insn ("sc.\t%7,%1", operands); ++ else ++ output_asm_insn ("screl.\t%7,%1", operands); ++ ++ output_asm_insn ("beq\t$zero,%7,1b",operands); ++ return ""; ++ } ++ else ++ return "%G6\\n\\t" ++ "1:\\n\\t" ++ "ll.\\t%0,%1\\n\\t" ++ "and\\t%7,%0,%3\\n\\t" ++ "add.w\\t%8,%0,%z5\\n\\t" ++ "and\\t%8,%8,%z2\\n\\t" ++ "or%i8\\t%7,%7,%8\\n\\t" ++ "sc.\\t%7,%1\\n\\t" ++ "beq\\t$zero,%7,1b"; + } + + [(set (attr "length") (const_int 32))]) + +- +- + (define_insn "atomic_cas_value_sub_7_" + [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res +- (match_operand:GPR 1 "memory_operand" "+ZC")) ++ (match_operand:GPR 1 "memory_operand" "+ZB")) + (set (match_dup 1) + (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask + (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask +@@ -341,23 +533,45 @@ + (clobber 
(match_scratch:GPR 8 "=&r"))] + "" + { +- return "%G6\n\t" +- "1:\n\t" +- "ll.\t%0,%1\n\t" +- "and\t%7,%0,%3\n\t" +- "sub.w\t%8,%0,%z5\n\t" +- "and\t%8,%8,%z2\n\t" +- "or%i8\t%7,%7,%8\n\t" +- "sc.\t%7,%1\n\t" +- "beq\t$zero,%7,1b"; ++ if (TARGET_uARCH_LA664) ++ { ++ enum memmodel model = memmodel_from_int (INTVAL (operands[6])); ++ output_asm_insn ("1:",operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_release (model)) ++ output_asm_insn ("ll.\t%0,%1", operands); ++ else ++ output_asm_insn ("llacq.\t%0,%1", operands); ++ ++ output_asm_insn ("and\t%7,%0,%3", operands); ++ output_asm_insn ("sub.w\t%8,%0,%z5", operands); ++ output_asm_insn ("and\t%8,%8,%z2", operands); ++ output_asm_insn ("or%i8\t%7,%7,%8", operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model)) ++ output_asm_insn ("sc.\t%7,%1", operands); ++ else ++ output_asm_insn ("screl.\t%7,%1", operands); ++ ++ output_asm_insn ("beq\t$zero,%7,1b", operands); ++ return ""; ++ } ++ else ++ return "%G6\\n\\t" ++ "1:\\n\\t" ++ "ll.\\t%0,%1\\n\\t" ++ "and\\t%7,%0,%3\\n\\t" ++ "sub.w\\t%8,%0,%z5\\n\\t" ++ "and\\t%8,%8,%z2\\n\\t" ++ "or%i8\\t%7,%7,%8\\n\\t" ++ "sc.\\t%7,%1\\n\\t" ++ "beq\\t$zero,%7,1b"; + } + [(set (attr "length") (const_int 32))]) + +- +- + (define_insn "atomic_cas_value_and_7_" + [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res +- (match_operand:GPR 1 "memory_operand" "+ZC")) ++ (match_operand:GPR 1 "memory_operand" "+ZB")) + (set (match_dup 1) + (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask + (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask +@@ -369,21 +583,45 @@ + (clobber (match_scratch:GPR 8 "=&r"))] + "" + { +- return "%G6\n\t" +- "1:\n\t" +- "ll.\t%0,%1\n\t" +- "and\t%7,%0,%3\n\t" +- "and\t%8,%0,%z5\n\t" +- "and\t%8,%8,%z2\n\t" +- "or%i8\t%7,%7,%8\n\t" +- "sc.\t%7,%1\n\t" +- "beq\t$zero,%7,1b"; ++ if (TARGET_uARCH_LA664) ++ { ++ enum memmodel model = memmodel_from_int (INTVAL (operands[6])); ++ output_asm_insn ("1:",operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_release (model)) ++ output_asm_insn ("ll.\t%0,%1", operands); ++ else ++ output_asm_insn ("llacq.\t%0,%1", operands); ++ ++ output_asm_insn ("and\t%7,%0,%3", operands); ++ output_asm_insn ("and\t%8,%0,%z5", operands); ++ output_asm_insn ("and\t%8,%8,%z2", operands); ++ output_asm_insn ("or%i8\t%7,%7,%8", operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model)) ++ output_asm_insn ("sc.\t%7,%1", operands); ++ else ++ output_asm_insn ("screl.\t%7,%1", operands); ++ ++ output_asm_insn ("beq\t$zero,%7,1b", operands); ++ return ""; ++ } ++ else ++ return "%G6\\n\\t" ++ "1:\\n\\t" ++ "ll.\\t%0,%1\\n\\t" ++ "and\\t%7,%0,%3\\n\\t" ++ "and\\t%8,%0,%z5\\n\\t" ++ "and\\t%8,%8,%z2\\n\\t" ++ "or%i8\\t%7,%7,%8\\n\\t" ++ "sc.\\t%7,%1\\n\\t" ++ "beq\\t$zero,%7,1b"; + } + [(set (attr "length") (const_int 32))]) + + (define_insn "atomic_cas_value_xor_7_" + [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res +- (match_operand:GPR 1 "memory_operand" "+ZC")) ++ (match_operand:GPR 1 "memory_operand" "+ZB")) + (set (match_dup 1) + (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask + (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask +@@ -395,22 +633,46 @@ + (clobber (match_scratch:GPR 8 "=&r"))] + "" + { +- return "%G6\n\t" +- "1:\n\t" +- "ll.\t%0,%1\n\t" +- "and\t%7,%0,%3\n\t" +- "xor\t%8,%0,%z5\n\t" +- "and\t%8,%8,%z2\n\t" +- "or%i8\t%7,%7,%8\n\t" +- 
"sc.\t%7,%1\n\t" +- "beq\t$zero,%7,1b"; ++ if (TARGET_uARCH_LA664) ++ { ++ enum memmodel model = memmodel_from_int (INTVAL (operands[6])); ++ output_asm_insn ("1:",operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_release (model)) ++ output_asm_insn ("ll.\t%0,%1", operands); ++ else ++ output_asm_insn ("llacq.\t%0,%1", operands); ++ ++ output_asm_insn ("and\t%7,%0,%3", operands); ++ output_asm_insn ("xor\t%8,%0,%z5", operands); ++ output_asm_insn ("and\t%8,%8,%z2", operands); ++ output_asm_insn ("or%i8\t%7,%7,%8", operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model)) ++ output_asm_insn ("sc.\t%7,%1", operands); ++ else ++ output_asm_insn ("screl.\t%7,%1", operands); ++ ++ output_asm_insn ("beq\t$zero,%7,1b", operands); ++ return ""; ++ } ++ else ++ return "%G6\\n\\t" ++ "1:\\n\\t" ++ "ll.\\t%0,%1\\n\\t" ++ "and\\t%7,%0,%3\\n\\t" ++ "xor\\t%8,%0,%z5\\n\\t" ++ "and\\t%8,%8,%z2\\n\\t" ++ "or%i8\\t%7,%7,%8\\n\\t" ++ "sc.\\t%7,%1\\n\\t" ++ "beq\\t$zero,%7,1b"; + } + + [(set (attr "length") (const_int 32))]) + + (define_insn "atomic_cas_value_or_7_" + [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res +- (match_operand:GPR 1 "memory_operand" "+ZC")) ++ (match_operand:GPR 1 "memory_operand" "+ZB")) + (set (match_dup 1) + (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask + (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask +@@ -422,22 +684,46 @@ + (clobber (match_scratch:GPR 8 "=&r"))] + "" + { +- return "%G6\n\t" +- "1:\n\t" +- "ll.\t%0,%1\n\t" +- "and\t%7,%0,%3\n\t" +- "or\t%8,%0,%z5\n\t" +- "and\t%8,%8,%z2\n\t" +- "or%i8\t%7,%7,%8\n\t" +- "sc.\t%7,%1\n\t" +- "beq\t$zero,%7,1b"; ++ if (TARGET_uARCH_LA664) ++ { ++ enum memmodel model = memmodel_from_int (INTVAL (operands[6])); ++ output_asm_insn ("1:",operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_release (model)) ++ output_asm_insn ("ll.\t%0,%1", operands); ++ else ++ output_asm_insn ("llacq.\t%0,%1", operands); ++ ++ output_asm_insn ("and\t%7,%0,%3", operands); ++ output_asm_insn ("or\t%8,%0,%z5", operands); ++ output_asm_insn ("and\t%8,%8,%z2", operands); ++ output_asm_insn ("or%i8\t%7,%7,%8", operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model)) ++ output_asm_insn ("sc.\t%7,%1", operands); ++ else ++ output_asm_insn ("screl.\t%7,%1", operands); ++ ++ output_asm_insn ("beq\t$zero,%7,1b", operands); ++ return ""; ++ } ++ else ++ return "%G6\\n\\t" ++ "1:\\n\\t" ++ "ll.\\t%0,%1\\n\\t" ++ "and\\t%7,%0,%3\\n\\t" ++ "or\\t%8,%0,%z5\\n\\t" ++ "and\\t%8,%8,%z2\\n\\t" ++ "or%i8\\t%7,%7,%8\\n\\t" ++ "sc.\\t%7,%1\\n\\t" ++ "beq\\t$zero,%7,1b"; + } + + [(set (attr "length") (const_int 32))]) + + (define_insn "atomic_cas_value_nand_7_" + [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res +- (match_operand:GPR 1 "memory_operand" "+ZC")) ++ (match_operand:GPR 1 "memory_operand" "+ZB")) + (set (match_dup 1) + (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask + (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask +@@ -449,21 +735,45 @@ + (clobber (match_scratch:GPR 8 "=&r"))] + "" + { +- return "%G6\n\t" +- "1:\n\t" +- "ll.\t%0,%1\n\t" +- "and\t%7,%0,%3\n\t" +- "and\t%8,%0,%z5\n\t" +- "xor\t%8,%8,%z2\n\t" +- "or%i8\t%7,%7,%8\n\t" +- "sc.\t%7,%1\n\t" +- "beq\t$zero,%7,1b"; ++ if (TARGET_uARCH_LA664) ++ { ++ enum memmodel model = memmodel_from_int (INTVAL (operands[6])); ++ output_asm_insn ("1:",operands); ++ ++ if (is_mm_relaxed 
(model) || is_mm_consume (model) || is_mm_release (model)) ++ output_asm_insn ("ll.\t%0,%1", operands); ++ else ++ output_asm_insn ("llacq.\t%0,%1", operands); ++ ++ output_asm_insn ("and\t%7,%0,%3", operands); ++ output_asm_insn ("and\t%8,%0,%z5", operands); ++ output_asm_insn ("xor\t%8,%8,%z2", operands); ++ output_asm_insn ("or%i8\t%7,%7,%8", operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model)) ++ output_asm_insn ("sc.\t%7,%1", operands); ++ else ++ output_asm_insn ("screl.\t%7,%1", operands); ++ ++ output_asm_insn ("beq\t$zero,%7,1b", operands); ++ return ""; ++ } ++ else ++ return "%G6\\n\\t" ++ "1:\\n\\t" ++ "ll.\\t%0,%1\\n\\t" ++ "and\\t%7,%0,%3\\n\\t" ++ "and\\t%8,%0,%z5\\n\\t" ++ "xor\\t%8,%8,%z2\\n\\t" ++ "or%i8\\t%7,%7,%8\\n\\t" ++ "sc.\\t%7,%1\\n\\t" ++ "beq\\t$zero,%7,1b"; + } + [(set (attr "length") (const_int 32))]) + + (define_insn "atomic_cas_value_exchange_7_" + [(set (match_operand:GPR 0 "register_operand" "=&r") +- (match_operand:GPR 1 "memory_operand" "+ZC")) ++ (match_operand:GPR 1 "memory_operand" "+ZB")) + (set (match_dup 1) + (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") + (match_operand:GPR 3 "reg_or_0_operand" "rJ") +@@ -474,13 +784,36 @@ + (clobber (match_scratch:GPR 7 "=&r"))] + "" + { +- return "%G6\\n\\t" +- "1:\\n\\t" +- "ll.\\t%0,%1\\n\\t" +- "and\\t%7,%0,%z3\\n\\t" +- "or%i5\\t%7,%7,%5\\n\\t" +- "sc.\\t%7,%1\\n\\t" +- "beqz\\t%7,1b\\n\\t"; ++ if (TARGET_uARCH_LA664) ++ { ++ enum memmodel model = memmodel_from_int (INTVAL (operands[6])); ++ output_asm_insn ("1:",operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_release (model)) ++ output_asm_insn ("ll.\t%0,%1", operands); ++ else ++ output_asm_insn ("llacq.\t%0,%1", operands); ++ ++ output_asm_insn ("and\t%7,%0,%z3", operands); ++ output_asm_insn ("or%i5\t%7,%7,%5", operands); ++ ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model)) ++ output_asm_insn ("sc.\t%7,%1", operands); ++ else ++ output_asm_insn ("screl.\t%7,%1", operands); ++ ++ output_asm_insn ("beqz\t%7,1b", operands); ++ ++ return ""; ++ } ++ else ++ return "%G6\\n\\t" ++ "1:\\n\\t" ++ "ll.\\t%0,%1\\n\\t" ++ "and\\t%7,%0,%z3\\n\\t" ++ "or%i5\\t%7,%7,%5\\n\\t" ++ "sc.\\t%7,%1\\n\\t" ++ "beqz\\t%7,1b\\n\\t"; + } + [(set (attr "length") (const_int 20))]) + +@@ -494,17 +827,30 @@ + (match_operand:SHORT 2 "register_operand"))] + "" + { +- union loongarch_gen_fn_ptrs generator; +- generator.fn_7 = gen_atomic_cas_value_exchange_7_si; +- loongarch_expand_atomic_qihi (generator, +- operands[0], +- operands[1], +- const0_rtx, +- operands[2], +- operands[3]); ++ if (TARGET_uARCH_LA664) ++ emit_insn (gen_atomic_exchange_1 (operands[0], operands[1], operands[2], operands[3])); ++ else ++ { ++ union loongarch_gen_fn_ptrs generator; ++ generator.fn_7 = gen_atomic_cas_value_exchange_7_si; ++ loongarch_expand_atomic_qihi (generator, operands[0], operands[1], ++ const0_rtx, operands[2], operands[3]); ++ } + DONE; + }) + ++(define_insn "atomic_fetch_add_1" ++ [(set (match_operand:SHORT 0 "register_operand" "=&r") ++ (match_operand:SHORT 1 "memory_operand" "+ZB")) ++ (set (match_dup 1) ++ (unspec_volatile:SHORT ++ [(plus:SHORT (match_dup 1) ++ (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) ++ (match_operand:SI 3 "const_int_operand")] ;; model ++ UNSPEC_SYNC_OLD_OP))] ++ "" ++ "%J3\n\tamadd%A3.\t%0,%z2,%1\n\t%K3" ++ [(set (attr "length") (const_int 8))]) + + (define_expand "atomic_fetch_add" + [(set (match_operand:SHORT 0 "register_operand" "=&r") 
+@@ -512,19 +858,21 @@ + (set (match_dup 1) + (unspec_volatile:SHORT + [(plus:SHORT (match_dup 1) +- (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) ++ (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) + (match_operand:SI 3 "const_int_operand")] ;; model + UNSPEC_SYNC_OLD_OP))] + "" + { +- union loongarch_gen_fn_ptrs generator; +- generator.fn_7 = gen_atomic_cas_value_add_7_si; +- loongarch_expand_atomic_qihi (generator, +- operands[0], +- operands[1], +- operands[1], +- operands[2], +- operands[3]); ++ if (TARGET_uARCH_LA664) ++ emit_insn (gen_atomic_fetch_add_1 (operands[0], operands[1], ++ operands[2], operands[3])); ++ else ++ { ++ union loongarch_gen_fn_ptrs generator; ++ generator.fn_7 = gen_atomic_cas_value_add_7_si; ++ loongarch_expand_atomic_qihi (generator, operands[0], operands[1], ++ operands[1], operands[2], operands[3]); ++ } + DONE; + }) + +@@ -534,19 +882,15 @@ + (set (match_dup 1) + (unspec_volatile:SHORT + [(minus:SHORT (match_dup 1) +- (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) ++ (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) + (match_operand:SI 3 "const_int_operand")] ;; model + UNSPEC_SYNC_OLD_OP))] + "" + { + union loongarch_gen_fn_ptrs generator; + generator.fn_7 = gen_atomic_cas_value_sub_7_si; +- loongarch_expand_atomic_qihi (generator, +- operands[0], +- operands[1], +- operands[1], +- operands[2], +- operands[3]); ++ loongarch_expand_atomic_qihi (generator, operands[0], operands[1], ++ operands[1], operands[2], operands[3]); + DONE; + }) + +@@ -556,19 +900,15 @@ + (set (match_dup 1) + (unspec_volatile:SHORT + [(and:SHORT (match_dup 1) +- (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) ++ (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) + (match_operand:SI 3 "const_int_operand")] ;; model + UNSPEC_SYNC_OLD_OP))] + "" + { + union loongarch_gen_fn_ptrs generator; + generator.fn_7 = gen_atomic_cas_value_and_7_si; +- loongarch_expand_atomic_qihi (generator, +- operands[0], +- operands[1], +- operands[1], +- operands[2], +- operands[3]); ++ loongarch_expand_atomic_qihi (generator, operands[0], operands[1], ++ operands[1], operands[2], operands[3]); + DONE; + }) + +@@ -578,19 +918,15 @@ + (set (match_dup 1) + (unspec_volatile:SHORT + [(xor:SHORT (match_dup 1) +- (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) ++ (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) + (match_operand:SI 3 "const_int_operand")] ;; model + UNSPEC_SYNC_OLD_OP))] + "" + { + union loongarch_gen_fn_ptrs generator; + generator.fn_7 = gen_atomic_cas_value_xor_7_si; +- loongarch_expand_atomic_qihi (generator, +- operands[0], +- operands[1], +- operands[1], +- operands[2], +- operands[3]); ++ loongarch_expand_atomic_qihi (generator, operands[0], operands[1], ++ operands[1], operands[2], operands[3]); + DONE; + }) + +@@ -600,19 +936,15 @@ + (set (match_dup 1) + (unspec_volatile:SHORT + [(ior:SHORT (match_dup 1) +- (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) ++ (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) + (match_operand:SI 3 "const_int_operand")] ;; model + UNSPEC_SYNC_OLD_OP))] + "" + { + union loongarch_gen_fn_ptrs generator; + generator.fn_7 = gen_atomic_cas_value_or_7_si; +- loongarch_expand_atomic_qihi (generator, +- operands[0], +- operands[1], +- operands[1], +- operands[2], +- operands[3]); ++ loongarch_expand_atomic_qihi (generator, operands[0], operands[1], ++ operands[1], operands[2], operands[3]); + DONE; + }) + +@@ -622,18 +954,14 @@ + (set (match_dup 1) + (unspec_volatile:SHORT + [(not:SHORT (and:SHORT (match_dup 1) +- (match_operand:SHORT 2 "reg_or_0_operand" "rJ"))) 
++ (match_operand:SHORT 2 "reg_or_0_operand" "rJ"))) + (match_operand:SI 3 "const_int_operand")] ;; model + UNSPEC_SYNC_OLD_OP))] + "" + { + union loongarch_gen_fn_ptrs generator; + generator.fn_7 = gen_atomic_cas_value_nand_7_si; +- loongarch_expand_atomic_qihi (generator, +- operands[0], +- operands[1], +- operands[1], +- operands[2], +- operands[3]); ++ loongarch_expand_atomic_qihi (generator, operands[0], operands[1], ++ operands[1], operands[2], operands[3]); + DONE; + }) +diff --git a/gcc/config/loongarch/t-linux b/gcc/config/loongarch/t-linux +index 58f27f89d..5ecf814fa 100644 +--- a/gcc/config/loongarch/t-linux ++++ b/gcc/config/loongarch/t-linux +@@ -16,8 +16,65 @@ + # along with GCC; see the file COPYING3. If not see + # . + +-MULTILIB_OSDIRNAMES := ../lib64$(call if_multiarch,:loongarch64-linux-gnu) +-MULTIARCH_DIRNAME := $(call if_multiarch,loongarch64-linux-gnu) ++# Multilib ++MULTILIB_OPTIONS = mabi=lp64d/mabi=lp64f/mabi=lp64s ++MULTILIB_DIRNAMES = . base/lp64f base/lp64s ++ ++# The GCC driver always gets all abi-related options on the command line. ++# (see loongarch-driver.c:driver_get_normalized_m_opts) ++comma=, ++MULTILIB_REQUIRED = $(foreach mlib,$(subst $(comma), ,$(TM_MULTILIB_CONFIG)),\ ++ $(firstword $(subst /, ,$(mlib)))) + +-# haven't supported lp32 yet +-MULTILIB_EXCEPTIONS = mabi=lp32 ++SPECS = specs.install ++ ++# temporary self_spec when building libraries (e.g. libgcc) ++gen_mlib_spec = $(if $(word 2,$1),\ ++ %{$(firstword $1):$(patsubst %,-%,$(wordlist 2,$(words $1),$1))}) ++ ++# clean up the result of DRIVER_SELF_SPEC to avoid conflict ++lib_build_self_spec = % $@ ++ ++# Remove lib_build_self_specs before regression tests. ++.PHONY: remove-lib-specs ++check check-host check-target $(CHECK_TARGETS) $(lang_checks): remove-lib-specs ++remove-lib-specs: ++ -mv -f specs.install specs 2>/dev/null ++ ++# Multiarch ++ifneq ($(call if_multiarch,yes),yes) ++ # Define LA_DISABLE_MULTIARCH if multiarch is disabled. ++ tm_defines += LA_DISABLE_MULTIARCH ++else ++ # Only define MULTIARCH_DIRNAME when multiarch is enabled, ++ # or it would always introduce ${target} into the search path. ++ MULTIARCH_DIRNAME = $(LA_MULTIARCH_TRIPLET) ++endif ++ ++# Don't define MULTILIB_OSDIRNAMES if multilib is disabled. ++ifeq ($(filter LA_DISABLE_MULTILIB,$(tm_defines)),) ++ ++ MULTILIB_OSDIRNAMES = \ ++ mabi.lp64d=../lib64$\ ++ $(call if_multiarch,:loongarch64-linux-gnu) ++ ++ MULTILIB_OSDIRNAMES += \ ++ mabi.lp64f=../lib64/f32$\ ++ $(call if_multiarch,:loongarch64-linux-gnuf32) ++ ++ MULTILIB_OSDIRNAMES += \ ++ mabi.lp64s=../lib64/sf$\ ++ $(call if_multiarch,:loongarch64-linux-gnusf) ++else ++ MULTILIB_OSDIRNAMES := ../lib64 ++endif +diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch +index 5689da44a..9d32fbcf6 100644 +--- a/gcc/config/loongarch/t-loongarch ++++ b/gcc/config/loongarch/t-loongarch +@@ -16,14 +16,20 @@ + # along with GCC; see the file COPYING3. If not see + # . 
+ +-$(srcdir)/config/loongarch/loongarch-tables.opt: $(srcdir)/config/loongarch/genopt.sh \ +- $(srcdir)/config/loongarch/loongarch-cpus.def +- $(SHELL) $(srcdir)/config/loongarch/genopt.sh $(srcdir)/config/loongarch > \ +- $(srcdir)/config/loongarch/loongarch-tables.opt ++# Canonical target triplet from config.gcc ++LA_MULTIARCH_TRIPLET = $(patsubst LA_MULTIARCH_TRIPLET=%,%,$\ ++$(filter LA_MULTIARCH_TRIPLET=%,$(tm_defines))) + +-frame-header-opt.o: $(srcdir)/config/loongarch/frame-header-opt.c +- $(COMPILE) $< +- $(POSTCOMPILE) ++# String definition header ++LA_STR_H = $(srcdir)/config/loongarch/loongarch-str.h ++$(LA_STR_H): s-loongarch-str ; @true ++s-loongarch-str: $(srcdir)/config/loongarch/genopts/genstr.sh \ ++ $(srcdir)/config/loongarch/genopts/loongarch-strings ++ $(SHELL) $(srcdir)/config/loongarch/genopts/genstr.sh header \ ++ $(srcdir)/config/loongarch/genopts/loongarch-strings > \ ++ tmp-loongarch-str.h ++ $(SHELL) $(srcdir)/../move-if-change tmp-loongarch-str.h $(LA_STR_H) ++ $(STAMP) s-loongarch-str + + loongarch-c.o: $(srcdir)/config/loongarch/loongarch-c.c $(CONFIG_H) $(SYSTEM_H) \ + coretypes.h $(TM_H) $(TREE_H) output.h $(C_COMMON_H) $(TARGET_H) +@@ -31,15 +37,32 @@ loongarch-c.o: $(srcdir)/config/loongarch/loongarch-c.c $(CONFIG_H) $(SYSTEM_H) + $(srcdir)/config/loongarch/loongarch-c.c + + loongarch-builtins.o: $(srcdir)/config/loongarch/loongarch-builtins.c $(CONFIG_H) \ +- $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(TREE_H) $(RECOG_H) langhooks.h \ +- $(DIAGNOSTIC_CORE_H) $(OPTABS_H) $(srcdir)/config/loongarch/loongarch-ftypes.def \ +- $(srcdir)/config/loongarch/loongarch-modes.def ++ $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(TREE_H) $(RECOG_H) langhooks.h \ ++ $(DIAGNOSTIC_CORE_H) $(OPTABS_H) $(srcdir)/config/loongarch/loongarch-ftypes.def \ ++ $(srcdir)/config/loongarch/loongarch-modes.def + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ +- $(srcdir)/config/loongarch/loongarch-builtins.c +-loongarch-d.o: $(srcdir)/config/loongarch/loongarch-d.c +- $(COMPILE) $< +- $(POSTCOMPILE) +- +-comma=, +-MULTILIB_OPTIONS = $(subst $(comma),/, $(patsubst %, mabi=%, $(subst $(comma),$(comma)mabi=,$(TM_MULTILIB_CONFIG)))) +-MULTILIB_DIRNAMES = $(subst $(comma), ,$(TM_MULTILIB_CONFIG)) ++ $(srcdir)/config/loongarch/loongarch-builtins.c ++ ++loongarch-driver.o : $(srcdir)/config/loongarch/loongarch-driver.c $(LA_STR_H) \ ++ $(CONFIG_H) $(SYSTEM_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< ++ ++loongarch-opts.o: $(srcdir)/config/loongarch/loongarch-opts.c $(LA_STR_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< ++ ++loongarch-cpu.o: $(srcdir)/config/loongarch/loongarch-cpu.c $(LA_STR_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< ++ ++loongarch-def.o: $(srcdir)/config/loongarch/loongarch-def.c $(LA_STR_H) ++ $(CC) -c $(ALL_CFLAGS) $(INCLUDES) $< ++ ++$(srcdir)/config/loongarch/loongarch.opt: s-loongarch-opt ; @true ++s-loongarch-opt: $(srcdir)/config/loongarch/genopts/genstr.sh \ ++ $(srcdir)/config/loongarch/genopts/loongarch.opt.in ++ $(SHELL) $(srcdir)/config/loongarch/genopts/genstr.sh opt \ ++ $(srcdir)/config/loongarch/genopts/loongarch.opt.in \ ++ > tmp-loongarch.opt ++ $(SHELL) $(srcdir)/../move-if-change tmp-loongarch.opt \ ++ $(srcdir)/config/loongarch/loongarch.opt ++ $(STAMP) s-loongarch-opt ++ +diff --git a/gcc/config/loongarch/x-native b/gcc/config/loongarch/x-native +deleted file mode 100644 +index 827d21f1a..000000000 +--- a/gcc/config/loongarch/x-native ++++ /dev/null 
+@@ -1,3 +0,0 @@ +-driver-native.o : $(srcdir)/config/loongarch/driver-native.c \ +- $(CONFIG_H) $(SYSTEM_H) +- $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< +diff --git a/libgcc/config/loongarch/crtfastmath.c b/libgcc/config/loongarch/crtfastmath.c +index d7371de6d..5f7b298ac 100644 +--- a/libgcc/config/loongarch/crtfastmath.c ++++ b/libgcc/config/loongarch/crtfastmath.c +@@ -1,30 +1,32 @@ +-/* Copyright (C) 2010-2018 Free Software Foundation, Inc. ++/* Copyright (C) 2020-2022 Free Software Foundation, Inc. ++ Contributed by Loongson Ltd. ++ Based on MIPS target for GNU compiler. + +- This file is part of GCC. ++This file is part of GCC. + +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published by +- the Free Software Foundation; either version 3, or (at your option) +- any later version. ++GCC is free software; you can redistribute it and/or modify it ++under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. + +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. ++GCC is distributed in the hope that it will be useful, but WITHOUT ++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++License for more details. + +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. ++Under Section 7 of GPL version 3, you are granted additional ++permissions described in the GCC Runtime Library Exception, version ++3.1, as published by the Free Software Foundation. + +- You should have received a copy of the GNU General Public License +- and a copy of the GCC Runtime Library Exception along with this +- program; see the files COPYING3 and COPYING.RUNTIME respectively. +- If not, see . */ ++You should have received a copy of the GNU General Public License ++and a copy of the GCC Runtime Library Exception along with this ++program; see the files COPYING3 and COPYING.RUNTIME respectively. ++If not, see . */ + + #ifdef __loongarch_hard_float + + /* Rounding control. */ +-#define _FPU_RC_NEAREST 0x000 /* RECOMMENDED */ ++#define _FPU_RC_NEAREST 0x000 /* RECOMMENDED. */ + #define _FPU_RC_ZERO 0x100 + #define _FPU_RC_UP 0x200 + #define _FPU_RC_DOWN 0x300 +@@ -33,18 +35,18 @@ + #define _FPU_IEEE 0x0000001F + + /* Macros for accessing the hardware control word. */ +-#define _FPU_GETCW(cw) __asm__ ("movgr2fcsr %0,$r1" : "=r" (cw)) +-#define _FPU_SETCW(cw) __asm__ ("movfcsr2gr %0,$r1" : : "r" (cw)) ++#define _FPU_GETCW(cw) __asm__ volatile ("movfcsr2gr %0,$r0" : "=r" (cw)) ++#define _FPU_SETCW(cw) __asm__ volatile ("movgr2fcsr $r0,%0" : : "r" (cw)) + + static void __attribute__((constructor)) + set_fast_math (void) + { + unsigned int fcr; + +- /* round to nearest, IEEE exceptions disabled. */ ++ /* Flush to zero, round to nearest, IEEE exceptions disabled. 
*/ + fcr = _FPU_RC_NEAREST; + +- _FPU_SETCW(fcr); ++ _FPU_SETCW (fcr); + } + +-#endif /* __loongarch_hard_float */ ++#endif /* __loongarch_hard_float */ +diff --git a/libgcc/config/loongarch/crti.S b/libgcc/config/loongarch/crti.S +deleted file mode 100644 +index dcd05afea..000000000 +--- a/libgcc/config/loongarch/crti.S ++++ /dev/null +@@ -1,43 +0,0 @@ +-/* Copyright (C) 2001-2018 Free Software Foundation, Inc. +- +-This file is part of GCC. +- +-GCC is free software; you can redistribute it and/or modify it under +-the terms of the GNU General Public License as published by the Free +-Software Foundation; either version 3, or (at your option) any later +-version. +- +-GCC is distributed in the hope that it will be useful, but WITHOUT ANY +-WARRANTY; without even the implied warranty of MERCHANTABILITY or +-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +-for more details. +- +-Under Section 7 of GPL version 3, you are granted additional +-permissions described in the GCC Runtime Library Exception, version +-3.1, as published by the Free Software Foundation. +- +-You should have received a copy of the GNU General Public License and +-a copy of the GCC Runtime Library Exception along with this program; +-see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +-. */ +- +-/* 4 slots for argument spill area. 1 for cpreturn, 1 for stack. +- Return spill offset of 40 and 20. Aligned to 16 bytes for n32. */ +- +- .section .init,"ax",@progbits +- .globl _init +- .type _init,@function +-_init: +- addi.d $r3,$r3,-48 +- st.d $r1,$r3,40 +- addi.d $r3,$r3,48 +- jirl $r0,$r1,0 +- +- .section .fini,"ax",@progbits +- .globl _fini +- .type _fini,@function +-_fini: +- addi.d $r3,$r3,-48 +- st.d $r1,$r3,40 +- addi.d $r3,$r3,48 +- jirl $r0,$r1,0 +diff --git a/libgcc/config/loongarch/crtn.S b/libgcc/config/loongarch/crtn.S +deleted file mode 100644 +index 91d9d5e7f..000000000 +--- a/libgcc/config/loongarch/crtn.S ++++ /dev/null +@@ -1,39 +0,0 @@ +-/* Copyright (C) 2001-2018 Free Software Foundation, Inc. +- +-This file is part of GCC. +- +-GCC is free software; you can redistribute it and/or modify it under +-the terms of the GNU General Public License as published by the Free +-Software Foundation; either version 3, or (at your option) any later +-version. +- +-GCC is distributed in the hope that it will be useful, but WITHOUT ANY +-WARRANTY; without even the implied warranty of MERCHANTABILITY or +-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +-for more details. +- +-Under Section 7 of GPL version 3, you are granted additional +-permissions described in the GCC Runtime Library Exception, version +-3.1, as published by the Free Software Foundation. +- +-You should have received a copy of the GNU General Public License and +-a copy of the GCC Runtime Library Exception along with this program; +-see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +-. */ +- +-/* 4 slots for argument spill area. 1 for cpreturn, 1 for stack. +- Return spill offset of 40 and 20. Aligned to 16 bytes for n32. 
*/ +- +- +- .section .init,"ax",@progbits +-init: +- ld.d $r1,$r3,40 +- addi.d $r3,$r3,48 +- jirl $r0,$r1,0 +- +- .section .fini,"ax",@progbits +-fini: +- ld.d $r1,$r3,40 +- addi.d $r3,$r3,48 +- jirl $r0,$r1,0 +- +diff --git a/libgcc/config/loongarch/gthr-loongnixsde.h b/libgcc/config/loongarch/gthr-loongnixsde.h +deleted file mode 100644 +index f62b57318..000000000 +--- a/libgcc/config/loongarch/gthr-loongnixsde.h ++++ /dev/null +@@ -1,237 +0,0 @@ +-/* LARCH SDE threads compatibility routines for libgcc2 and libobjc. */ +-/* Compile this one with gcc. */ +-/* Copyright (C) 2006-2018 Free Software Foundation, Inc. +- Contributed by Nigel Stephens +- +-This file is part of GCC. +- +-GCC is free software; you can redistribute it and/or modify it under +-the terms of the GNU General Public License as published by the Free +-Software Foundation; either version 3, or (at your option) any later +-version. +- +-GCC is distributed in the hope that it will be useful, but WITHOUT ANY +-WARRANTY; without even the implied warranty of MERCHANTABILITY or +-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +-for more details. +- +-Under Section 7 of GPL version 3, you are granted additional +-permissions described in the GCC Runtime Library Exception, version +-3.1, as published by the Free Software Foundation. +- +-You should have received a copy of the GNU General Public License and +-a copy of the GCC Runtime Library Exception along with this program; +-see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +-. */ +- +-#ifndef GCC_GTHR_LARCHSDE_H +-#define GCC_GTHR_LARCHSDE_H +- +-/* LARCH SDE threading API specific definitions. +- Easy, since the interface is pretty much one-to-one. */ +- +-#define __GTHREADS 1 +- +-#include +-#include +- +-#ifdef __cplusplus +-extern "C" { +-#endif +- +-typedef __sdethread_key_t __gthread_key_t; +-typedef __sdethread_once_t __gthread_once_t; +-typedef __sdethread_mutex_t __gthread_mutex_t; +- +-typedef struct { +- long depth; +- __sdethread_t owner; +- __sdethread_mutex_t actual; +-} __gthread_recursive_mutex_t; +- +-#define __GTHREAD_MUTEX_INIT __SDETHREAD_MUTEX_INITIALIZER("gthr") +-#define __GTHREAD_ONCE_INIT __SDETHREAD_ONCE_INIT +-static inline int +-__gthread_recursive_mutex_init_function(__gthread_recursive_mutex_t *__mutex); +-#define __GTHREAD_RECURSIVE_MUTEX_INIT_FUNCTION __gthread_recursive_mutex_init_function +- +-#if SUPPORTS_WEAK && GTHREAD_USE_WEAK +-# define __gthrw(name) \ +- static __typeof(name) __gthrw_ ## name __attribute__ ((__weakref__(#name))); +-# define __gthrw_(name) __gthrw_ ## name +-#else +-# define __gthrw(name) +-# define __gthrw_(name) name +-#endif +- +-__gthrw(__sdethread_once) +-__gthrw(__sdethread_key_create) +-__gthrw(__sdethread_key_delete) +-__gthrw(__sdethread_getspecific) +-__gthrw(__sdethread_setspecific) +- +-__gthrw(__sdethread_self) +- +-__gthrw(__sdethread_mutex_lock) +-__gthrw(__sdethread_mutex_trylock) +-__gthrw(__sdethread_mutex_unlock) +- +-__gthrw(__sdethread_mutex_init) +- +-__gthrw(__sdethread_threading) +- +-#if SUPPORTS_WEAK && GTHREAD_USE_WEAK +- +-static inline int +-__gthread_active_p (void) +-{ +- return !!(void *)&__sdethread_threading; +-} +- +-#else /* not SUPPORTS_WEAK */ +- +-static inline int +-__gthread_active_p (void) +-{ +- return 1; +-} +- +-#endif /* SUPPORTS_WEAK */ +- +-static inline int +-__gthread_once (__gthread_once_t *__once, void (*__func) (void)) +-{ +- if (__gthread_active_p ()) +- return __gthrw_(__sdethread_once) (__once, __func); +- else +- return -1; 
+-} +- +-static inline int +-__gthread_key_create (__gthread_key_t *__key, void (*__dtor) (void *)) +-{ +- return __gthrw_(__sdethread_key_create) (__key, __dtor); +-} +- +-static inline int +-__gthread_key_delete (__gthread_key_t __key) +-{ +- return __gthrw_(__sdethread_key_delete) (__key); +-} +- +-static inline void * +-__gthread_getspecific (__gthread_key_t __key) +-{ +- return __gthrw_(__sdethread_getspecific) (__key); +-} +- +-static inline int +-__gthread_setspecific (__gthread_key_t __key, const void *__ptr) +-{ +- return __gthrw_(__sdethread_setspecific) (__key, __ptr); +-} +- +-static inline int +-__gthread_mutex_destroy (__gthread_mutex_t * UNUSED(__mutex)) +-{ +- return 0; +-} +- +-static inline int +-__gthread_mutex_lock (__gthread_mutex_t *__mutex) +-{ +- if (__gthread_active_p ()) +- return __gthrw_(__sdethread_mutex_lock) (__mutex); +- else +- return 0; +-} +- +-static inline int +-__gthread_mutex_trylock (__gthread_mutex_t *__mutex) +-{ +- if (__gthread_active_p ()) +- return __gthrw_(__sdethread_mutex_trylock) (__mutex); +- else +- return 0; +-} +- +-static inline int +-__gthread_mutex_unlock (__gthread_mutex_t *__mutex) +-{ +- if (__gthread_active_p ()) +- return __gthrw_(__sdethread_mutex_unlock) (__mutex); +- else +- return 0; +-} +- +-static inline int +-__gthread_recursive_mutex_init_function (__gthread_recursive_mutex_t *__mutex) +-{ +- __mutex->depth = 0; +- __mutex->owner = __gthrw_(__sdethread_self) (); +- return __gthrw_(__sdethread_mutex_init) (&__mutex->actual, NULL); +-} +- +-static inline int +-__gthread_recursive_mutex_lock (__gthread_recursive_mutex_t *__mutex) +-{ +- if (__gthread_active_p ()) +- { +- __sdethread_t __me = __gthrw_(__sdethread_self) (); +- +- if (__mutex->owner != __me) +- { +- __gthrw_(__sdethread_mutex_lock) (&__mutex->actual); +- __mutex->owner = __me; +- } +- +- __mutex->depth++; +- } +- return 0; +-} +- +-static inline int +-__gthread_recursive_mutex_trylock (__gthread_recursive_mutex_t *__mutex) +-{ +- if (__gthread_active_p ()) +- { +- __sdethread_t __me = __gthrw_(__sdethread_self) (); +- +- if (__mutex->owner != __me) +- { +- if (__gthrw_(__sdethread_mutex_trylock) (&__mutex->actual)) +- return 1; +- __mutex->owner = __me; +- } +- +- __mutex->depth++; +- } +- return 0; +-} +- +-static inline int +-__gthread_recursive_mutex_unlock (__gthread_recursive_mutex_t *__mutex) +-{ +- if (__gthread_active_p ()) +- { +- if (--__mutex->depth == 0) +- { +- __mutex->owner = (__sdethread_t) 0; +- __gthrw_(__sdethread_mutex_unlock) (&__mutex->actual); +- } +- } +- return 0; +-} +- +-static inline int +-__gthread_recursive_mutex_destroy (__gthread_recursive_mutex_t +- * UNUSED(__mutex)) +-{ +- return 0; +-} +- +-#ifdef __cplusplus +-} +-#endif +- +-#endif /* ! GCC_GTHR_LARCHSDE_H */ +diff --git a/libgcc/config/loongarch/linux-unwind.h b/libgcc/config/loongarch/linux-unwind.h +index d77dfb058..30603e44f 100644 +--- a/libgcc/config/loongarch/linux-unwind.h ++++ b/libgcc/config/loongarch/linux-unwind.h +@@ -1,5 +1,5 @@ +-/* DWARF2 EH unwinding support for LARCH Linux. +- Copyright (C) 2004-2018 Free Software Foundation, Inc. ++/* DWARF2 EH unwinding support for LoongArch Linux. ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. + + This file is part of GCC. + +@@ -34,26 +34,27 @@ see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see + + static _Unwind_Reason_Code + loongarch_fallback_frame_state (struct _Unwind_Context *context, +- _Unwind_FrameState *fs) ++ _Unwind_FrameState *fs) + { + u_int32_t *pc = (u_int32_t *) context->ra; + struct sigcontext *sc; + _Unwind_Ptr new_cfa; + int i; + +- /* 03822c0b dli a7, 0x8b (sigreturn) */ +- /* 002b0000 syscall 0 */ ++ /* 03822c0b li.d a7, 0x8b (sigreturn) */ ++ /* 002b0000 syscall 0 */ + if (pc[1] != 0x002b0000) + return _URC_END_OF_STACK; + if (pc[0] == 0x03822c0b) + { +- struct rt_sigframe { ++ struct rt_sigframe ++ { + u_int32_t ass[4]; /* Argument save space for o32. */ + u_int32_t trampoline[2]; + siginfo_t info; + ucontext_t uc; + } *rt_ = context->cfa; +- sc = &rt_->uc.uc_mcontext; ++ sc = (struct sigcontext *) (void *) &rt_->uc.uc_mcontext; + } + else + return _URC_END_OF_STACK; +@@ -63,17 +64,17 @@ loongarch_fallback_frame_state (struct _Unwind_Context *context, + fs->regs.cfa_reg = __LIBGCC_STACK_POINTER_REGNUM__; + fs->regs.cfa_offset = new_cfa - (_Unwind_Ptr) context->cfa; + +- for (i = 0; i < 32; i++) { +- fs->regs.reg[i].how = REG_SAVED_OFFSET; +- fs->regs.reg[i].loc.offset +- = (_Unwind_Ptr)&(sc->sc_regs[i]) - new_cfa; +- } ++ for (i = 0; i < 32; i++) ++ { ++ fs->regs.reg[i].how = REG_SAVED_OFFSET; ++ fs->regs.reg[i].loc.offset = (_Unwind_Ptr) & (sc->sc_regs[i]) - new_cfa; ++ } + + fs->signal_frame = 1; + fs->regs.reg[__LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__].how + = REG_SAVED_VAL_OFFSET; + fs->regs.reg[__LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__].loc.offset +- = (_Unwind_Ptr)(sc->sc_pc) - new_cfa; ++ = (_Unwind_Ptr) (sc->sc_pc) - new_cfa; + fs->retaddr_column = __LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__; + + return _URC_NO_REASON; +diff --git a/libgcc/config/loongarch/sfp-machine.h b/libgcc/config/loongarch/sfp-machine.h +index f7800a003..420f94274 100644 +--- a/libgcc/config/loongarch/sfp-machine.h ++++ b/libgcc/config/loongarch/sfp-machine.h +@@ -1,5 +1,5 @@ +-/* softfp machine description for LARCH. +- Copyright (C) 2009-2018 Free Software Foundation, Inc. ++/* softfp machine description for LoongArch. ++ Copyright (C) 2020-2022 Free Software Foundation, Inc. + + This file is part of GCC. + +@@ -23,49 +23,49 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . 
*/ + + #ifdef __loongarch64 +-#define _FP_W_TYPE_SIZE 64 +-#define _FP_W_TYPE unsigned long long +-#define _FP_WS_TYPE signed long long +-#define _FP_I_TYPE long long ++#define _FP_W_TYPE_SIZE 64 ++#define _FP_W_TYPE unsigned long long ++#define _FP_WS_TYPE signed long long ++#define _FP_I_TYPE long long + + typedef int TItype __attribute__ ((mode (TI))); + typedef unsigned int UTItype __attribute__ ((mode (TI))); + #define TI_BITS (__CHAR_BIT__ * (int) sizeof (TItype)) + +-#define _FP_MUL_MEAT_S(R,X,Y) \ +- _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm) +-#define _FP_MUL_MEAT_D(R,X,Y) \ +- _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) +-#define _FP_MUL_MEAT_Q(R,X,Y) \ +- _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) ++#define _FP_MUL_MEAT_S(R, X, Y) \ ++ _FP_MUL_MEAT_1_wide (_FP_WFRACBITS_S, R, X, Y, umul_ppmm) ++#define _FP_MUL_MEAT_D(R, X, Y) \ ++ _FP_MUL_MEAT_1_wide (_FP_WFRACBITS_D, R, X, Y, umul_ppmm) ++#define _FP_MUL_MEAT_Q(R, X, Y) \ ++ _FP_MUL_MEAT_2_wide (_FP_WFRACBITS_Q, R, X, Y, umul_ppmm) + +-#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_udiv_norm(S,R,X,Y) +-#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_1_udiv_norm(D,R,X,Y) +-#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_2_udiv(Q,R,X,Y) ++#define _FP_DIV_MEAT_S(R, X, Y) _FP_DIV_MEAT_1_udiv_norm (S, R, X, Y) ++#define _FP_DIV_MEAT_D(R, X, Y) _FP_DIV_MEAT_1_udiv_norm (D, R, X, Y) ++#define _FP_DIV_MEAT_Q(R, X, Y) _FP_DIV_MEAT_2_udiv (Q, R, X, Y) + +-# define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) +-# define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1) +-# define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1 ++#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) ++#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1) ++#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1 + #else +-#define _FP_W_TYPE_SIZE 32 +-#define _FP_W_TYPE unsigned int +-#define _FP_WS_TYPE signed int +-#define _FP_I_TYPE int +- +-#define _FP_MUL_MEAT_S(R,X,Y) \ +- _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm) +-#define _FP_MUL_MEAT_D(R,X,Y) \ +- _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) +-#define _FP_MUL_MEAT_Q(R,X,Y) \ +- _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) +- +-#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_udiv_norm(S,R,X,Y) +-#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y) +-#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y) +- +-# define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) +-# define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1 +-# define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1 ++#define _FP_W_TYPE_SIZE 32 ++#define _FP_W_TYPE unsigned int ++#define _FP_WS_TYPE signed int ++#define _FP_I_TYPE int ++ ++#define _FP_MUL_MEAT_S(R, X, Y) \ ++ _FP_MUL_MEAT_1_wide (_FP_WFRACBITS_S, R, X, Y, umul_ppmm) ++#define _FP_MUL_MEAT_D(R, X, Y) \ ++ _FP_MUL_MEAT_2_wide (_FP_WFRACBITS_D, R, X, Y, umul_ppmm) ++#define _FP_MUL_MEAT_Q(R, X, Y) \ ++ _FP_MUL_MEAT_4_wide (_FP_WFRACBITS_Q, R, X, Y, umul_ppmm) ++ ++#define _FP_DIV_MEAT_S(R, X, Y) _FP_DIV_MEAT_1_udiv_norm (S, R, X, Y) ++#define _FP_DIV_MEAT_D(R, X, Y) _FP_DIV_MEAT_2_udiv (D, R, X, Y) ++#define _FP_DIV_MEAT_Q(R, X, Y) _FP_DIV_MEAT_4_udiv (Q, R, X, Y) ++ ++#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) ++#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1 ++#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1 + #endif + + /* The type of the result of a floating point comparison. 
This must +@@ -73,76 +73,80 @@ typedef unsigned int UTItype __attribute__ ((mode (TI))); + typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__))); + #define CMPtype __gcc_CMPtype + +-#define _FP_NANSIGN_S 0 +-#define _FP_NANSIGN_D 0 +-#define _FP_NANSIGN_Q 0 ++#define _FP_NANSIGN_S 0 ++#define _FP_NANSIGN_D 0 ++#define _FP_NANSIGN_Q 0 + + #define _FP_KEEPNANFRACP 1 +-# define _FP_QNANNEGATEDP 0 ++#define _FP_QNANNEGATEDP 0 + + /* NaN payloads should be preserved for NAN2008. */ +-# define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ +- do \ +- { \ +- R##_s = X##_s; \ +- _FP_FRAC_COPY_##wc (R, X); \ +- R##_c = FP_CLS_NAN; \ +- } \ ++#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ ++ do \ ++ { \ ++ R##_s = X##_s; \ ++ _FP_FRAC_COPY_##wc (R, X); \ ++ R##_c = FP_CLS_NAN; \ ++ } \ + while (0) + + #ifdef __loongarch_hard_float +-#define FP_EX_INVALID 0x100000 +-#define FP_EX_DIVZERO 0x080000 +-#define FP_EX_OVERFLOW 0x040000 +-#define FP_EX_UNDERFLOW 0x020000 +-#define FP_EX_INEXACT 0x010000 ++#define FP_EX_INVALID 0x100000 ++#define FP_EX_DIVZERO 0x080000 ++#define FP_EX_OVERFLOW 0x040000 ++#define FP_EX_UNDERFLOW 0x020000 ++#define FP_EX_INEXACT 0x010000 + #define FP_EX_ALL \ +- (FP_EX_INVALID | FP_EX_DIVZERO | FP_EX_OVERFLOW | FP_EX_UNDERFLOW \ +- | FP_EX_INEXACT) ++ (FP_EX_INVALID | FP_EX_DIVZERO | FP_EX_OVERFLOW | FP_EX_UNDERFLOW \ ++ | FP_EX_INEXACT) + +-#define FP_EX_ENABLE_SHIFT 16 +-#define FP_EX_CAUSE_SHIFT 8 ++#define FP_EX_ENABLE_SHIFT 16 ++#define FP_EX_CAUSE_SHIFT 8 + +-#define FP_RND_NEAREST 0x000 +-#define FP_RND_ZERO 0x100 +-#define FP_RND_PINF 0x200 +-#define FP_RND_MINF 0x300 +-#define FP_RND_MASK 0x300 ++#define FP_RND_NEAREST 0x000 ++#define FP_RND_ZERO 0x100 ++#define FP_RND_PINF 0x200 ++#define FP_RND_MINF 0x300 ++#define FP_RND_MASK 0x300 + + #define _FP_DECL_EX \ + unsigned long int _fcsr __attribute__ ((unused)) = FP_RND_NEAREST + +-#define FP_INIT_ROUNDMODE \ +- do { \ +- _fcsr = __builtin_loongarch_movfcsr2gr (0); \ +- } while (0) ++#define FP_INIT_ROUNDMODE \ ++ do \ ++ { \ ++ _fcsr = __builtin_loongarch_movfcsr2gr (0); \ ++ } \ ++ while (0) + + #define FP_ROUNDMODE (_fcsr & FP_RND_MASK) + + #define FP_TRAPPING_EXCEPTIONS ((_fcsr << FP_EX_ENABLE_SHIFT) & FP_EX_ALL) + +-#define FP_HANDLE_EXCEPTIONS \ +- do { \ +- _fcsr &= ~(FP_EX_ALL << FP_EX_CAUSE_SHIFT); \ +- _fcsr |= _fex | (_fex << FP_EX_CAUSE_SHIFT); \ +- __builtin_loongarch_movgr2fcsr (0, _fcsr); \ +- } while (0) ++#define FP_HANDLE_EXCEPTIONS \ ++ do \ ++ { \ ++ _fcsr &= ~(FP_EX_ALL << FP_EX_CAUSE_SHIFT); \ ++ _fcsr |= _fex | (_fex << FP_EX_CAUSE_SHIFT); \ ++ __builtin_loongarch_movgr2fcsr (0, _fcsr); \ ++ } \ ++ while (0) + + #else +-#define FP_EX_INVALID (1 << 4) +-#define FP_EX_DIVZERO (1 << 3) +-#define FP_EX_OVERFLOW (1 << 2) +-#define FP_EX_UNDERFLOW (1 << 1) +-#define FP_EX_INEXACT (1 << 0) ++#define FP_EX_INVALID (1 << 4) ++#define FP_EX_DIVZERO (1 << 3) ++#define FP_EX_OVERFLOW (1 << 2) ++#define FP_EX_UNDERFLOW (1 << 1) ++#define FP_EX_INEXACT (1 << 0) + #endif + + #define _FP_TININESS_AFTER_ROUNDING 1 + +-#define __LITTLE_ENDIAN 1234 ++#define __LITTLE_ENDIAN 1234 + +-# define __BYTE_ORDER __LITTLE_ENDIAN ++#define __BYTE_ORDER __LITTLE_ENDIAN + + /* Define ALIASNAME as a strong alias for NAME. 
*/ +-# define strong_alias(name, aliasname) _strong_alias(name, aliasname) +-# define _strong_alias(name, aliasname) \ ++#define strong_alias(name, aliasname) _strong_alias (name, aliasname) ++#define _strong_alias(name, aliasname) \ + extern __typeof (name) aliasname __attribute__ ((alias (#name))); +diff --git a/libgcc/config/loongarch/t-elf b/libgcc/config/loongarch/t-elf +deleted file mode 100644 +index 651f10a53..000000000 +--- a/libgcc/config/loongarch/t-elf ++++ /dev/null +@@ -1,3 +0,0 @@ +-# We must build libgcc2.a with -G 0, in case the user wants to link +-# without the $gp register. +-HOST_LIBGCC2_CFLAGS += -G 0 +diff --git a/libgcc/config/loongarch/t-loongarch b/libgcc/config/loongarch/t-loongarch +index 9508cb2fc..2a7dbf6ca 100644 +--- a/libgcc/config/loongarch/t-loongarch ++++ b/libgcc/config/loongarch/t-loongarch +@@ -5,5 +5,3 @@ softfp_int_modes := si di + softfp_extensions := + softfp_truncations := + softfp_exclude_libgcc2 := n +- +-LIB2ADD_ST += $(srcdir)/config/loongarch/lib2funcs.c +diff --git a/libgcc/config/loongarch/t-sdemtk b/libgcc/config/loongarch/t-sdemtk +deleted file mode 100644 +index a06e828b5..000000000 +--- a/libgcc/config/loongarch/t-sdemtk ++++ /dev/null +@@ -1,3 +0,0 @@ +-# Don't build FPBIT and DPBIT; we'll be using the SDE soft-float library. +-FPBIT = +-DPBIT = +diff --git a/libgcc/config/loongarch/t-vr b/libgcc/config/loongarch/t-vr +deleted file mode 100644 +index e69de29bb..000000000 +-- +2.39.3 + diff --git a/Improve-specs-processing-to-allow-in-function-argume.patch b/Improve-specs-processing-to-allow-in-function-argume.patch new file mode 100644 index 0000000..b7504b4 --- /dev/null +++ b/Improve-specs-processing-to-allow-in-function-argume.patch @@ -0,0 +1,220 @@ +From 6e6de5b31ac9b5a523fbf60099d00124d99aa0d0 Mon Sep 17 00:00:00 2001 +From: Lixing +Date: Mon, 31 Jul 2023 10:08:23 +0800 +Subject: [PATCH 2/2] Improve specs processing to allow %* in function + arguments + +2018-07-31 Olivier Hainque + + * gcc.c (handle_spec_function): Accept a soft_matched_part + argument, as do_spec_1. Pass it down to ... + (eval_spec_function): Accept a soft_matched_part argument, + and pass it down to ... + (do_spec_2): Accept a soft_matched_part argument, and pass + it down to do_spec_1. + (do_spec_1): Pass soft_matched_part to handle_spec_function. + (handle_braces): Update call to handle_spec_function. + (driver::set_up_specs): Update calls to do_spec_2. + (compare_debug_dump_opt_spec_function): Likewise. + (compare_debug_self_opt_spec_function): Likewise. 
+ +[Upstream] 1102fd64dbb767 (Deleted ChangeLog) +Link: https://gcc.gnu.org/git/?p=gcc.git;a=patch;f=gcc/gcc.cc;h=1102fd64dbb76784ed46ff81bf905f6c52d296fc +--- + gcc/gcc.c | 51 +++++++++++++++++++++++++++++---------------------- + 1 file changed, 29 insertions(+), 22 deletions(-) + +diff --git a/gcc/gcc.c b/gcc/gcc.c +index 3b87e91b6..3c46e0769 100644 +--- a/gcc/gcc.c ++++ b/gcc/gcc.c +@@ -354,12 +354,12 @@ static inline void mark_matching_switches (const char *, const char *, int); + static inline void process_marked_switches (void); + static const char *process_brace_body (const char *, const char *, const char *, int, int); + static const struct spec_function *lookup_spec_function (const char *); +-static const char *eval_spec_function (const char *, const char *); +-static const char *handle_spec_function (const char *, bool *); ++static const char *eval_spec_function (const char *, const char *, const char *); ++static const char *handle_spec_function (const char *, bool *, const char *); + static char *save_string (const char *, int); + static void set_collect_gcc_options (void); + static int do_spec_1 (const char *, int, const char *); +-static int do_spec_2 (const char *); ++static int do_spec_2 (const char *, const char *); + static void do_option_spec (const char *, const char *); + static void do_self_spec (const char *); + static const char *find_file (const char *); +@@ -4865,7 +4865,7 @@ do_spec (const char *spec) + { + int value; + +- value = do_spec_2 (spec); ++ value = do_spec_2 (spec, NULL); + + /* Force out any unfinished command. + If -pipe, this forces out the last command if it ended in `|'. */ +@@ -4884,8 +4884,11 @@ do_spec (const char *spec) + return value; + } + ++/* Process the spec SPEC, with SOFT_MATCHED_PART designating the current value ++ of a matched * pattern which may be re-injected by way of %*. */ ++ + static int +-do_spec_2 (const char *spec) ++do_spec_2 (const char *spec, const char *soft_matched_part) + { + int result; + +@@ -4898,14 +4901,13 @@ do_spec_2 (const char *spec) + input_from_pipe = 0; + suffix_subst = NULL; + +- result = do_spec_1 (spec, 0, NULL); ++ result = do_spec_1 (spec, 0, soft_matched_part); + + end_going_arg (); + + return result; + } + +- + /* Process the given spec string and add any new options to the end + of the switches/n_switches array. */ + +@@ -4963,7 +4965,7 @@ do_self_spec (const char *spec) + { + int i; + +- do_spec_2 (spec); ++ do_spec_2 (spec, NULL); + do_spec_1 (" ", 0, NULL); + + /* Mark % 1) + error ("spec failure: more than one arg to SYSROOT_SUFFIX_SPEC"); +@@ -7577,7 +7584,7 @@ driver::set_up_specs () const + /* Process sysroot_hdrs_suffix_spec. */ + if (*sysroot_hdrs_suffix_spec != 0 + && !no_sysroot_suffix +- && do_spec_2 (sysroot_hdrs_suffix_spec) == 0) ++ && do_spec_2 (sysroot_hdrs_suffix_spec, NULL) == 0) + { + if (argbuf.length () > 1) + error ("spec failure: more than one arg to SYSROOT_HEADERS_SUFFIX_SPEC"); +@@ -7587,7 +7594,7 @@ driver::set_up_specs () const + + /* Look for startfiles in the standard places. 
*/ + if (*startfile_prefix_spec != 0 +- && do_spec_2 (startfile_prefix_spec) == 0 ++ && do_spec_2 (startfile_prefix_spec, NULL) == 0 + && do_spec_1 (" ", 0, NULL) == 0) + { + const char *arg; +@@ -9717,7 +9724,7 @@ compare_debug_dump_opt_spec_function (int arg, + fatal_error (input_location, + "too many arguments to %%:compare-debug-dump-opt"); + +- do_spec_2 ("%{fdump-final-insns=*:%*}"); ++ do_spec_2 ("%{fdump-final-insns=*:%*}", NULL); + do_spec_1 (" ", 0, NULL); + + if (argbuf.length () > 0 +@@ -9735,13 +9742,13 @@ compare_debug_dump_opt_spec_function (int arg, + + if (argbuf.length () > 0) + { +- do_spec_2 ("%{o*:%*}%{!o:%{!S:%b%O}%{S:%b.s}}"); ++ do_spec_2 ("%{o*:%*}%{!o:%{!S:%b%O}%{S:%b.s}}", NULL); + ext = ".gkd"; + } + else if (!compare_debug) + return NULL; + else +- do_spec_2 ("%g.gkd"); ++ do_spec_2 ("%g.gkd", NULL); + + do_spec_1 (" ", 0, NULL); + +@@ -9793,7 +9800,7 @@ compare_debug_self_opt_spec_function (int arg, + if (compare_debug >= 0) + return NULL; + +- do_spec_2 ("%{c|S:%{o*:%*}}"); ++ do_spec_2 ("%{c|S:%{o*:%*}}", NULL); + do_spec_1 (" ", 0, NULL); + + if (argbuf.length () > 0) +-- +2.39.3 + diff --git a/LoongArch-Remove-NOOP_TRUNCATION-and-fix-extendsidi2.patch b/LoongArch-Remove-NOOP_TRUNCATION-and-fix-extendsidi2.patch new file mode 100644 index 0000000..f8de504 --- /dev/null +++ b/LoongArch-Remove-NOOP_TRUNCATION-and-fix-extendsidi2.patch @@ -0,0 +1,101 @@ +From 08d337cc5186e47949b60e4b3eeebd1f763337e0 Mon Sep 17 00:00:00 2001 +From: Lixing +Date: Mon, 31 Jul 2023 09:46:12 +0800 +Subject: [PATCH 1/2] LoongArch: Remove NOOP_TRUNCATION and fix extendsidi2 + +We can safely convert value from inprec to outprec because we hold on +extention if needed. +--- + gcc/config/loongarch/loongarch.c | 11 -------- + gcc/config/loongarch/loongarch.md | 44 +++++++------------------------ + 2 files changed, 9 insertions(+), 46 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.c b/gcc/config/loongarch/loongarch.c +index a1dde5a0f..f8f96329c 100644 +--- a/gcc/config/loongarch/loongarch.c ++++ b/gcc/config/loongarch/loongarch.c +@@ -10313,14 +10313,6 @@ loongarch_promote_function_mode (const_tree type ATTRIBUTE_UNUSED, + return mode; + } + +-/* Implement TARGET_TRULY_NOOP_TRUNCATION. */ +- +-static bool +-loongarch_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec) +-{ +- return !TARGET_64BIT || inprec <= 32 || outprec > 32; +-} +- + /* Implement TARGET_STARTING_FRAME_OFFSET. See loongarch_compute_frame_info + for details about the frame layout. */ + +@@ -10940,9 +10932,6 @@ loongarch_prefetch_cookie (rtx write, rtx locality) + #undef TARGET_CAN_CHANGE_MODE_CLASS + #define TARGET_CAN_CHANGE_MODE_CLASS loongarch_can_change_mode_class + +-#undef TARGET_TRULY_NOOP_TRUNCATION +-#define TARGET_TRULY_NOOP_TRUNCATION loongarch_truly_noop_truncation +- + #undef TARGET_CONSTANT_ALIGNMENT + #define TARGET_CONSTANT_ALIGNMENT loongarch_constant_alignment + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 097c9f4db..a08c4a62c 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1433,43 +1433,17 @@ + ;; + ;; .................... 
+ +-(define_insn_and_split "extendsidi2" ++(define_insn "extendsidi2" + [(set (match_operand:DI 0 "register_operand" "=r,r,r,r") +- (sign_extend:DI +- (match_operand:SI 1 "nonimmediate_operand" "0,ZC,m,k")))] ++ (sign_extend:DI ++ (match_operand:SI 1 "nonimmediate_operand" "r,ZC,m,k")))] + "TARGET_64BIT" +-{ +- switch (which_alternative) +- { +- case 0: +- return "#"; +- case 1: +- { +- rtx offset = XEXP (operands[1], 0); +- if (GET_CODE (offset) == PLUS) +- offset = XEXP (offset, 1); +- else +- offset = const0_rtx; +- if (const_arith_operand (offset, Pmode) || (offset == const0_rtx)) +- return "ld.w\t%0,%1"; +- else +- return "ldptr.w\t%0,%1"; +- } +- case 2: +- return "ld.w\t%0,%1"; +- case 3: +- return "ldx.w\t%0,%1"; +- default: +- gcc_unreachable (); +- } +-} +- "&& reload_completed && register_operand (operands[1], VOIDmode)" +- [(const_int 0)] +-{ +- emit_note (NOTE_INSN_DELETED); +- DONE; +-} +- [(set_attr "move_type" "move,load,load,load") ++ "@ ++ slli.w\t%0,%1,0 ++ ldptr.w\t%0,%1 ++ ld.w\t%0,%1 ++ ldx.w\t%0,%1" ++ [(set_attr "move_type" "sll0,load,load,load") + (set_attr "mode" "DI")]) + + (define_insn "extend2" +-- +2.39.3 + diff --git a/gcc.spec b/gcc.spec index b4e229f..1ec052b 100644 --- a/gcc.spec +++ b/gcc.spec @@ -1,4 +1,4 @@ -%define anolis_release .0.3 +%define anolis_release .0.4 %global DATE 20210514 %global gitrev a3253c88425835d5b339d6998a1110a66ccd8b44 %global gcc_version 8.5.0 @@ -291,6 +291,7 @@ Patch27: gcc8-libgfortran-default-values.patch Patch30: gcc8-rh1668903-1.patch Patch31: gcc8-rh1668903-2.patch Patch32: gcc8-rh1668903-3.patch +Patch33: Improve-specs-processing-to-allow-in-function-argume.patch Patch1000: nvptx-tools-no-ptxas.patch Patch1001: nvptx-tools-build.patch @@ -304,6 +305,8 @@ Patch1004: 0002-loongarch-fix-multilib-osdirnames-to-lib64.patch Patch1005: 0001-LoongArch-Fixup-configure-file-error.patch Patch1006: 0002-LoongArch-Rename-config-file-for-loongarch.patch Patch1007: LoongArch-Fix-atomic_exchange-expanding-PR107713.patch +Patch1008: 0001-Sync-to-gcc-8-vec-36.patch +Patch1009: LoongArch-Remove-NOOP_TRUNCATION-and-fix-extendsidi2.patch # On ARM EABI systems, we do want -gnueabi to be part of the @@ -886,6 +889,7 @@ to NVidia PTX capable devices if available. 
%patch30 -p0 -b .rh1668903-1~ %patch31 -p0 -b .rh1668903-2~ %patch32 -p0 -b .rh1668903-3~ +%patch33 -p0 -b .fixspec~ cd nvptx-tools-%{nvptx_tools_gitrev} %patch1000 -p1 -b .nvptx-tools-no-ptxas~ @@ -934,6 +938,8 @@ rm -f gcc/testsuite/go.test/test/chan/goroutines.go %patch1005 -p1 %patch1006 -p1 %patch1007 -p1 +%patch1008 -p1 +%patch1009 -p1 %endif %build @@ -1019,7 +1025,7 @@ CONFIGURE_OPTS="\ %ifarch ppc64le --enable-targets=powerpcle-linux \ %endif -%ifarch ppc64le %{mips} riscv64 s390x loongarch64 +%ifarch ppc64le %{mips} riscv64 s390x --disable-multilib \ %else %if 0%{?anolis} @@ -1030,7 +1036,7 @@ CONFIGURE_OPTS="\ %endif --with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions \ --enable-gnu-unique-object --enable-linker-build-id --with-gcc-major-version-only \ -%ifnarch %{mips} loongarch64 +%ifnarch %{mips} --with-linker-hash-style=gnu \ %endif --enable-plugin --enable-initfini-array \ @@ -1052,8 +1058,12 @@ CONFIGURE_OPTS="\ --with-arch=loongarch64 \ --with-abi=lp64 \ --enable-tls \ + --with-long-double-128 \ + --disable-multilib \ + --enable-initfini-array \ --enable-gnu-indirect-function \ --disable-emultls \ + --with-linker-hash-style=gnu \ %endif %if 0%{?fedora} >= 21 || 0%{?rhel} >= 7 @@ -1067,7 +1077,7 @@ CONFIGURE_OPTS="\ %ifarch ppc ppc64 ppc64le ppc64p7 --enable-secureplt \ %endif -%ifarch sparc sparcv9 sparc64 ppc ppc64 ppc64le ppc64p7 s390 s390x alpha loongarch64 +%ifarch sparc sparcv9 sparc64 ppc ppc64 ppc64le ppc64p7 s390 s390x alpha --with-long-double-128 \ %endif %ifarch sparc @@ -3240,6 +3250,9 @@ fi %endif %changelog +* Wed Jul 19 2023 Xing Li 8.5.0-10.1.0.4 +- Sync code to vec.36. (lixing@loongson.cn) + * Fri Dec 2 2022 Xing Li 8.5.0-10.1.0.3 - rename mt file for LoongArch. (lixing@loongson.cn) - Fixup LoongArch atomic_exchange error. (lixing@loongson.cn) -- Gitee