From 143da2bf11c0fbfbebc89682bfcdf3752d850045 Mon Sep 17 00:00:00 2001 From: chenli Date: Tue, 8 Aug 2023 20:01:42 +0800 Subject: [PATCH] Support LoongArch --- 0001-Support-LoongArch.patch | 24842 +++++++++++++++++++++++++++++++++ clang.spec | 22 +- 2 files changed, 24862 insertions(+), 2 deletions(-) create mode 100644 0001-Support-LoongArch.patch diff --git a/0001-Support-LoongArch.patch b/0001-Support-LoongArch.patch new file mode 100644 index 0000000..7f4d71f --- /dev/null +++ b/0001-Support-LoongArch.patch @@ -0,0 +1,24842 @@ +diff --git a/clang/bindings/python/tests/CMakeLists.txt b/clang/bindings/python/tests/CMakeLists.txt +index 5127512fe..8383e6fae 100644 +--- a/clang/bindings/python/tests/CMakeLists.txt ++++ b/clang/bindings/python/tests/CMakeLists.txt +@@ -40,7 +40,7 @@ endif() + # addressed. + # SystemZ has broken Python/FFI interface: + # https://reviews.llvm.org/D52840#1265716 +-if(${LLVM_NATIVE_ARCH} MATCHES "^(AArch64|Hexagon|Sparc|SystemZ)$") ++if(${LLVM_NATIVE_ARCH} MATCHES "^(AArch64|Hexagon|LoongArch|Sparc|SystemZ)$") + set(RUN_PYTHON_TESTS FALSE) + endif() + +diff --git a/clang/include/clang/Basic/BuiltinsLoongArch.def b/clang/include/clang/Basic/BuiltinsLoongArch.def +new file mode 100644 +index 000000000..75d7e77c1 +--- /dev/null ++++ b/clang/include/clang/Basic/BuiltinsLoongArch.def +@@ -0,0 +1,1990 @@ ++//===-- BuiltinsLoongArch.def - LoongArch Builtin function database --------*- C++ -*-==// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines the LoongArch-specific builtin function database. Users of ++// this file must define the BUILTIN macro to make use of this information. ++// ++//===----------------------------------------------------------------------===// ++ ++// The format of this database matches clang/Basic/Builtins.def. 
++ ++// LoongArch LSX ++ ++BUILTIN(__builtin_lsx_vclo_b, "V16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vclo_h, "V8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vclo_w, "V4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vclo_d, "V2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vflogb_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vflogb_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vpickve2gr_b, "iV16ScIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_h, "iV8SsIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_w, "iV4SiIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_d, "LLiV2SLLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vpickve2gr_bu, "iV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_hu, "iV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_wu, "iV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_du, "LLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vreplvei_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vreplvei_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vreplvei_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vreplvei_d, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vmskltz_b, "V16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmskltz_h, "V8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmskltz_w, "V4iV4i", "nc") ++BUILTIN(__builtin_lsx_vmskltz_d, "V2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vfmadd_s, "V4fV4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmadd_d, "V2dV2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfmsub_s, "V4fV4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmsub_d, "V2dV2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfnmadd_s, "V4fV4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfnmadd_d, "V2dV2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfnmsub_s, "V4fV4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfnmsub_d, "V2dV2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_caf_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_caf_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cor_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cor_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cun_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cun_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cune_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cune_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cueq_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cueq_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_ceq_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_ceq_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cne_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cne_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_clt_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_clt_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cult_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cult_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cle_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cle_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cule_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cule_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_saf_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_saf_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sor_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sor_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sun_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sun_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sune_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sune_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sueq_s, 
"V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sueq_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_seq_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_seq_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sne_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sne_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_slt_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_slt_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sult_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sult_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sle_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sle_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sule_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sule_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vbitsel_v, "V16UcV16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vshuf_b, "V16UcV16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vldrepl_b, "V16cvC*Ii", "nc") ++BUILTIN(__builtin_lsx_vldrepl_h, "V8svC*Ii", "nc") ++BUILTIN(__builtin_lsx_vldrepl_w, "V4ivC*Ii", "nc") ++BUILTIN(__builtin_lsx_vldrepl_d, "V2LLivC*Ii", "nc") ++ ++BUILTIN(__builtin_lsx_vstelm_b, "vV16Scv*IiUi", "nc") ++BUILTIN(__builtin_lsx_vstelm_h, "vV8Ssv*IiUi", "nc") ++BUILTIN(__builtin_lsx_vstelm_w, "vV4Siv*IiUi", "nc") ++BUILTIN(__builtin_lsx_vstelm_d, "vV2SLLiv*IiUi", "nc") ++ ++BUILTIN(__builtin_lsx_vldx, "V16ScvC*LLi", "nc") ++BUILTIN(__builtin_lsx_vstx, "vV16Scv*LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwev_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vaddwev_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vaddwev_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vaddwev_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsubwev_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsubwev_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsubwev_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsubwev_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwod_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vaddwod_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vaddwod_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vaddwod_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsubwod_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsubwod_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsubwod_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsubwod_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwev_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vaddwev_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vaddwev_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vaddwev_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsubwev_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vsubwev_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vsubwev_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vsubwev_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwod_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vaddwod_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vaddwod_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vaddwod_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsubwod_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vsubwod_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vsubwod_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vsubwod_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwev_d_wu_w, "V2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vaddwev_w_hu_h, 
"V4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vaddwev_h_bu_b, "V8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vaddwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwod_d_wu_w, "V2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vaddwod_w_hu_h, "V4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vaddwod_h_bu_b, "V8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vaddwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vhaddw_q_d, "V2LLiV2LLiV2LLi", "nc") ++BUILTIN(__builtin_lsx_vhsubw_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vhaddw_qu_du, "V2ULLiV2ULLiV2ULLi", "nc") ++BUILTIN(__builtin_lsx_vhsubw_qu_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmuh_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmuh_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmuh_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vmuh_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmuh_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmuh_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmuh_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmuh_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwev_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmulwev_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmulwev_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmulwev_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwod_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmulwod_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmulwod_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmulwod_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwev_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmulwev_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmulwev_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmulwev_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwod_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmulwod_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmulwod_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmulwod_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwev_d_wu_w, "V2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmulwev_w_hu_h, "V4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vmulwev_h_bu_b, "V8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vmulwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwod_d_wu_w, "V2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmulwod_w_hu_h, "V4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vmulwod_h_bu_b, "V8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vmulwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwev_d_w, "V2LLiV2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_w_h, "V4SiV4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_h_b, "V8sV8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwod_d_w, "V2LLiV2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_w_h, "V4SiV4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_h_b, "V8sV8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwev_d_wu, "V2ULLiV2ULLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_w_hu, "V4UiV4UiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_h_bu, "V8UsV8UsV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_q_du, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwod_d_wu, "V2ULLiV2ULLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_w_hu, "V4UiV4UiV8UsV8Us", "nc") 
++BUILTIN(__builtin_lsx_vmaddwod_h_bu, "V8UsV8UsV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_q_du, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwev_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_w_hu_h, "V4SiV4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_h_bu_b, "V8sV8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwod_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_w_hu_h, "V4SiV4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_h_bu_b, "V8sV8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrln_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrln_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsrln_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsran_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsran_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsran_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrlrn_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrlrn_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsrlrn_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrarn_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrarn_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsrarn_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrln_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vssrln_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vssrln_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssran_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vssran_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vssran_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlrn_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vssrlrn_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vssrlrn_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrarn_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vssrarn_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vssrarn_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrln_bu_h, "V16UcV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vssrln_hu_w, "V8UsV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vssrln_wu_d, "V4UiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssran_bu_h, "V16UcV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vssran_hu_w, "V8UsV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vssran_wu_d, "V4UiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlrn_bu_h, "V16UcV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vssrlrn_hu_w, "V8UsV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vssrlrn_wu_d, "V4UiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrarn_bu_h, "V16UcV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vssrarn_hu_w, "V8UsV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vssrarn_wu_d, "V4UiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vandn_v, "V16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vorn_v, "V16ScV16ScV16Sc", "nc") ++ ++BUILTIN(__builtin_lsx_vfrstp_b, "V16ScV16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vfrstp_h, "V8SsV8SsV8SsV8Ss", "nc") ++ ++BUILTIN(__builtin_lsx_vadd_q, "V2LLiV2LLiV2LLi", "nc") ++BUILTIN(__builtin_lsx_vsub_q, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsigncov_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vsigncov_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vsigncov_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsigncov_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vfcvt_h_s, "V8sV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcvt_s_d, 
"V4fV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftint_w_d, "V4SiV2dV2d", "nc") ++BUILTIN(__builtin_lsx_vffint_s_l, "V4fV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrz_w_d, "V4SiV2dV2d", "nc") ++BUILTIN(__builtin_lsx_vftintrp_w_d, "V4SiV2dV2d", "nc") ++BUILTIN(__builtin_lsx_vftintrm_w_d, "V4SiV2dV2d", "nc") ++BUILTIN(__builtin_lsx_vftintrne_w_d, "V4SiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vbsrl_v, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vbsll_v, "V16cV16cIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vfrstpi_b, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vfrstpi_h, "V8sV8sV8sIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vneg_b, "V16cV16c", "nc") ++BUILTIN(__builtin_lsx_vneg_h, "V8sV8s", "nc") ++BUILTIN(__builtin_lsx_vneg_w, "V4iV4i", "nc") ++BUILTIN(__builtin_lsx_vneg_d, "V2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmskgez_b, "V16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmsknz_b, "V8sV8s", "nc") ++ ++BUILTIN(__builtin_lsx_vfrintrm_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrintrm_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrintrp_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrintrp_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrintrz_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrintrz_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrintrne_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrintrne_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vffinth_d_w, "V2dV4Si", "nc") ++BUILTIN(__builtin_lsx_vffintl_d_w, "V2dV4Si", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrm_w_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrm_l_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrp_w_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrp_l_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrz_w_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrz_l_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrne_w_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrne_l_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftinth_l_s, "V2LLiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintl_l_s, "V2LLiV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrmh_l_s, "V2LLiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrml_l_s, "V2LLiV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrph_l_s, "V2LLiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrpl_l_s, "V2LLiV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrzh_l_s, "V2LLiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrzl_l_s, "V2LLiV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrneh_l_s, "V2LLiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrnel_l_s, "V2LLiV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vexth_d_w, "V2LLiV4Si", "nc") ++BUILTIN(__builtin_lsx_vexth_w_h, "V4SiV8s", "nc") ++BUILTIN(__builtin_lsx_vexth_h_b, "V8sV16c", "nc") ++BUILTIN(__builtin_lsx_vexth_q_d, "V2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vexth_du_wu, "V2ULLiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vexth_wu_hu, "V4UiV8Us", "nc") ++BUILTIN(__builtin_lsx_vexth_hu_bu, "V8UsV16Uc", "nc") ++BUILTIN(__builtin_lsx_vexth_qu_du, "V2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsllwil_d_w, "V2LLiV4SiIUi", "nc") ++BUILTIN(__builtin_lsx_vsllwil_w_h, "V4SiV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsllwil_h_b, "V8sV16cIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vextl_q_d, "V2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsllwil_du_wu, "V2ULLiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vsllwil_wu_hu, "V4UiV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vsllwil_hu_bu, "V8UsV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vextl_qu_du, "V2LLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitclri_b, "V16UcV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vbitclri_h, "V8UsV8UsIUi", "nc") 
++BUILTIN(__builtin_lsx_vbitclri_w, "V4UiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vbitclri_d, "V2ULLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitseti_b, "V16UcV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vbitseti_h, "V8UsV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vbitseti_w, "V4UiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vbitseti_d, "V2ULLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitrevi_b, "V16UcV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vbitrevi_h, "V8UsV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vbitrevi_w, "V4UiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vbitrevi_d, "V2ULLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlrni_b_h, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_h_w, "V8sV8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_w_d, "V4iV4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_d_q, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrani_b_h, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrani_h_w, "V8sV8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrani_w_d, "V4iV4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrani_d_q, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vextrins_b, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vextrins_h, "V8sV8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vextrins_w, "V4iV4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vextrins_d, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitseli_b, "V16UcV16UcV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vandi_b, "V16UcV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vori_b, "V16UcV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vxori_b, "V16UcV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vnori_b, "V16UcV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vldi, "V2LLiIi", "nc") ++BUILTIN(__builtin_lsx_vrepli_b, "V16cIi", "nc") ++BUILTIN(__builtin_lsx_vrepli_h, "V8sIi", "nc") ++BUILTIN(__builtin_lsx_vrepli_w, "V4iIi", "nc") ++BUILTIN(__builtin_lsx_vrepli_d, "V2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vpermi_w, "V4iV4iV4iIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsadd_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vsadd_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vsadd_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsadd_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssub_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vssub_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vssub_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vssub_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsadd_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vsadd_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vsadd_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vsadd_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssub_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vssub_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vssub_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vssub_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vhaddw_h_b, "V8SsV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vhaddw_w_h, "V4SiV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vhaddw_d_w, "V2SLLiV4SiV4Si", "nc") ++ ++BUILTIN(__builtin_lsx_vhsubw_h_b, "V8SsV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vhsubw_w_h, "V4SiV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vhsubw_d_w, "V2SLLiV4SiV4Si", "nc") ++ ++BUILTIN(__builtin_lsx_vhaddw_hu_bu, "V8UsV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vhaddw_wu_hu, "V4UiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vhaddw_du_wu, "V2ULLiV4UiV4Ui", "nc") ++ ++BUILTIN(__builtin_lsx_vhsubw_hu_bu, "V8UsV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vhsubw_wu_hu, "V4UiV8UsV8Us", "nc") 
++BUILTIN(__builtin_lsx_vhsubw_du_wu, "V2ULLiV4UiV4Ui", "nc") ++ ++BUILTIN(__builtin_lsx_vadda_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vadda_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vadda_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vadda_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vabsd_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vabsd_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vabsd_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vabsd_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vabsd_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vabsd_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vabsd_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vabsd_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vavg_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vavg_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vavg_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vavg_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vavg_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vavg_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vavg_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vavg_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vavgr_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vavgr_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vavgr_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vavgr_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vavgr_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vavgr_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vavgr_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vavgr_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrlr_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsrlr_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrlr_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsrlr_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrar_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsrar_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrar_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsrar_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vfmax_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmax_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfmin_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmin_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfmaxa_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmaxa_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfmina_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmina_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfclass_s, "V4iV4f", "nc") ++BUILTIN(__builtin_lsx_vfclass_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrecip_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrecip_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrsqrt_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrsqrt_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcvtl_s_h, "V4fV8s", "nc") ++BUILTIN(__builtin_lsx_vfcvtl_d_s, "V2dV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vfcvth_s_h, "V4fV8s", "nc") ++BUILTIN(__builtin_lsx_vfcvth_d_s, "V2dV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vftint_w_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vftint_l_d, "V2SLLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftint_wu_s, "V4UiV4f", "nc") ++BUILTIN(__builtin_lsx_vftint_lu_d, "V2ULLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vsrlri_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlri_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlri_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlri_d, "V2LLiV2LLiIUi", "nc") ++ 
++BUILTIN(__builtin_lsx_vsrari_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrari_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrari_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrari_d, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsat_b, "V16ScV16ScIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_h, "V8SsV8SsIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_w, "V4SiV4SiIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_d, "V2SLLiV2SLLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsat_bu, "V16UcV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_hu, "V8UsV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_wu, "V4UiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_du, "V2ULLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrlni_b_h, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlni_h_w, "V8sV8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlni_w_d, "V4iV4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlni_d_q, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrlrni_b_h, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlrni_h_w, "V8sV8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlrni_w_d, "V4iV4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlrni_d_q, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlni_b_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_h_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_w_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_d_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlrni_bu_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_hu_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_wu_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_du_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrarni_b_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vsrarni_h_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vsrarni_w_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vsrarni_d_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrani_b_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_h_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_w_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_d_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrani_bu_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_hu_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_wu_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_du_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrarni_b_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_h_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_w_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_d_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrarni_bu_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_hu_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_wu_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_du_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlni_bu_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_hu_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_wu_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_du_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vseq_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vseq_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vseq_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vseq_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsle_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vsle_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vsle_w, "V4SiV4SiV4Si", "nc") 
++BUILTIN(__builtin_lsx_vsle_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsle_bu, "V16ScV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vsle_hu, "V8SsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vsle_wu, "V4SiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vsle_du, "V2SLLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vslt_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vslt_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vslt_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vslt_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vslt_bu, "V16ScV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vslt_hu, "V8SsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vslt_wu, "V4SiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vslt_du, "V2SLLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vadd_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vadd_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vadd_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vadd_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsub_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsub_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsub_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsub_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmax_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmax_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmax_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmax_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmin_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmin_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmin_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmin_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmax_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmax_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmax_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmax_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmin_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmin_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmin_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmin_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmul_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmul_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmul_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmul_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmadd_b, "V16ScV16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmadd_h, "V8SsV8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmadd_w, "V4SiV4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmadd_d, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmsub_b, "V16ScV16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmsub_h, "V8SsV8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmsub_w, "V4SiV4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmsub_d, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vdiv_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vdiv_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vdiv_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vdiv_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmod_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmod_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmod_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmod_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vdiv_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vdiv_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vdiv_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vdiv_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ 
++BUILTIN(__builtin_lsx_vsll_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsll_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsll_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsll_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrl_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsrl_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrl_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsrl_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitclr_b, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vbitclr_h, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vbitclr_w, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vbitclr_d, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitset_b, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vbitset_h, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vbitset_w, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vbitset_d, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vpackev_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vpackev_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vpackev_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vpackev_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vpackod_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vpackod_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vpackod_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vpackod_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vilvl_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vilvl_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vilvl_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vilvl_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vilvh_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vilvh_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vilvh_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vilvh_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vpickev_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vpickev_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vpickev_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vpickev_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vand_v, "V16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vor_v, "V16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vbitrev_b, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vbitrev_h, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vbitrev_w, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vbitrev_d, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmod_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmod_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmod_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmod_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vpickod_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vpickod_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vpickod_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vpickod_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vreplve_b, "V16cV16cUi", "nc") ++BUILTIN(__builtin_lsx_vreplve_h, "V8sV8sUi", "nc") ++BUILTIN(__builtin_lsx_vreplve_w, "V4iV4iUi", "nc") ++BUILTIN(__builtin_lsx_vreplve_d, "V2LLiV2LLiUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsra_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsra_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsra_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsra_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vxor_v, "V16cV16cV16c", "nc") ++ ++BUILTIN(__builtin_lsx_vnor_v, "V16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vfadd_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfadd_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfsub_s, 
"V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfsub_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfmul_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmul_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vshuf_h, "V8sV8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vshuf_w, "V4iV4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vshuf_d, "V2LLiV2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vseqi_b, "V16ScV16ScISi", "nc") ++BUILTIN(__builtin_lsx_vseqi_h, "V8SsV8SsISi", "nc") ++BUILTIN(__builtin_lsx_vseqi_w, "V4SiV4SiISi", "nc") ++BUILTIN(__builtin_lsx_vseqi_d, "V2SLLiV2SLLiISi", "nc") ++ ++BUILTIN(__builtin_lsx_vslei_b, "V16ScV16ScISi", "nc") ++BUILTIN(__builtin_lsx_vslei_h, "V8SsV8SsISi", "nc") ++BUILTIN(__builtin_lsx_vslei_w, "V4SiV4SiISi", "nc") ++BUILTIN(__builtin_lsx_vslei_d, "V2SLLiV2SLLiISi", "nc") ++ ++BUILTIN(__builtin_lsx_vslei_bu, "V16ScV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vslei_hu, "V8SsV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vslei_wu, "V4SiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vslei_du, "V2SLLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vslti_b, "V16ScV16ScISi", "nc") ++BUILTIN(__builtin_lsx_vslti_h, "V8SsV8SsISi", "nc") ++BUILTIN(__builtin_lsx_vslti_w, "V4SiV4SiISi", "nc") ++BUILTIN(__builtin_lsx_vslti_d, "V2SLLiV2SLLiISi", "nc") ++ ++BUILTIN(__builtin_lsx_vslti_bu, "V16ScV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vslti_hu, "V8SsV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vslti_wu, "V4SiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vslti_du, "V2SLLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddi_bu, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vaddi_hu, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vaddi_wu, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vaddi_du, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsubi_bu, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsubi_hu, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsubi_wu, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsubi_du, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaxi_b, "V16ScV16ScIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_h, "V8SsV8SsIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_w, "V4SiV4SiIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_d, "V2SLLiV2SLLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vmini_b, "V16ScV16ScIi", "nc") ++BUILTIN(__builtin_lsx_vmini_h, "V8SsV8SsIi", "nc") ++BUILTIN(__builtin_lsx_vmini_w, "V4SiV4SiIi", "nc") ++BUILTIN(__builtin_lsx_vmini_d, "V2SLLiV2SLLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaxi_bu, "V16UcV16UcIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_hu, "V8UsV8UsIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_wu, "V4UiV4UiIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_du, "V2ULLiV2ULLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vmini_bu, "V16UcV16UcIi", "nc") ++BUILTIN(__builtin_lsx_vmini_hu, "V8UsV8UsIi", "nc") ++BUILTIN(__builtin_lsx_vmini_wu, "V4UiV4UiIi", "nc") ++BUILTIN(__builtin_lsx_vmini_du, "V2ULLiV2ULLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vclz_b, "V16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vclz_h, "V8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vclz_w, "V4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vclz_d, "V2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vpcnt_b, "V16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vpcnt_h, "V8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vpcnt_w, "V4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vpcnt_d, "V2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vfsqrt_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfsqrt_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrint_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrint_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vffint_s_w, "V4fV4Si", "nc") ++BUILTIN(__builtin_lsx_vffint_d_l, "V2dV2SLLi", 
"nc") ++ ++BUILTIN(__builtin_lsx_vffint_s_wu, "V4fV4Ui", "nc") ++BUILTIN(__builtin_lsx_vffint_d_lu, "V2dV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrz_wu_s, "V4UiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrz_lu_d, "V2ULLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vreplgr2vr_b, "V16Sci", "nc") ++BUILTIN(__builtin_lsx_vreplgr2vr_h, "V8Ssi", "nc") ++BUILTIN(__builtin_lsx_vreplgr2vr_w, "V4Sii", "nc") ++BUILTIN(__builtin_lsx_vreplgr2vr_d, "V2SLLiLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vinsgr2vr_b, "V16ScV16SciIUi", "nc") ++BUILTIN(__builtin_lsx_vinsgr2vr_h, "V8SsV8SsiIUi", "nc") ++BUILTIN(__builtin_lsx_vinsgr2vr_w, "V4SiV4SiiIUi", "nc") ++BUILTIN(__builtin_lsx_vinsgr2vr_d, "V2SLLiV2SLLiLLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vfdiv_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfdiv_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vslli_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vslli_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vslli_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vslli_d, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrli_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrli_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrli_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrli_d, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrai_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrai_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrai_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrai_d, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vshuf4i_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vshuf4i_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vshuf4i_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vshuf4i_d, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vrotr_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vrotr_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vrotr_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vrotr_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vrotri_b, "V16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vrotri_h, "V8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vrotri_w, "V4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vrotri_d, "V2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vld, "V16ScvC*Ii", "nc") ++ ++BUILTIN(__builtin_lsx_vst, "vV16Scv*Ii", "nc") ++ ++BUILTIN(__builtin_lsx_bz_v, "iV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_bnz_v, "iV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_bz_b, "iV16Uc", "nc") ++BUILTIN(__builtin_lsx_bz_h, "iV8Us", "nc") ++BUILTIN(__builtin_lsx_bz_w, "iV4Ui", "nc") ++BUILTIN(__builtin_lsx_bz_d, "iV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_bnz_b, "iV16Uc", "nc") ++BUILTIN(__builtin_lsx_bnz_h, "iV8Us", "nc") ++BUILTIN(__builtin_lsx_bnz_w, "iV4Ui", "nc") ++BUILTIN(__builtin_lsx_bnz_d, "iV2ULLi", "nc") ++ ++//LoongArch LASX ++ ++BUILTIN(__builtin_lasx_xvfmadd_s, "V8fV8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmadd_d, "V4dV4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfmsub_s, "V8fV8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmsub_d, "V4dV4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfnmadd_s, "V8fV8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfnmadd_d, "V4dV4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfnmsub_s, "V8fV8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfnmsub_d, "V4dV4dV4dV4d", "nc") ++ ++ ++BUILTIN(__builtin_lasx_xvsll_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsll_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsll_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsll_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslli_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvslli_h, "V16sV16sIUi", "nc") 
++BUILTIN(__builtin_lasx_xvslli_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvslli_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsra_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsra_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsra_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsra_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrai_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrai_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrai_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrai_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrar_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsrar_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrar_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsrar_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrari_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrari_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrari_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrari_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrl_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsrl_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrl_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsrl_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrli_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrli_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrli_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrli_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrlr_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsrlr_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrlr_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsrlr_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrlri_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlri_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlri_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlri_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitclr_b, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvbitclr_h, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvbitclr_w, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvbitclr_d, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitclri_b, "V32UcV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitclri_h, "V16UsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitclri_w, "V8UiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitclri_d, "V4ULLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitset_b, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvbitset_h, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvbitset_w, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvbitset_d, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitseti_b, "V32UcV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitseti_h, "V16UsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitseti_w, "V8UiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitseti_d, "V4ULLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitrev_b, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvbitrev_h, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvbitrev_w, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvbitrev_d, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitrevi_b, "V32UcV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitrevi_h, "V16UsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitrevi_w, "V8UiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitrevi_d, "V4ULLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvadd_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvadd_h, "V16sV16sV16s", "nc") 
++BUILTIN(__builtin_lasx_xvadd_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvadd_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddi_bu, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvaddi_hu, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvaddi_wu, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvaddi_du, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsub_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsub_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsub_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsub_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsubi_bu, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsubi_hu, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsubi_wu, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsubi_du, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmax_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmax_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmax_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmax_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaxi_b, "V32ScV32ScIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_h, "V16SsV16SsIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_w, "V8SiV8SiIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_d, "V4SLLiV4SLLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmax_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmax_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmax_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmax_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaxi_bu, "V32UcV32UcIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_hu, "V16UsV16UsIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_wu, "V8UiV8UiIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_du, "V4ULLiV4ULLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmin_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmin_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmin_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmin_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmini_b, "V32ScV32ScIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_h, "V16SsV16SsIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_w, "V8SiV8SiIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_d, "V4SLLiV4SLLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmin_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmin_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmin_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmin_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmini_bu, "V32UcV32UcIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_hu, "V16UsV16UsIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_wu, "V8UiV8UiIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_du, "V4ULLiV4ULLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvseq_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvseq_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvseq_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvseq_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvseqi_b, "V32ScV32ScISi", "nc") ++BUILTIN(__builtin_lasx_xvseqi_h, "V16SsV16SsISi", "nc") ++BUILTIN(__builtin_lasx_xvseqi_w, "V8SiV8SiISi", "nc") ++BUILTIN(__builtin_lasx_xvseqi_d, "V4SLLiV4SLLiISi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslt_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvslt_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvslt_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvslt_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslti_b, "V32ScV32ScISi", "nc") ++BUILTIN(__builtin_lasx_xvslti_h, "V16SsV16SsISi", "nc") 
++BUILTIN(__builtin_lasx_xvslti_w, "V8SiV8SiISi", "nc") ++BUILTIN(__builtin_lasx_xvslti_d, "V4SLLiV4SLLiISi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslt_bu, "V32ScV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvslt_hu, "V16SsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvslt_wu, "V8SiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvslt_du, "V4SLLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslti_bu, "V32ScV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvslti_hu, "V16SsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvslti_wu, "V8SiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvslti_du, "V4SLLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsle_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvsle_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvsle_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsle_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslei_b, "V32ScV32ScISi", "nc") ++BUILTIN(__builtin_lasx_xvslei_h, "V16SsV16SsISi", "nc") ++BUILTIN(__builtin_lasx_xvslei_w, "V8SiV8SiISi", "nc") ++BUILTIN(__builtin_lasx_xvslei_d, "V4SLLiV4SLLiISi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsle_bu, "V32ScV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvsle_hu, "V16SsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvsle_wu, "V8SiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvsle_du, "V4SLLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslei_bu, "V32ScV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvslei_hu, "V16SsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvslei_wu, "V8SiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvslei_du, "V4SLLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsat_b, "V32ScV32ScIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_h, "V16SsV16SsIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_w, "V8SiV8SiIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_d, "V4SLLiV4SLLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsat_bu, "V32UcV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_hu, "V16UsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_wu, "V8UiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_du, "V4ULLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvadda_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvadda_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvadda_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvadda_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsadd_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvsadd_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvsadd_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsadd_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsadd_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvsadd_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvsadd_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvsadd_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvavg_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvavg_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvavg_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvavg_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvavg_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvavg_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvavg_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvavg_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvavgr_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvavgr_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvavgr_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvavgr_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvavgr_bu, "V32UcV32UcV32Uc", "nc") 
++BUILTIN(__builtin_lasx_xvavgr_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvavgr_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvavgr_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssub_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvssub_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvssub_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvssub_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssub_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvssub_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvssub_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvssub_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvabsd_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvabsd_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvabsd_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvabsd_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvabsd_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvabsd_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvabsd_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvabsd_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmul_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmul_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmul_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmul_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmadd_b, "V32ScV32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmadd_h, "V16SsV16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmadd_w, "V8SiV8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmadd_d, "V4SLLiV4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmsub_b, "V32ScV32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmsub_h, "V16SsV16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmsub_w, "V8SiV8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmsub_d, "V4SLLiV4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvdiv_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvdiv_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvdiv_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvdiv_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvdiv_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvdiv_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvdiv_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvdiv_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvhaddw_h_b, "V16SsV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvhaddw_w_h, "V8SiV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvhaddw_d_w, "V4SLLiV8SiV8Si", "nc") ++ ++BUILTIN(__builtin_lasx_xvhaddw_hu_bu, "V16UsV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvhaddw_wu_hu, "V8UiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvhaddw_du_wu, "V4ULLiV8UiV8Ui", "nc") ++ ++BUILTIN(__builtin_lasx_xvhsubw_h_b, "V16SsV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_w_h, "V8SiV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_d_w, "V4SLLiV8SiV8Si", "nc") ++ ++BUILTIN(__builtin_lasx_xvhsubw_hu_bu, "V16UsV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_wu_hu, "V8UiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_du_wu, "V4ULLiV8UiV8Ui", "nc") ++ ++BUILTIN(__builtin_lasx_xvmod_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmod_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmod_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmod_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmod_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmod_hu, "V16UsV16UsV16Us", "nc") 
++BUILTIN(__builtin_lasx_xvmod_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmod_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvrepl128vei_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvrepl128vei_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvrepl128vei_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvrepl128vei_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpickev_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvpickev_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvpickev_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvpickev_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpickod_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvpickod_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvpickod_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvpickod_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvilvh_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvilvh_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvilvh_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvilvh_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvilvl_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvilvl_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvilvl_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvilvl_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpackev_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvpackev_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvpackev_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvpackev_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpackod_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvpackod_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvpackod_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvpackod_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvshuf_b, "V32UcV32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvshuf_h, "V16sV16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvshuf_w, "V8iV8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvshuf_d, "V4LLiV4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvand_v, "V32UcV32UcV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xvandi_b, "V32UcV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvor_v, "V32UcV32UcV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xvori_b, "V32UcV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvnor_v, "V32UcV32UcV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xvnori_b, "V32UcV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvxor_v, "V32cV32cV32c", "nc") ++ ++BUILTIN(__builtin_lasx_xvxori_b, "V32UcV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitsel_v, "V32UcV32UcV32UcV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitseli_b, "V32UcV32UcV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvshuf4i_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvshuf4i_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvshuf4i_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvshuf4i_d, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvreplgr2vr_b, "V32Sci", "nc") ++BUILTIN(__builtin_lasx_xvreplgr2vr_h, "V16Ssi", "nc") ++BUILTIN(__builtin_lasx_xvreplgr2vr_w, "V8Sii", "nc") ++BUILTIN(__builtin_lasx_xvreplgr2vr_d, "V4SLLiLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpcnt_b, "V32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvpcnt_h, "V16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvpcnt_w, "V8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvpcnt_d, "V4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvclo_b, "V32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvclo_h, "V16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvclo_w, "V8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvclo_d, 
"V4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvclz_b, "V32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvclz_h, "V16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvclz_w, "V8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvclz_d, "V4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_caf_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_caf_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cor_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cor_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cun_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cun_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cune_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cune_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cueq_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cueq_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_ceq_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_ceq_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cne_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cne_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_clt_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_clt_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cult_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cult_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cle_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cle_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cule_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cule_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_saf_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_saf_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sor_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sor_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sun_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sun_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sune_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sune_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sueq_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sueq_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_seq_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_seq_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sne_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sne_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_slt_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_slt_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sult_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sult_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sle_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sle_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sule_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sule_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfadd_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfadd_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfsub_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfsub_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfmul_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmul_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfdiv_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfdiv_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcvt_h_s, "V16sV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcvt_s_d, "V8fV4dV4d", "nc") ++ 
++BUILTIN(__builtin_lasx_xvfmin_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmin_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfmina_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmina_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfmax_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmax_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfmaxa_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmaxa_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfclass_s, "V8iV8f", "nc") ++BUILTIN(__builtin_lasx_xvfclass_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfsqrt_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfsqrt_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrecip_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrecip_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrint_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrint_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrsqrt_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrsqrt_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvflogb_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvflogb_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcvth_s_h, "V8fV16s", "nc") ++BUILTIN(__builtin_lasx_xvfcvth_d_s, "V4dV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcvtl_s_h, "V8fV16s", "nc") ++BUILTIN(__builtin_lasx_xvfcvtl_d_s, "V4dV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvftint_w_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftint_l_d, "V4SLLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftint_wu_s, "V8UiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftint_lu_d, "V4ULLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrz_w_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrz_l_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrz_wu_s, "V8UiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrz_lu_d, "V4ULLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvffint_s_w, "V8fV8Si", "nc") ++BUILTIN(__builtin_lasx_xvffint_d_l, "V4dV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvffint_s_wu, "V8fV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvffint_d_lu, "V4dV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvreplve_b, "V32cV32cUi", "nc") ++BUILTIN(__builtin_lasx_xvreplve_h, "V16sV16sUi", "nc") ++BUILTIN(__builtin_lasx_xvreplve_w, "V8iV8iUi", "nc") ++BUILTIN(__builtin_lasx_xvreplve_d, "V4LLiV4LLiUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpermi_w, "V8iV8iV8iIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvandn_v, "V32UcV32UcV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xvneg_b, "V32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvneg_h, "V16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvneg_w, "V8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvneg_d, "V4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmuh_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmuh_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmuh_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvmuh_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmuh_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmuh_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmuh_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmuh_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsllwil_d_w, "V4LLiV8SiIUi", "nc") ++BUILTIN(__builtin_lasx_xvsllwil_w_h, "V8SiV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsllwil_h_b, "V16sV32cIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsllwil_du_wu, "V4ULLiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvsllwil_wu_hu, "V8UiV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvsllwil_hu_bu, "V16UsV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsran_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsran_h_w, "V16sV8SiV8Si", 
"nc") ++BUILTIN(__builtin_lasx_xvsran_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssran_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvssran_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvssran_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssran_bu_h, "V32UcV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvssran_hu_w, "V16UsV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvssran_wu_d, "V8UiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrarn_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrarn_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsrarn_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrarn_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvssrarn_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvssrarn_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrarn_bu_h, "V32UcV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvssrarn_hu_w, "V16UsV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvssrarn_wu_d, "V8UiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrln_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrln_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsrln_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrln_bu_h, "V32UcV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvssrln_hu_w, "V16UsV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvssrln_wu_d, "V8UiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrlrn_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrlrn_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsrlrn_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlrn_bu_h, "V32UcV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvssrlrn_hu_w, "V16UsV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvssrlrn_wu_d, "V8UiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrstpi_b, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvfrstpi_h, "V16sV16sV16sIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrstp_b, "V32ScV32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvfrstp_h, "V16SsV16SsV16SsV16Ss", "nc") ++ ++BUILTIN(__builtin_lasx_xvbsrl_v, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvbsll_v, "V32cV32cIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvextrins_b, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvextrins_h, "V16sV16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvextrins_w, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvextrins_d, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmskltz_b, "V32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmskltz_h, "V16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmskltz_w, "V8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvmskltz_d, "V4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsigncov_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvsigncov_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvsigncov_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsigncov_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrne_w_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrne_l_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrp_w_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrp_l_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrm_w_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrm_l_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftint_w_d, "V8SiV4dV4d", "nc") ++BUILTIN(__builtin_lasx_xvffint_s_l, "V8fV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrz_w_d, "V8SiV4dV4d", "nc") ++BUILTIN(__builtin_lasx_xvftintrp_w_d, "V8SiV4dV4d", "nc") ++BUILTIN(__builtin_lasx_xvftintrm_w_d, "V8SiV4dV4d", "nc") 
++BUILTIN(__builtin_lasx_xvftintrne_w_d, "V8SiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftinth_l_s, "V4LLiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintl_l_s, "V4LLiV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvffinth_d_w, "V4dV8Si", "nc") ++BUILTIN(__builtin_lasx_xvffintl_d_w, "V4dV8Si", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrzh_l_s, "V4LLiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrzl_l_s, "V4LLiV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrph_l_s, "V4LLiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrpl_l_s, "V4LLiV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrmh_l_s, "V4LLiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrml_l_s, "V4LLiV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrneh_l_s, "V4LLiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrnel_l_s, "V4LLiV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrintrne_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrintrne_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrintrz_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrintrz_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrintrp_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrintrp_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrintrm_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrintrm_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvld, "V32ScvC*Ii", "nc") ++ ++BUILTIN(__builtin_lasx_xvst, "vV32Scv*Ii", "nc") ++ ++BUILTIN(__builtin_lasx_xvstelm_b, "vV32Scv*IiUi", "nc") ++BUILTIN(__builtin_lasx_xvstelm_h, "vV16Ssv*IiUi", "nc") ++BUILTIN(__builtin_lasx_xvstelm_w, "vV8Siv*IiUi", "nc") ++BUILTIN(__builtin_lasx_xvstelm_d, "vV4SLLiv*IiUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvinsve0_w, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvinsve0_d, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpickve_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvpickve_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpickve_w_f, "V8fV8fIUi", "nc") ++BUILTIN(__builtin_lasx_xvpickve_d_f, "V4dV4dIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlrn_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvssrlrn_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvssrlrn_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrln_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvssrln_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvssrln_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvorn_v, "V32ScV32ScV32Sc", "nc") ++ ++BUILTIN(__builtin_lasx_xvldi, "V4LLiIi", "nc") ++BUILTIN(__builtin_lasx_xvrepli_b, "V32cIi", "nc") ++BUILTIN(__builtin_lasx_xvrepli_h, "V16sIi", "nc") ++BUILTIN(__builtin_lasx_xvrepli_w, "V8iIi", "nc") ++BUILTIN(__builtin_lasx_xvrepli_d, "V4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvldx, "V32ScvC*LLi", "nc") ++BUILTIN(__builtin_lasx_xvstx, "vV32Scv*LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvinsgr2vr_w, "V8SiV8SiiIUi", "nc") ++BUILTIN(__builtin_lasx_xvinsgr2vr_d, "V4SLLiV4SLLiLLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvreplve0_b, "V32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvreplve0_h, "V16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvreplve0_w, "V8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvreplve0_d, "V4SLLiV4SLLi", "nc") ++BUILTIN(__builtin_lasx_xvreplve0_q, "V32ScV32Sc", "nc") ++ ++BUILTIN(__builtin_lasx_vext2xv_d_w, "V4LLiV8Si", "nc") ++BUILTIN(__builtin_lasx_vext2xv_w_h, "V8SiV16s", "nc") ++BUILTIN(__builtin_lasx_vext2xv_h_b, "V16sV32c", "nc") ++ ++BUILTIN(__builtin_lasx_vext2xv_d_h, "V4LLiV16s", "nc") ++BUILTIN(__builtin_lasx_vext2xv_w_b, "V8SiV32c", "nc") ++BUILTIN(__builtin_lasx_vext2xv_d_b, "V4LLiV32c", "nc") ++ ++BUILTIN(__builtin_lasx_vext2xv_du_wu, 
"V4LLiV8Si", "nc") ++BUILTIN(__builtin_lasx_vext2xv_wu_hu, "V8SiV16s", "nc") ++BUILTIN(__builtin_lasx_vext2xv_hu_bu, "V16sV32c", "nc") ++ ++BUILTIN(__builtin_lasx_vext2xv_du_hu, "V4LLiV16s", "nc") ++BUILTIN(__builtin_lasx_vext2xv_wu_bu, "V8SiV32c", "nc") ++BUILTIN(__builtin_lasx_vext2xv_du_bu, "V4LLiV32c", "nc") ++ ++BUILTIN(__builtin_lasx_xvpermi_q, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvpermi_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvperm_w, "V8iV8iV8i", "nc") ++ ++BUILTIN(__builtin_lasx_xvldrepl_b, "V32cvC*Ii", "nc") ++BUILTIN(__builtin_lasx_xvldrepl_h, "V16svC*Ii", "nc") ++BUILTIN(__builtin_lasx_xvldrepl_w, "V8ivC*Ii", "nc") ++BUILTIN(__builtin_lasx_xvldrepl_d, "V4LLivC*Ii", "nc") ++ ++BUILTIN(__builtin_lasx_xvpickve2gr_w, "iV8SiIUi", "nc") ++BUILTIN(__builtin_lasx_xvpickve2gr_d, "LLiV4SLLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpickve2gr_wu, "iV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvpickve2gr_du, "LLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwev_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwev_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsubwev_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsubwev_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmulwev_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmulwev_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwod_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwod_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsubwod_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsubwod_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ 
++BUILTIN(__builtin_lasx_xvmulwod_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmulwod_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwev_d_wu_w, "V4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_w_hu_h, "V8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_h_bu_b, "V16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_q_du_d, "V4LLiV4ULLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmulwev_d_wu_w, "V4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_w_hu_h, "V8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_h_bu_b, "V16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_q_du_d, "V4LLiV4ULLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwod_d_wu_w, "V4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_w_hu_h, "V8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_h_bu_b, "V16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmulwod_d_wu_w, "V4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_w_hu_h, "V8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_h_bu_b, "V16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvhaddw_q_d, "V4LLiV4LLiV4LLi", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvhaddw_qu_du, "V4ULLiV4ULLiV4ULLi", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_qu_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwev_d_w, "V4LLiV4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_w_h, "V8SiV8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_h_b, "V16sV16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwev_d_wu, "V4ULLiV4ULLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_w_hu, "V8UiV8UiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_h_bu, "V16UsV16UsV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_q_du, "V4ULLiV4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwod_d_w, "V4LLiV4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_w_h, "V8SiV8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_h_b, "V16sV16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwod_d_wu, "V4ULLiV4ULLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_w_hu, "V8UiV8UiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_h_bu, "V16UsV16UsV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_q_du, "V4ULLiV4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwev_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_w_hu_h, "V8SiV8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_h_bu_b, "V16sV16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwod_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_w_hu_h, "V8SiV8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_h_bu_b, "V16sV16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc") ++ 
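A leading "I" in these prototypes (as in the rotate- and shift-immediate entries that follow) marks an operand that must be an integer constant expression; the per-builtin immediate-range checks are hooked into Sema through the CheckLoongArchBuiltinFunctionCall declaration added elsewhere in this patch. A sketch, with v16i16 as an illustrative typedef:

typedef short v16i16 __attribute__((vector_size(32)));

static v16i16 rot3(v16i16 v) {
  return __builtin_lasx_xvrotri_h(v, 3);  /* literal immediate: accepted */
}
/* Passing a run-time value as the second operand is expected to be rejected
   at compile time, since the prototype requires a constant.                 */
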
++BUILTIN(__builtin_lasx_xvrotr_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvrotr_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvrotr_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvrotr_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvadd_q, "V4LLiV4LLiV4LLi", "nc") ++BUILTIN(__builtin_lasx_xvsub_q, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmskgez_b, "V32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmsknz_b, "V16sV16s", "nc") ++ ++BUILTIN(__builtin_lasx_xvexth_d_w, "V4LLiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvexth_w_h, "V8SiV16s", "nc") ++BUILTIN(__builtin_lasx_xvexth_h_b, "V16sV32c", "nc") ++BUILTIN(__builtin_lasx_xvexth_q_d, "V4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvexth_du_wu, "V4ULLiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvexth_wu_hu, "V8UiV16Us", "nc") ++BUILTIN(__builtin_lasx_xvexth_hu_bu, "V16UsV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvexth_qu_du, "V4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvrotri_b, "V32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvrotri_h, "V16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvrotri_w, "V8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvrotri_d, "V4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrlni_b_h, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlni_h_w, "V16sV16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlni_w_d, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlni_d_q, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrlrni_b_h, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlrni_h_w, "V16sV16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlrni_w_d, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlrni_d_q, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlni_b_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_h_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_w_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_d_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlni_bu_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_hu_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_wu_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_du_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlrni_b_h, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_h_w, "V16sV16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_w_d, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_d_q, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlrni_bu_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_hu_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_wu_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_du_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrani_b_h, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrani_h_w, "V16sV16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrani_w_d, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrani_d_q, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrarni_b_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvsrarni_h_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvsrarni_w_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvsrarni_d_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrani_b_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrani_h_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrani_w_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrani_d_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrani_bu_h, "V32cV32cV32cIi", "nc") 
++BUILTIN(__builtin_lasx_xvssrani_hu_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrani_wu_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrani_du_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrarni_b_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_h_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_w_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_d_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrarni_bu_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_hu_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_wu_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_du_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xbz_v, "iV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xbnz_v, "iV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xbz_b, "iV32Uc", "nc") ++BUILTIN(__builtin_lasx_xbz_h, "iV16Us", "nc") ++BUILTIN(__builtin_lasx_xbz_w, "iV8Ui", "nc") ++BUILTIN(__builtin_lasx_xbz_d, "iV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xbnz_b, "iV32Uc", "nc") ++BUILTIN(__builtin_lasx_xbnz_h, "iV16Us", "nc") ++BUILTIN(__builtin_lasx_xbnz_w, "iV8Ui", "nc") ++BUILTIN(__builtin_lasx_xbnz_d, "iV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvextl_q_d, "V4LLiV4LLi", "nc") ++BUILTIN(__builtin_lasx_xvextl_qu_du, "V4LLiV4ULLi", "nc") ++ ++ ++// LoongArch BASE ++ ++BUILTIN(__builtin_loongarch_cpucfg, "UiUi", "nc") ++BUILTIN(__builtin_loongarch_csrrd_w, "UiIUi", "nc") ++BUILTIN(__builtin_loongarch_csrrd_d, "ULiIULi", "nc") ++BUILTIN(__builtin_loongarch_csrwr_w, "UiUiIUi", "nc") ++BUILTIN(__builtin_loongarch_csrwr_d, "ULiULiIULi", "nc") ++BUILTIN(__builtin_loongarch_csrxchg_w, "UiUiUiIUi", "nc") ++BUILTIN(__builtin_loongarch_csrxchg_d, "ULiULiULiIULi", "nc") ++BUILTIN(__builtin_loongarch_iocsrrd_b, "UiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrrd_h, "UiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrrd_w, "UiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrrd_d, "ULiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrwr_b, "vUiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrwr_h, "vUiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrwr_w, "vUiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrwr_d, "vULiUi", "nc") ++BUILTIN(__builtin_loongarch_cacop_w, "viUii", "nc") ++BUILTIN(__builtin_loongarch_cacop_d, "viULiLi", "nc") ++BUILTIN(__builtin_loongarch_crc_w_b_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crc_w_h_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crc_w_w_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crc_w_d_w, "iLii", "nc") ++BUILTIN(__builtin_loongarch_crcc_w_b_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crcc_w_h_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crcc_w_w_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crcc_w_d_w, "iLii", "nc") ++BUILTIN(__builtin_loongarch_tlbclr, "v", "nc") ++BUILTIN(__builtin_loongarch_tlbflush, "v", "nc") ++BUILTIN(__builtin_loongarch_tlbfill, "v", "nc") ++BUILTIN(__builtin_loongarch_tlbrd, "v", "nc") ++BUILTIN(__builtin_loongarch_tlbwr, "v", "nc") ++BUILTIN(__builtin_loongarch_tlbsrch, "v", "nc") ++BUILTIN(__builtin_loongarch_syscall, "vIULi", "nc") ++BUILTIN(__builtin_loongarch_break, "vIULi", "nc") ++BUILTIN(__builtin_loongarch_asrtle_d, "vLiLi", "nc") ++BUILTIN(__builtin_loongarch_asrtgt_d, "vLiLi", "nc") ++BUILTIN(__builtin_loongarch_dbar, "vIULi", "nc") ++BUILTIN(__builtin_loongarch_ibar, "vIULi", "nc") ++#undef BUILTIN +diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h +index d8ad9858d..5af3d2099 100644 +--- 
a/clang/include/clang/Basic/TargetBuiltins.h ++++ b/clang/include/clang/Basic/TargetBuiltins.h +@@ -150,6 +150,16 @@ namespace clang { + }; + } // namespace RISCV + ++ /// LoongArch builtins ++ namespace LoongArch { ++ enum { ++ LastTIBuiltin = clang::Builtin::FirstTSBuiltin-1, ++#define BUILTIN(ID, TYPE, ATTRS) BI##ID, ++#include "clang/Basic/BuiltinsLoongArch.def" ++ LastTSBuiltin ++ }; ++ } // namespace LoongArch ++ + /// Flags to identify the types for overloaded Neon builtins. + /// + /// These must be kept in sync with the flags in utils/TableGen/NeonEmitter.h. +@@ -341,7 +351,8 @@ namespace clang { + PPC::LastTSBuiltin, NVPTX::LastTSBuiltin, AMDGPU::LastTSBuiltin, + X86::LastTSBuiltin, VE::LastTSBuiltin, RISCV::LastTSBuiltin, + Hexagon::LastTSBuiltin, Mips::LastTSBuiltin, XCore::LastTSBuiltin, +- SystemZ::LastTSBuiltin, WebAssembly::LastTSBuiltin}); ++ SystemZ::LastTSBuiltin, WebAssembly::LastTSBuiltin, ++ LoongArch::LastTSBuiltin}); + + } // end namespace clang. + +diff --git a/clang/include/clang/Basic/TargetCXXABI.def b/clang/include/clang/Basic/TargetCXXABI.def +index 9501cca76..8ea4becef 100644 +--- a/clang/include/clang/Basic/TargetCXXABI.def ++++ b/clang/include/clang/Basic/TargetCXXABI.def +@@ -88,6 +88,12 @@ ITANIUM_CXXABI(GenericAArch64, "aarch64") + /// - representation of member function pointers adjusted as in ARM. + ITANIUM_CXXABI(GenericMIPS, "mips") + ++/// The generic LoongArch ABI is a modified version of the Itanium ABI. ++/// ++/// At the moment, only change from the generic ABI in this case is: ++/// - representation of member function pointers adjusted as in ARM. ++ITANIUM_CXXABI(GenericLoongArch, "loongarch") ++ + /// The WebAssembly ABI is a modified version of the Itanium ABI. + /// + /// The changes from the Itanium ABI are: +diff --git a/clang/include/clang/Basic/TargetCXXABI.h b/clang/include/clang/Basic/TargetCXXABI.h +index e727f85ed..507cf580e 100644 +--- a/clang/include/clang/Basic/TargetCXXABI.h ++++ b/clang/include/clang/Basic/TargetCXXABI.h +@@ -102,6 +102,9 @@ public: + case GenericAArch64: + return T.isAArch64(); + ++ case GenericLoongArch: ++ return T.isLoongArch(); ++ + case GenericMIPS: + return T.isMIPS(); + +@@ -166,6 +169,7 @@ public: + case Fuchsia: + case GenericARM: + case GenericAArch64: ++ case GenericLoongArch: + case GenericMIPS: + // TODO: ARM-style pointers to member functions put the discriminator in + // the this adjustment, so they don't require functions to have any +@@ -250,6 +254,7 @@ public: + case GenericItanium: + case iOS: // old iOS compilers did not follow this rule + case Microsoft: ++ case GenericLoongArch: + case GenericMIPS: + case XL: + return true; +@@ -288,6 +293,7 @@ public: + case GenericAArch64: + case GenericARM: + case iOS: ++ case GenericLoongArch: + case GenericMIPS: + case XL: + return UseTailPaddingUnlessPOD03; +diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td +index 3cab37b21..8a264ac42 100644 +--- a/clang/include/clang/Driver/Options.td ++++ b/clang/include/clang/Driver/Options.td +@@ -184,6 +184,8 @@ def m_x86_Features_Group : OptionGroup<"">, + Group, Flags<[CoreOption]>, DocName<"X86">; + def m_riscv_Features_Group : OptionGroup<"">, + Group, DocName<"RISCV">; ++def m_loongarch_Features_Group : OptionGroup<"">, ++ Group, DocName<"LoongArch">; + + def m_libc_Group : OptionGroup<"">, Group, + Flags<[HelpHidden]>; +@@ -3491,12 +3493,15 @@ def mcmodel_EQ_medany : Flag<["-"], "mcmodel=medany">, Group, Group, + HelpText<"Enable use of experimental RISC-V 
extensions.">; + +-def munaligned_access : Flag<["-"], "munaligned-access">, Group, +- HelpText<"Allow memory accesses to be unaligned (AArch32/AArch64 only)">; +-def mno_unaligned_access : Flag<["-"], "mno-unaligned-access">, Group, +- HelpText<"Force all memory accesses to be aligned (AArch32/AArch64 only)">; ++def munaligned_access : Flag<["-"], "munaligned-access">, Group, ++ HelpText<"Allow memory accesses to be unaligned">; ++def mno_unaligned_access : Flag<["-"], "mno-unaligned-access">, Group, ++ HelpText<"Force all memory accesses to be aligned">; + def mstrict_align : Flag<["-"], "mstrict-align">, Alias, Flags<[CC1Option,HelpHidden]>, + HelpText<"Force all memory accesses to be aligned (same as mno-unaligned-access)">; ++def mno_strict_align : Flag<["-"], "mno-strict-align">, Group, ++ Flags<[CC1Option,HelpHidden]>, Alias, ++ HelpText<"Allow memory accesses to be unaligned (LoongArch only, same as munaligned-access)">; + def mno_thumb : Flag<["-"], "mno-thumb">, Group; + def mrestrict_it: Flag<["-"], "mrestrict-it">, Group, + HelpText<"Disallow generation of complex IT blocks.">; +@@ -3824,6 +3829,14 @@ def mstack_protector_guard_reg_EQ : Joined<["-"], "mstack-protector-guard-reg="> + def mfentry : Flag<["-"], "mfentry">, HelpText<"Insert calls to fentry at function entry (x86/SystemZ only)">, + Flags<[CC1Option]>, Group, + MarshallingInfoFlag>; ++def mlsx : Flag<["-"], "mlsx">, Group, ++ HelpText<"Use LARCH Loongson LSX instructions.">; ++def mno_lsx : Flag<["-"], "mno-lsx">, Group, ++ HelpText<"Disable LARCH Loongson LSX instructions.">; ++def mlasx : Flag<["-"], "mlasx">, Group, ++ HelpText<"Enable LARCH Loongson LASX instructions.">; ++def mno_lasx : Flag<["-"], "mno-lasx">, Group, ++ HelpText<"Disable LARCH Loongson LASX instructions.">; + def mnop_mcount : Flag<["-"], "mnop-mcount">, HelpText<"Generate mcount/__fentry__ calls as nops. 
To activate they need to be patched in.">, + Flags<[CC1Option]>, Group, + MarshallingInfoFlag>; +diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h +index 681a76dfa..5249fe601 100644 +--- a/clang/include/clang/Sema/Sema.h ++++ b/clang/include/clang/Sema/Sema.h +@@ -13117,6 +13117,9 @@ private: + bool CheckRISCVLMUL(CallExpr *TheCall, unsigned ArgNum); + bool CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, + CallExpr *TheCall); ++ bool CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, ++ unsigned BuiltinID, ++ CallExpr *TheCall); + + bool SemaBuiltinVAStart(unsigned BuiltinID, CallExpr *TheCall); + bool SemaBuiltinVAStartARMMicrosoft(CallExpr *Call); +diff --git a/clang/include/clang/module.modulemap b/clang/include/clang/module.modulemap +index 01bce7771..a21e2beeb 100644 +--- a/clang/include/clang/module.modulemap ++++ b/clang/include/clang/module.modulemap +@@ -42,6 +42,7 @@ module Clang_Basic { + textual header "Basic/BuiltinsHexagon.def" + textual header "Basic/BuiltinsHexagonDep.def" + textual header "Basic/BuiltinsHexagonMapCustomDep.def" ++ textual header "Basic/BuiltinsLoongArch.def" + textual header "Basic/BuiltinsMips.def" + textual header "Basic/BuiltinsNEON.def" + textual header "Basic/BuiltinsNVPTX.def" +diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp +index cfd7bf604..c6e1e9eed 100644 +--- a/clang/lib/AST/ASTContext.cpp ++++ b/clang/lib/AST/ASTContext.cpp +@@ -900,6 +900,7 @@ CXXABI *ASTContext::createCXXABI(const TargetInfo &T) { + case TargetCXXABI::iOS: + case TargetCXXABI::WatchOS: + case TargetCXXABI::GenericAArch64: ++ case TargetCXXABI::GenericLoongArch: + case TargetCXXABI::GenericMIPS: + case TargetCXXABI::GenericItanium: + case TargetCXXABI::WebAssembly: +@@ -11747,6 +11748,7 @@ MangleContext *ASTContext::createMangleContext(const TargetInfo *T) { + case TargetCXXABI::GenericAArch64: + case TargetCXXABI::GenericItanium: + case TargetCXXABI::GenericARM: ++ case TargetCXXABI::GenericLoongArch: + case TargetCXXABI::GenericMIPS: + case TargetCXXABI::iOS: + case TargetCXXABI::WebAssembly: +diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt +index c38c9fddb..f0f3839a7 100644 +--- a/clang/lib/Basic/CMakeLists.txt ++++ b/clang/lib/Basic/CMakeLists.txt +@@ -82,6 +82,7 @@ add_clang_library(clangBasic + Targets/Hexagon.cpp + Targets/Lanai.cpp + Targets/Le64.cpp ++ Targets/LoongArch.cpp + Targets/M68k.cpp + Targets/MSP430.cpp + Targets/Mips.cpp +diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp +index 2d6ef9984..d63468161 100644 +--- a/clang/lib/Basic/Targets.cpp ++++ b/clang/lib/Basic/Targets.cpp +@@ -24,6 +24,7 @@ + #include "Targets/Hexagon.h" + #include "Targets/Lanai.h" + #include "Targets/Le64.h" ++#include "Targets/LoongArch.h" + #include "Targets/M68k.h" + #include "Targets/MSP430.h" + #include "Targets/Mips.h" +@@ -327,6 +328,25 @@ TargetInfo *AllocateTarget(const llvm::Triple &Triple, + case llvm::Triple::le64: + return new Le64TargetInfo(Triple, Opts); + ++#if 0 ++ //TODO: support it in future ++ case llvm::Triple::loongarch32: ++ switch (os) { ++ case llvm::Triple::Linux: ++ return new LinuxTargetInfo(Triple, Opts); ++ default: ++ return new LoongArchTargetInfo(Triple, Opts); ++ } ++#endif ++ ++ case llvm::Triple::loongarch64: ++ switch (os) { ++ case llvm::Triple::Linux: ++ return new LinuxTargetInfo(Triple, Opts); ++ default: ++ return new LoongArchTargetInfo(Triple, Opts); ++ } ++ + case llvm::Triple::ppc: + if (Triple.isOSDarwin()) + 
return new DarwinPPC32TargetInfo(Triple, Opts); +diff --git a/clang/lib/Basic/Targets/LoongArch.cpp b/clang/lib/Basic/Targets/LoongArch.cpp +new file mode 100644 +index 000000000..7f5632327 +--- /dev/null ++++ b/clang/lib/Basic/Targets/LoongArch.cpp +@@ -0,0 +1,184 @@ ++//===--- LoongArch.cpp - Implement LoongArch target feature support -----------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file implements LoongArch TargetInfo objects. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArch.h" ++#include "Targets.h" ++#include "clang/Basic/Diagnostic.h" ++#include "clang/Basic/MacroBuilder.h" ++#include "clang/Basic/TargetBuiltins.h" ++#include "llvm/ADT/StringSwitch.h" ++ ++using namespace clang; ++using namespace clang::targets; ++ ++const Builtin::Info LoongArchTargetInfo::BuiltinInfo[] = { ++#define BUILTIN(ID, TYPE, ATTRS) \ ++ {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, ++#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) \ ++ {#ID, TYPE, ATTRS, HEADER, ALL_LANGUAGES, nullptr}, ++#include "clang/Basic/BuiltinsLoongArch.def" ++}; ++ ++bool LoongArchTargetInfo::processorSupportsGPR64() const { ++ return llvm::StringSwitch(CPU) ++ .Case("la264", true) ++ .Case("la364", true) ++ .Case("la464", true) ++ .Default(false); ++ return false; ++} ++ ++static constexpr llvm::StringLiteral ValidCPUNames[] = { ++ {"la264"}, {"la364"}, {"la464"}}; ++ ++bool LoongArchTargetInfo::isValidCPUName(StringRef Name) const { ++ return llvm::find(ValidCPUNames, Name) != std::end(ValidCPUNames); ++} ++ ++void LoongArchTargetInfo::fillValidCPUList( ++ SmallVectorImpl &Values) const { ++ Values.append(std::begin(ValidCPUNames), std::end(ValidCPUNames)); ++} ++ ++void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts, ++ MacroBuilder &Builder) const { ++ Builder.defineMacro("__loongarch__"); ++ unsigned GRLen = getRegisterWidth(); ++ Builder.defineMacro("__loongarch_grlen", Twine(GRLen)); ++ if (GRLen == 64) ++ Builder.defineMacro("__loongarch64"); ++ ++ if (ABI == "lp32") { ++ Builder.defineMacro("__loongarch32"); ++ } else { ++ Builder.defineMacro("__loongarch_lp64"); ++ } ++ ++ if (ABI == "lp32") { ++ Builder.defineMacro("_ABILP32", "1"); ++ } else if (ABI == "lpx32") { ++ Builder.defineMacro("_ABILPX32", "2"); ++ } else if (ABI == "lp64") { ++ Builder.defineMacro("_ABILP64", "3"); ++ Builder.defineMacro("_LOONGARCH_SIM", "_ABILP64"); ++ } else ++ llvm_unreachable("Invalid ABI."); ++ ++ Builder.defineMacro("__REGISTER_PREFIX__", ""); ++ ++ switch (FloatABI) { ++ case HardFloat: ++ Builder.defineMacro("__loongarch_hard_float", Twine(1)); ++ Builder.defineMacro(IsSingleFloat ? 
"__loongarch_single_float" ++ : "__loongarch_double_float", ++ Twine(1)); ++ break; ++ case SoftFloat: ++ Builder.defineMacro("__loongarch_soft_float", Twine(1)); ++ break; ++ } ++ ++ switch (FPMode) { ++ case FP32: ++ Builder.defineMacro("__loongarch_fpr", Twine(32)); ++ Builder.defineMacro("__loongarch_frlen", Twine(32)); ++ break; ++ case FP64: ++ Builder.defineMacro("__loongarch_fpr", Twine(64)); ++ Builder.defineMacro("__loongarch_frlen", Twine(64)); ++ break; ++ } ++ ++ if (HasLSX) ++ Builder.defineMacro("__loongarch_sx", Twine(1)); ++ ++ if (HasLASX) ++ Builder.defineMacro("__loongarch_asx", Twine(1)); ++ ++ Builder.defineMacro("_LOONGARCH_SZPTR", Twine(getPointerWidth(0))); ++ Builder.defineMacro("_LOONGARCH_SZINT", Twine(getIntWidth())); ++ Builder.defineMacro("_LOONGARCH_SZLONG", Twine(getLongWidth())); ++ ++ Builder.defineMacro("_LOONGARCH_ARCH", "\"" + CPU + "\""); ++ Builder.defineMacro("_LOONGARCH_ARCH_" + StringRef(CPU).upper()); ++ ++ Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1"); ++ Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2"); ++ Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4"); ++ ++ // 32-bit loongarch processors don't have the necessary lld/scd instructions ++ // found in 64-bit processors. In the case of lp32 on a 64-bit processor, ++ // the instructions exist but using them violates the ABI since they ++ // require 64-bit GPRs and LP32 only supports 32-bit GPRs. ++ if (ABI == "lpx32" || ABI == "lp64") ++ Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8"); ++} ++ ++bool LoongArchTargetInfo::hasFeature(StringRef Feature) const { ++ return llvm::StringSwitch(Feature) ++ .Case("fp64", FPMode == FP64) ++ .Case("lsx", HasLSX) ++ .Case("lasx", HasLASX) ++ .Default(false); ++} ++ ++ArrayRef LoongArchTargetInfo::getTargetBuiltins() const { ++ return llvm::makeArrayRef(BuiltinInfo, clang::LoongArch::LastTSBuiltin - ++ Builtin::FirstTSBuiltin); ++} ++ ++bool LoongArchTargetInfo::validateTarget(DiagnosticsEngine &Diags) const { ++ // FIXME: It's valid to use LP32 on a 64-bit CPU but the backend can't handle ++ // this yet. It's better to fail here than on the backend assertion. ++ if (processorSupportsGPR64() && ABI == "lp32") { ++ Diags.Report(diag::err_target_unsupported_abi) << ABI << CPU; ++ return false; ++ } ++ ++ // 64-bit ABI's require 64-bit CPU's. ++ if (!processorSupportsGPR64() && (ABI == "lpx32" || ABI == "lp64")) { ++ Diags.Report(diag::err_target_unsupported_abi) << ABI << CPU; ++ return false; ++ } ++ ++ // FIXME: It's valid to use lp32 on a loongarch64 triple but the backend ++ // can't handle this yet. It's better to fail here than on the ++ // backend assertion. ++ if (getTriple().isLoongArch64() && ABI == "lp32") { ++ Diags.Report(diag::err_target_unsupported_abi_for_triple) ++ << ABI << getTriple().str(); ++ return false; ++ } ++ ++ // FIXME: It's valid to use lpx32/lp64 on a loongarch32 triple but the backend ++ // can't handle this yet. It's better to fail here than on the ++ // backend assertion. 
++ if (getTriple().isLoongArch32() && (ABI == "lpx32" || ABI == "lp64")) { ++ Diags.Report(diag::err_target_unsupported_abi_for_triple) ++ << ABI << getTriple().str(); ++ return false; ++ } ++ ++ // -mfp32 and lpx32/lp64 ABIs are incompatible ++ if (FPMode != FP64 && !IsSingleFloat && ++ (ABI == "lpx32" || ABI == "lp64")) { ++ Diags.Report(diag::err_opt_not_valid_with_opt) << "-mfp32" << ABI; ++ return false; ++ } ++ ++ if (FPMode != FP64 && (CPU == "la264" || CPU == "la364" || CPU == "la464")) { ++ Diags.Report(diag::err_opt_not_valid_with_opt) << "-mfp32" << CPU; ++ return false; ++ } ++ ++ return true; ++} +diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h +new file mode 100644 +index 000000000..ef18cc887 +--- /dev/null ++++ b/clang/lib/Basic/Targets/LoongArch.h +@@ -0,0 +1,402 @@ ++//===--- LoongArch.h - Declare LoongArch target feature support -----------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file declares LoongArch TargetInfo objects. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_CLANG_LIB_BASIC_TARGETS_LOONGARCH_H ++#define LLVM_CLANG_LIB_BASIC_TARGETS_LOONGARCH_H ++ ++#include "clang/Basic/TargetInfo.h" ++#include "clang/Basic/TargetOptions.h" ++#include "llvm/ADT/Triple.h" ++#include "llvm/Support/Compiler.h" ++ ++namespace clang { ++namespace targets { ++ ++class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo { ++ void setDataLayout() { ++ StringRef Layout; ++ ++ if (ABI == "lp32") ++ Layout = "m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64"; ++ else if (ABI == "lpx32") ++ Layout = "m:e-p:32:32-i8:8:32-i16:16:32-i64:64-n32:64-S128"; ++ else if (ABI == "lp64") ++ Layout = "m:e-i8:8:32-i16:16:32-i64:64-n32:64-S128"; ++ else ++ llvm_unreachable("Invalid ABI"); ++ ++ resetDataLayout(("e-" + Layout).str()); ++ } ++ ++ static const Builtin::Info BuiltinInfo[]; ++ std::string CPU; ++ bool IsSingleFloat; ++ enum LoongArchFloatABI { HardFloat, SoftFloat } FloatABI; ++ bool HasLSX; ++ bool HasLASX; ++ ++protected: ++ enum FPModeEnum { FP32, FP64 } FPMode; ++ std::string ABI; ++ ++public: ++ LoongArchTargetInfo(const llvm::Triple &Triple, const TargetOptions &) ++ : TargetInfo(Triple), IsSingleFloat(false), FloatABI(HardFloat), ++ HasLSX(false), HasLASX(false), FPMode(FP64) { ++ TheCXXABI.set(TargetCXXABI::GenericLoongArch); ++ ++ if (Triple.isLoongArch32()) ++ setABI("lp32"); ++ else if (Triple.getEnvironment() == llvm::Triple::GNUABILPX32) ++ setABI("lpx32"); ++ else ++ setABI("lp64"); ++ ++ // Currently, CPU only supports 'la464' in LA. 
++ if ( ABI == "lp64") ++ CPU = "la464"; ++ } ++ ++ bool processorSupportsGPR64() const; ++ ++ StringRef getABI() const override { return ABI; } ++ ++ bool setABI(const std::string &Name) override { ++ if (Name == "lp32") { ++ setLP32ABITypes(); ++ ABI = Name; ++ return true; ++ } ++ ++ if (Name == "lpx32") { ++ //setLPX32ABITypes(); ++ //ABI = Name; ++ //return true; ++ //TODO: implement ++ return false; ++ } ++ if (Name == "lp64") { ++ setLP64ABITypes(); ++ ABI = Name; ++ return true; ++ } ++ return false; ++ } ++ ++ void setLP32ABITypes() { ++ Int64Type = SignedLongLong; ++ IntMaxType = Int64Type; ++ LongDoubleFormat = &llvm::APFloat::IEEEdouble(); ++ LongDoubleWidth = LongDoubleAlign = 64; ++ LongWidth = LongAlign = 32; ++ MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 32; ++ PointerWidth = PointerAlign = 32; ++ PtrDiffType = SignedInt; ++ SizeType = UnsignedInt; ++ SuitableAlign = 64; ++ } ++ ++ void setLPX32LP64ABITypes() { ++ LongDoubleWidth = LongDoubleAlign = 128; ++ LongDoubleFormat = &llvm::APFloat::IEEEquad(); ++ if (getTriple().isOSFreeBSD()) { ++ LongDoubleWidth = LongDoubleAlign = 64; ++ LongDoubleFormat = &llvm::APFloat::IEEEdouble(); ++ } ++ MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; ++ SuitableAlign = 128; ++ } ++ ++ void setLP64ABITypes() { ++ setLPX32LP64ABITypes(); ++ if (getTriple().isOSOpenBSD()) { ++ Int64Type = SignedLongLong; ++ } else { ++ Int64Type = SignedLong; ++ } ++ IntMaxType = Int64Type; ++ LongWidth = LongAlign = 64; ++ PointerWidth = PointerAlign = 64; ++ PtrDiffType = SignedLong; ++ SizeType = UnsignedLong; ++ } ++ ++ void setLPX32ABITypes() { ++ setLPX32LP64ABITypes(); ++ Int64Type = SignedLongLong; ++ IntMaxType = Int64Type; ++ LongWidth = LongAlign = 32; ++ PointerWidth = PointerAlign = 32; ++ PtrDiffType = SignedInt; ++ SizeType = UnsignedInt; ++ } ++ ++ bool isValidCPUName(StringRef Name) const override; ++ void fillValidCPUList(SmallVectorImpl &Values) const override; ++ ++ bool setCPU(const std::string &Name) override { ++ CPU = Name; ++ return isValidCPUName(Name); ++ } ++ ++ const std::string &getCPU() const { return CPU; } ++ bool ++ initFeatureMap(llvm::StringMap &Features, DiagnosticsEngine &Diags, ++ StringRef CPU, ++ const std::vector &FeaturesVec) const override { ++#if 0 ++ if (CPU.empty()) ++ CPU = getCPU(); ++ Features[CPU] = true; ++#else ++// if (CPU == "la464") ++// Features["loongarch64"] = true; ++ ++//FIXME: we need this? 
++// if (CPU == "la464") ++// Features["64bit"] = true; ++#endif ++ return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec); ++ } ++ ++ void getTargetDefines(const LangOptions &Opts, ++ MacroBuilder &Builder) const override; ++ ++ ArrayRef getTargetBuiltins() const override; ++ ++ bool hasFeature(StringRef Feature) const override; ++ ++ bool hasBitIntType() const override { return true; } ++ ++ BuiltinVaListKind getBuiltinVaListKind() const override { ++ return TargetInfo::VoidPtrBuiltinVaList; ++ } ++ ++ ArrayRef getGCCRegNames() const override { ++ static const char *const GCCRegNames[] = { ++ // CPU register names ++ // Must match second column of GCCRegAliases ++ "$r0", "$r1", "$r2", "$r3", "$r4", "$r5", "$r6", "$r7", "$r8", "$r9", ++ "$r10", "$r11", "$r12", "$r13", "$r14", "$r15", "$r16", "$r17", "$r18", ++ "$r19", "$r20", "$r21", "$r22", "$r23", "$r24", "$r25", "$r26", "$r27", ++ "$r28", "$r29", "$r30", "$r31", ++ // Floating point register names ++ "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8", "$f9", ++ "$f10", "$f11", "$f12", "$f13", "$f14", "$f15", "$f16", "$f17", "$f18", ++ "$f19", "$f20", "$f21", "$f22", "$f23", "$f24", "$f25", "$f26", "$f27", ++ "$f28", "$f29", "$f30", "$f31", ++ // condition register names ++ "$fcc0", "$fcc1", "$fcc2", "$fcc3", "$fcc4", "$fcc5", "$fcc6", "$fcc7", ++ // LSX register names ++ "$vr0", "$vr1", "$vr2", "$vr3", "$vr4", "$vr5", "$vr6", "$vr7", "$vr8", ++ "$vr9", "$vr10", "$vr11", "$vr12", "$vr13", "$vr14", "$vr15", "$vr16", ++ "$vr17", "$vr18", "$vr19", "$vr20", "$vr21", "$vr22", "$vr23", "$vr24", ++ "$vr25", "$vr26", "$vr27", "$vr28", "$vr29", "$vr30", "$vr31", ++ // LASX register names ++ "$xr0", "$xr1", "$xr2", "$xr3", "$xr4", "$xr5", "$xr6", "$xr7", "$xr8", ++ "$xr9", "$xr10", "$xr11", "$xr12", "$xr13", "$xr14", "$xr15", "$xr16", ++ "$xr17", "$xr18", "$xr19", "$xr20", "$xr21", "$xr22", "$xr23", "$xr24", ++ "$xr25", "$xr26", "$xr27", "$xr28", "$xr29", "$xr30", "$xr31" ++ ++ }; ++ return llvm::makeArrayRef(GCCRegNames); ++ } ++ ++ bool validateAsmConstraint(const char *&Name, ++ TargetInfo::ConstraintInfo &Info) const override { ++ switch (*Name) { ++ default: ++ return false; ++ case 'r': // CPU registers. ++ case 'f': // floating-point registers. ++ Info.setAllowsRegister(); ++ return true; ++ case 'l': // Signed 16-bit constant ++ case 'I': // Signed 12-bit constant ++ case 'K': // Unsigned 12-bit constant ++ case 'J': // Integer 0 ++ case 'G': // Floating-point 0 ++ return true; ++ case 'm': // Memory address with 12-bit offset ++ case 'R': // An address that can be used in a non-macro load or store ++ Info.setAllowsMemory(); ++ return true; ++ case 'Z': ++ if (Name[1] == 'C' // Memory address with 16-bit and 4 bytes aligned offset ++ || Name[1] == 'B' ) { // Memory address with 0 offset ++ Info.setAllowsMemory(); ++ Name++; // Skip over 'Z'. ++ return true; ++ } ++ return false; ++ } ++ } ++ ++ std::string convertConstraint(const char *&Constraint) const override { ++ std::string R; ++ switch (*Constraint) { ++ case 'Z': // Two-character constraint; add "^" hint for later parsing. 
++ if (Constraint[1] == 'C' || Constraint[1] == 'B') { ++ R = std::string("^") + std::string(Constraint, 2); ++ Constraint++; ++ return R; ++ } ++ break; ++ } ++ return TargetInfo::convertConstraint(Constraint); ++ } ++ ++ const char *getClobbers() const override { ++#if 0 ++ // In GCC, $1 is not widely used in generated code (it's used only in a few ++ // specific situations), so there is no real need for users to add it to ++ // the clobbers list if they want to use it in their inline assembly code. ++ // ++ // In LLVM, $1 is treated as a normal GPR and is always allocatable during ++ // code generation, so using it in inline assembly without adding it to the ++ // clobbers list can cause conflicts between the inline assembly code and ++ // the surrounding generated code. ++ // ++ // Another problem is that LLVM is allowed to choose $1 for inline assembly ++ // operands, which will conflict with the ".set at" assembler option (which ++ // we use only for inline assembly, in order to maintain compatibility with ++ // GCC) and will also conflict with the user's usage of $1. ++ // ++ // The easiest way to avoid these conflicts and keep $1 as an allocatable ++ // register for generated code is to automatically clobber $1 for all inline ++ // assembly code. ++ // ++ // FIXME: We should automatically clobber $1 only for inline assembly code ++ // which actually uses it. This would allow LLVM to use $1 for inline ++ // assembly operands if the user's assembly code doesn't use it. ++ return "~{$1}"; ++#endif ++ return ""; ++ } ++ ++ bool handleTargetFeatures(std::vector &Features, ++ DiagnosticsEngine &Diags) override { ++ IsSingleFloat = false; ++ FloatABI = HardFloat; ++ FPMode = FP64; ++ ++ for (const auto &Feature : Features) { ++ if (Feature == "+single-float") ++ IsSingleFloat = true; ++ else if (Feature == "+soft-float") ++ FloatABI = SoftFloat; ++ else if (Feature == "+lsx") ++ HasLSX = true; ++ else if (Feature == "+lasx") { ++ HasLASX = true; ++ HasLSX = true; ++ } else if (Feature == "+fp64") ++ FPMode = FP64; ++ else if (Feature == "-fp64") ++ FPMode = FP32; ++ } ++ ++ setDataLayout(); ++ ++ return true; ++ } ++ ++ int getEHDataRegisterNumber(unsigned RegNo) const override { ++ if (RegNo == 0) ++ return 4; ++ if (RegNo == 1) ++ return 5; ++ return -1; ++ } ++ ++ bool isCLZForZeroUndef() const override { return false; } ++ ++ ArrayRef getGCCRegAliases() const override { ++ static const TargetInfo::GCCRegAlias GCCRegAliases[] = { ++ {{"zero", "$zero", "r0", "$0"}, "$r0"}, ++ {{"ra", "$ra", "r1", "$1"}, "$r1"}, ++ {{"tp", "$tp", "r2", "$2"}, "$r2"}, ++ {{"sp", "$sp", "r3", "$3"}, "$r3"}, ++ {{"a0", "$a0", "r4", "$4", "v0"}, "$r4"}, ++ {{"a1", "$a1", "r5", "$5", "v1"}, "$r5"}, ++ {{"a2", "$a2", "r6", "$6"}, "$r6"}, ++ {{"a3", "$a3", "r7", "$7"}, "$r7"}, ++ {{"a4", "$a4", "r8", "$8"}, "$r8"}, ++ {{"a5", "$a5", "r9", "$9"}, "$r9"}, ++ {{"a6", "$a6", "r10", "$10"}, "$r10"}, ++ {{"a7", "$a7", "r11", "$11"}, "$r11"}, ++ {{"t0", "$t0", "r12", "$12"}, "$r12"}, ++ {{"t1", "$t1", "r13", "$13"}, "$r13"}, ++ {{"t2", "$t2", "r14", "$14"}, "$r14"}, ++ {{"t3", "$t3", "r15", "$15"}, "$r15"}, ++ {{"t4", "$t4", "r16", "$16"}, "$r16"}, ++ {{"t5", "$t5", "r17", "$17"}, "$r17"}, ++ {{"t6", "$t6", "r18", "$18"}, "$r18"}, ++ {{"t7", "$t7", "r19", "$19"}, "$r19"}, ++ {{"t8", "$t8", "r20", "$20"}, "$r20"}, ++ //{{"x", "$x", "r21", "$21"}, "$r21"}, ++ {{"fp", "$fp", "r22", "$22"}, "$r22"}, ++ {{"s0", "$s0", "r23", "$23"}, "$r23"}, ++ {{"s1", "$s1", "r24", "$24"}, "$r24"}, ++ {{"s2", "$s2", "r25", "$25"}, 
"$r25"}, ++ {{"s3", "$s3", "r26", "$26"}, "$r26"}, ++ {{"s4", "$s4", "r27", "$27"}, "$r27"}, ++ {{"s5", "$s5", "r28", "$28"}, "$r28"}, ++ {{"s6", "$s6", "r29", "$29"}, "$r29"}, ++ {{"s7", "$s7", "r30", "$30"}, "$r30"}, ++ {{"s8", "$s8", "r31", "$31"}, "$r31"}, ++ {{"fa0", "$fa0", "f0"}, "$f0"}, ++ {{"fa1", "$fa1", "f1"}, "$f1"}, ++ {{"fa2", "$fa2", "f2"}, "$f2"}, ++ {{"fa3", "$fa3", "f3"}, "$f3"}, ++ {{"fa4", "$fa4", "f4"}, "$f4"}, ++ {{"fa5", "$fa5", "f5"}, "$f5"}, ++ {{"fa6", "$fa6", "f6"}, "$f6"}, ++ {{"fa7", "$fa7", "f7"}, "$f7"}, ++ {{"ft0", "$ft0", "f8"}, "$f8"}, ++ {{"ft1", "$ft1", "f9"}, "$f9"}, ++ {{"ft2", "$ft2", "f10"}, "$f10"}, ++ {{"ft3", "$ft3", "f11"}, "$f11"}, ++ {{"ft4", "$ft4", "f12"}, "$f12"}, ++ {{"ft5", "$ft5", "f13"}, "$f13"}, ++ {{"ft6", "$ft6", "f14"}, "$f14"}, ++ {{"ft7", "$ft7", "f15"}, "$f15"}, ++ {{"ft8", "$ft8", "f16"}, "$f16"}, ++ {{"ft9", "$ft9", "f17"}, "$f17"}, ++ {{"ft10", "$ft10", "f18"}, "$f18"}, ++ {{"ft11", "$ft11", "f19"}, "$f19"}, ++ {{"ft12", "$ft12", "f20"}, "$f20"}, ++ {{"ft13", "$ft13", "f21"}, "$f21"}, ++ {{"ft14", "$ft14", "f22"}, "$f22"}, ++ {{"ft15", "$ft15", "f23"}, "$f23"}, ++ {{"fs0", "$fs0", "f24"}, "$f24"}, ++ {{"fs1", "$fs1", "f25"}, "$f25"}, ++ {{"fs2", "$fs2", "f26"}, "$f26"}, ++ {{"fs3", "$fs3", "f27"}, "$f27"}, ++ {{"fs4", "$fs4", "f28"}, "$f28"}, ++ {{"fs5", "$fs5", "f29"}, "$f29"}, ++ {{"fs6", "$fs6", "f30"}, "$f30"}, ++ {{"fs7", "$fs7", "f31"}, "$f31"}, ++ }; ++ return llvm::makeArrayRef(GCCRegAliases); ++ } ++ ++ bool hasInt128Type() const override { ++ return (ABI == "lpx32" || ABI == "lp64") || getTargetOpts().ForceEnableInt128; ++ } ++ ++ bool validateTarget(DiagnosticsEngine &Diags) const override; ++}; ++} // namespace targets ++} // namespace clang ++ ++#endif // LLVM_CLANG_LIB_BASIC_TARGETS_LOONGARCH_H +diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp +index d87692fac..197915e15 100644 +--- a/clang/lib/CodeGen/CodeGenModule.cpp ++++ b/clang/lib/CodeGen/CodeGenModule.cpp +@@ -84,6 +84,7 @@ static CGCXXABI *createCXXABI(CodeGenModule &CGM) { + case TargetCXXABI::GenericARM: + case TargetCXXABI::iOS: + case TargetCXXABI::WatchOS: ++ case TargetCXXABI::GenericLoongArch: + case TargetCXXABI::GenericMIPS: + case TargetCXXABI::GenericItanium: + case TargetCXXABI::WebAssembly: +diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp +index fc2ff15a6..eebd6fb65 100644 +--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp ++++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp +@@ -533,6 +533,9 @@ CodeGen::CGCXXABI *CodeGen::CreateItaniumCXXABI(CodeGenModule &CGM) { + return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true, + /*UseARMGuardVarABI=*/true); + ++ case TargetCXXABI::GenericLoongArch: ++ return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true); ++ + case TargetCXXABI::GenericMIPS: + return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true); + +diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp +index 44743fa02..fbc6aed85 100644 +--- a/clang/lib/CodeGen/TargetInfo.cpp ++++ b/clang/lib/CodeGen/TargetInfo.cpp +@@ -11323,6 +11323,558 @@ public: + }; + } // namespace + ++//===----------------------------------------------------------------------===// ++// LoongArch ABI Implementation ++//===----------------------------------------------------------------------===// ++ ++namespace { ++class LoongArchABIInfo : public DefaultABIInfo { ++private: ++ // Size of the integer ('r') registers in bits. 
++ unsigned GRLen; ++ // Size of the floating point ('f') registers in bits. Note that the target ++ // ISA might have a wider FRLen than the selected ABI. ++ unsigned FRLen; ++ static const int NumArgGPRs = 8; ++ static const int NumArgFPRs = 8; ++ bool detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff, ++ llvm::Type *&Field1Ty, ++ CharUnits &Field1Off, ++ llvm::Type *&Field2Ty, ++ CharUnits &Field2Off) const; ++ ++public: ++ LoongArchABIInfo(CodeGen::CodeGenTypes &CGT, unsigned GRLen, unsigned FRLen) ++ : DefaultABIInfo(CGT), GRLen(GRLen), FRLen(FRLen) {} ++ ++ // DefaultABIInfo's classifyReturnType and classifyArgumentType are ++ // non-virtual, but computeInfo is virtual, so we overload it. ++ void computeInfo(CGFunctionInfo &FI) const override; ++ ++ ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed, int &ArgGPRsLeft, ++ int &ArgFPRsLeft) const; ++ ABIArgInfo classifyReturnType(QualType RetTy) const; ++ ++ uint64_t MinABIStackAlignInBytes = 8; ++ uint64_t StackAlignInBytes = 16; ++ llvm::Type* HandleAggregates(QualType Ty, uint64_t TySize) const; ++ llvm::Type* getPaddingType(uint64_t Align, uint64_t Offset) const; ++ void CoerceToIntArgs(uint64_t TySize, ++ SmallVectorImpl &ArgList) const; ++ ++ Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, ++ QualType Ty) const override; ++ ++ ABIArgInfo extendType(QualType Ty) const; ++ ++ bool detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty, ++ CharUnits &Field1Off, llvm::Type *&Field2Ty, ++ CharUnits &Field2Off, int &NeededArgGPRs, ++ int &NeededArgFPRs) const; ++ ABIArgInfo coerceAndExpandFPCCEligibleStruct(llvm::Type *Field1Ty, ++ CharUnits Field1Off, ++ llvm::Type *Field2Ty, ++ CharUnits Field2Off) const; ++}; ++} // end anonymous namespace ++ ++void LoongArchABIInfo::computeInfo(CGFunctionInfo &FI) const { ++ QualType RetTy = FI.getReturnType(); ++ if (!getCXXABI().classifyReturnType(FI)) ++ FI.getReturnInfo() = classifyReturnType(RetTy); ++ ++ // IsRetIndirect is true if classifyArgumentType indicated the value should ++ // be passed indirect or if the type size is greater than 2*grlen. ++ bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect || ++ getContext().getTypeSize(RetTy) > (2 * GRLen); ++ ++ // We must track the number of GPRs used in order to conform to the LoongArch ++ // ABI, as integer scalars passed in registers should have signext/zeroext ++ // when promoted, but are anyext if passed on the stack. As GPR usage is ++ // different for variadic arguments, we must also track whether we are ++ // examining a vararg or not. ++ int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs; ++ int ArgFPRsLeft = FRLen ? NumArgFPRs : 0; ++ int NumFixedArgs = FI.getNumRequiredArgs(); ++ ++ int ArgNum = 0; ++ for (auto &ArgInfo : FI.arguments()) { ++ bool IsFixed = ArgNum < NumFixedArgs; ++ ArgInfo.info = ++ classifyArgumentType(ArgInfo.type, IsFixed, ArgGPRsLeft, ArgFPRsLeft); ++ ArgNum++; ++ } ++} ++ ++// Returns true if the struct is a potential candidate for the floating point ++// calling convention. If this function returns true, the caller is ++// responsible for checking that if there is only a single field then that ++// field is a float. 
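
As a non-normative illustration of the flattening rule described above (assuming GRLen = FRLen = 64; all type names below are invented for the example), these are the kinds of aggregates the helper is expected to accept or reject for the FP calling convention:

  // Expected to be eligible: flattens to fp+fp or int+fp, each element small enough.
  struct FF  { float x; float y; };   // two FPRs
  struct IF  { long  i; double d; };  // one GPR plus one FPR
  struct CD  { _Complex double z; };  // a complex value flattens to fp+fp
  struct AF  { float v[2]; };         // small array of floating-point elements

  // Expected to fall back to the integer calling convention.
  struct II  { int a; int b; };       // int+int pairs are rejected
  struct FFF { float a, b, c; };      // more than two flattened fields
  union  U   { float f; int i; };     // non-empty unions are rejected
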
++bool LoongArchABIInfo::detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff, ++ llvm::Type *&Field1Ty, ++ CharUnits &Field1Off, ++ llvm::Type *&Field2Ty, ++ CharUnits &Field2Off) const { ++ bool IsInt = Ty->isIntegralOrEnumerationType(); ++ bool IsFloat = Ty->isRealFloatingType(); ++ ++ if (IsInt || IsFloat) { ++ uint64_t Size = getContext().getTypeSize(Ty); ++ if (IsInt && Size > GRLen) ++ return false; ++ // Can't be eligible if larger than the FP registers. Half precision isn't ++ // currently supported on LoongArch and the ABI hasn't been confirmed, so ++ // default to the integer ABI in that case. ++ if (IsFloat && (Size > FRLen || Size < 32)) ++ return false; ++ // Can't be eligible if an integer type was already found (int+int pairs ++ // are not eligible). ++ if (IsInt && Field1Ty && Field1Ty->isIntegerTy()) ++ return false; ++ if (!Field1Ty) { ++ Field1Ty = CGT.ConvertType(Ty); ++ Field1Off = CurOff; ++ return true; ++ } ++ if (!Field2Ty) { ++ Field2Ty = CGT.ConvertType(Ty); ++ Field2Off = CurOff; ++ return true; ++ } ++ return false; ++ } ++ ++ if (auto CTy = Ty->getAs()) { ++ if (Field1Ty) ++ return false; ++ QualType EltTy = CTy->getElementType(); ++ if (getContext().getTypeSize(EltTy) > FRLen) ++ return false; ++ Field1Ty = CGT.ConvertType(EltTy); ++ Field1Off = CurOff; ++ assert(CurOff.isZero() && "Unexpected offset for first field"); ++ Field2Ty = Field1Ty; ++ Field2Off = Field1Off + getContext().getTypeSizeInChars(EltTy); ++ return true; ++ } ++ ++ if (const ConstantArrayType *ATy = getContext().getAsConstantArrayType(Ty)) { ++ uint64_t ArraySize = ATy->getSize().getZExtValue(); ++ QualType EltTy = ATy->getElementType(); ++ CharUnits EltSize = getContext().getTypeSizeInChars(EltTy); ++ for (uint64_t i = 0; i < ArraySize; ++i) { ++ bool Ret = detectFPCCEligibleStructHelper(EltTy, CurOff, Field1Ty, ++ Field1Off, Field2Ty, Field2Off); ++ if (!Ret) ++ return false; ++ CurOff += EltSize; ++ } ++ return true; ++ } ++ ++ if (const auto *RTy = Ty->getAs()) { ++ // Structures with either a non-trivial destructor or a non-trivial ++ // copy constructor are not eligible for the FP calling convention. ++ if (getRecordArgABI(Ty, CGT.getCXXABI())) ++ return false; ++ if (isEmptyRecord(getContext(), Ty, true)) ++ return true; ++ const RecordDecl *RD = RTy->getDecl(); ++ // Unions aren't eligible unless they're empty (which is caught above). ++ if (RD->isUnion()) ++ return false; ++ const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); ++ // If this is a C++ record, check the bases first. ++ if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) { ++ for (const CXXBaseSpecifier &B : CXXRD->bases()) { ++ const auto *BDecl = ++ cast(B.getType()->castAs()->getDecl()); ++ CharUnits BaseOff = Layout.getBaseClassOffset(BDecl); ++ bool Ret = detectFPCCEligibleStructHelper(B.getType(), CurOff + BaseOff, ++ Field1Ty, Field1Off, Field2Ty, ++ Field2Off); ++ if (!Ret) ++ return false; ++ } ++ } ++ int ZeroWidthBitFieldCount = 0; ++ for (const FieldDecl *FD : RD->fields()) { ++ uint64_t FieldOffInBits = Layout.getFieldOffset(FD->getFieldIndex()); ++ QualType QTy = FD->getType(); ++ if (FD->isBitField()) { ++ unsigned BitWidth = FD->getBitWidthValue(getContext()); ++ // Allow a bitfield with a type greater than GRLen as long as the ++ // bitwidth is GRLen or less. 
++ if (getContext().getTypeSize(QTy) > GRLen && BitWidth <= GRLen) ++ QTy = getContext().getIntTypeForBitwidth(GRLen, false); ++ if (BitWidth == 0) { ++ ZeroWidthBitFieldCount++; ++ continue; ++ } ++ } ++ ++ bool Ret = detectFPCCEligibleStructHelper( ++ QTy, CurOff + getContext().toCharUnitsFromBits(FieldOffInBits), ++ Field1Ty, Field1Off, Field2Ty, Field2Off); ++ if (!Ret) ++ return false; ++ ++ // As a quirk of the ABI, zero-width bitfields aren't ignored for fp+fp ++ // or int+fp structs, but are ignored for a struct with an fp field and ++ // any number of zero-width bitfields. ++ if (Field2Ty && ZeroWidthBitFieldCount > 0) ++ return false; ++ } ++ return Field1Ty != nullptr; ++ } ++ ++ return false; ++} ++ ++// Determine if a struct is eligible for passing according to the floating ++// point calling convention (i.e., when flattened it contains a single fp ++// value, fp+fp, or int+fp of appropriate size). If so, NeededArgFPRs and ++// NeededArgGPRs are incremented appropriately. ++bool LoongArchABIInfo::detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty, ++ CharUnits &Field1Off, ++ llvm::Type *&Field2Ty, ++ CharUnits &Field2Off, ++ int &NeededArgGPRs, ++ int &NeededArgFPRs) const { ++ Field1Ty = nullptr; ++ Field2Ty = nullptr; ++ NeededArgGPRs = 0; ++ NeededArgFPRs = 0; ++ bool IsCandidate = detectFPCCEligibleStructHelper( ++ Ty, CharUnits::Zero(), Field1Ty, Field1Off, Field2Ty, Field2Off); ++ // Not really a candidate if we have a single int but no float. ++ if (Field1Ty && !Field2Ty && !Field1Ty->isFloatingPointTy()) ++ return IsCandidate = false; ++ if (!IsCandidate) ++ return false; ++ if (Field1Ty && Field1Ty->isFloatingPointTy()) ++ NeededArgFPRs++; ++ else if (Field1Ty) ++ NeededArgGPRs++; ++ if (Field2Ty && Field2Ty->isFloatingPointTy()) ++ NeededArgFPRs++; ++ else if (Field2Ty) ++ NeededArgGPRs++; ++ return IsCandidate; ++} ++ ++// Call getCoerceAndExpand for the two-element flattened struct described by ++// Field1Ty, Field1Off, Field2Ty, Field2Off. This method will create an ++// appropriate coerceToType and unpaddedCoerceToType. 
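
For instance (illustrative only; `Point` and `sum` are invented names), a two-field fp struct is expected to be coerced to an LLVM struct of its two scalar fields, so both travel in floating-point argument registers when enough remain:

  struct Point { float x; double y; };

  double sum(Point p) {   // expected coercion: { float, double }
    return p.x + p.y;     // x and y are each expected to land in an FPR
  }
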
++ABIArgInfo LoongArchABIInfo::coerceAndExpandFPCCEligibleStruct( ++ llvm::Type *Field1Ty, CharUnits Field1Off, llvm::Type *Field2Ty, ++ CharUnits Field2Off) const { ++ SmallVector CoerceElts; ++ SmallVector UnpaddedCoerceElts; ++ if (!Field1Off.isZero()) ++ CoerceElts.push_back(llvm::ArrayType::get( ++ llvm::Type::getInt8Ty(getVMContext()), Field1Off.getQuantity())); ++ ++ CoerceElts.push_back(Field1Ty); ++ UnpaddedCoerceElts.push_back(Field1Ty); ++ ++ if (!Field2Ty) { ++ return ABIArgInfo::getCoerceAndExpand( ++ llvm::StructType::get(getVMContext(), CoerceElts, !Field1Off.isZero()), ++ UnpaddedCoerceElts[0]); ++ } ++ ++ CharUnits Field2Align = ++ CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(Field2Ty)); ++ CharUnits Field1Size = ++ CharUnits::fromQuantity(getDataLayout().getTypeStoreSize(Field1Ty)); ++ CharUnits Field2OffNoPadNoPack = Field1Size.alignTo(Field2Align); ++ ++ CharUnits Padding = CharUnits::Zero(); ++ if (Field2Off > Field2OffNoPadNoPack) ++ Padding = Field2Off - Field2OffNoPadNoPack; ++ else if (Field2Off != Field2Align && Field2Off > Field1Size) ++ Padding = Field2Off - Field1Size; ++ ++ bool IsPacked = !Field2Off.isMultipleOf(Field2Align); ++ ++ if (!Padding.isZero()) ++ CoerceElts.push_back(llvm::ArrayType::get( ++ llvm::Type::getInt8Ty(getVMContext()), Padding.getQuantity())); ++ ++ CoerceElts.push_back(Field2Ty); ++ UnpaddedCoerceElts.push_back(Field2Ty); ++ ++ auto CoerceToType = ++ llvm::StructType::get(getVMContext(), CoerceElts, IsPacked); ++ auto UnpaddedCoerceToType = ++ llvm::StructType::get(getVMContext(), UnpaddedCoerceElts, IsPacked); ++ ++ return ABIArgInfo::getCoerceAndExpand(CoerceToType, UnpaddedCoerceToType); ++} ++ ++void LoongArchABIInfo::CoerceToIntArgs( ++ uint64_t TySize, SmallVectorImpl &ArgList) const { ++ llvm::IntegerType *IntTy = ++ llvm::IntegerType::get(getVMContext(), MinABIStackAlignInBytes * 8); ++ ++ // Add (TySize / MinABIStackAlignInBytes) args of IntTy. ++ for (unsigned N = TySize / (MinABIStackAlignInBytes * 8); N; --N) ++ ArgList.push_back(IntTy); ++ ++ // If necessary, add one more integer type to ArgList. ++ unsigned R = TySize % (MinABIStackAlignInBytes * 8); ++ ++ if (R) ++ ArgList.push_back(llvm::IntegerType::get(getVMContext(), R)); ++} ++ ++llvm::Type* LoongArchABIInfo::HandleAggregates(QualType Ty, uint64_t TySize) const { ++ SmallVector ArgList, IntArgList; ++ ++ if (Ty->isComplexType()) ++ return CGT.ConvertType(Ty); ++ ++ const RecordType *RT = Ty->getAs(); ++ ++ // Unions/vectors are passed in integer registers. ++ if (!RT || !RT->isStructureOrClassType()) { ++ CoerceToIntArgs(TySize, ArgList); ++ return llvm::StructType::get(getVMContext(), ArgList); ++ } ++ ++ const RecordDecl *RD = RT->getDecl(); ++ const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); ++ assert(!(TySize % 8) && "Size of structure must be multiple of 8."); ++ ++ uint64_t LastOffset = 0; ++ unsigned idx = 0; ++ llvm::IntegerType *I64 = llvm::IntegerType::get(getVMContext(), 64); ++ ++ // Iterate over fields in the struct/class and check if there are any aligned ++ // double fields. ++ for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); ++ i != e; ++i, ++idx) { ++ const QualType Ty = i->getType(); ++ const BuiltinType *BT = Ty->getAs(); ++ ++ if (!BT || BT->getKind() != BuiltinType::Double) ++ continue; ++ ++ uint64_t Offset = Layout.getFieldOffset(idx); ++ if (Offset % 64) // Ignore doubles that are not aligned. ++ continue; ++ ++ // Add ((Offset - LastOffset) / 64) args of type i64. 
++ for (unsigned j = (Offset - LastOffset) / 64; j > 0; --j) ++ ArgList.push_back(I64); ++ ++ // Add double type. ++ ArgList.push_back(llvm::Type::getDoubleTy(getVMContext())); ++ LastOffset = Offset + 64; ++ } ++ ++ CoerceToIntArgs(TySize - LastOffset, IntArgList); ++ ArgList.append(IntArgList.begin(), IntArgList.end()); ++ ++ return llvm::StructType::get(getVMContext(), ArgList); ++} ++ ++llvm::Type * LoongArchABIInfo::getPaddingType(uint64_t OrigOffset, ++ uint64_t Offset) const { ++ if (OrigOffset + MinABIStackAlignInBytes > Offset) ++ return nullptr; ++ ++ return llvm::IntegerType::get(getVMContext(), (Offset - OrigOffset) * 8); ++} ++ ++ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, ++ int &ArgGPRsLeft, ++ int &ArgFPRsLeft) const { ++ assert(ArgGPRsLeft <= NumArgGPRs && "Arg GPR tracking underflow"); ++ Ty = useFirstFieldIfTransparentUnion(Ty); ++ ++ // Structures with either a non-trivial destructor or a non-trivial ++ // copy constructor are always passed indirectly. ++ if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { ++ if (ArgGPRsLeft) ++ ArgGPRsLeft -= 1; ++ return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA == ++ CGCXXABI::RAA_DirectInMemory); ++ } ++ ++ // Ignore empty structs/unions. ++ if (isEmptyRecord(getContext(), Ty, true)) ++ return ABIArgInfo::getIgnore(); ++ ++ uint64_t Size = getContext().getTypeSize(Ty); ++ ++ // Pass floating point values via FPRs if possible. ++ if (IsFixed && Ty->isFloatingType() && FRLen >= Size && ArgFPRsLeft) { ++ ArgFPRsLeft--; ++ return ABIArgInfo::getDirect(); ++ } ++ ++ // Complex types for the hard float ABI must be passed direct rather than ++ // using CoerceAndExpand. ++ if (IsFixed && Ty->isComplexType() && FRLen && ArgFPRsLeft >= 2) { ++ QualType EltTy = Ty->getAs()->getElementType(); ++ if (getContext().getTypeSize(EltTy) <= FRLen) { ++ ArgFPRsLeft -= 2; ++ return ABIArgInfo::getDirect(); ++ } ++ } ++ ++ if (Ty->isVectorType() && (((getContext().getTypeSize(Ty) == 128) && ++ (getTarget().hasFeature("lsx"))) || ++ ((getContext().getTypeSize(Ty) == 256) && ++ getTarget().hasFeature("lasx")))) ++ return ABIArgInfo::getDirect(); ++ ++ if (IsFixed && FRLen && Ty->isStructureOrClassType()) { ++ llvm::Type *Field1Ty = nullptr; ++ llvm::Type *Field2Ty = nullptr; ++ CharUnits Field1Off = CharUnits::Zero(); ++ CharUnits Field2Off = CharUnits::Zero(); ++ int NeededArgGPRs; ++ int NeededArgFPRs; ++ bool IsCandidate = ++ detectFPCCEligibleStruct(Ty, Field1Ty, Field1Off, Field2Ty, Field2Off, ++ NeededArgGPRs, NeededArgFPRs); ++ if (IsCandidate && NeededArgGPRs <= ArgGPRsLeft && ++ NeededArgFPRs <= ArgFPRsLeft) { ++ ArgGPRsLeft -= NeededArgGPRs; ++ ArgFPRsLeft -= NeededArgFPRs; ++ return coerceAndExpandFPCCEligibleStruct(Field1Ty, Field1Off, Field2Ty, ++ Field2Off); ++ } ++ } else if (Ty->isStructureOrClassType() && Size == 128 && ++ isAggregateTypeForABI(Ty)) { ++ uint64_t Offset = 8; ++ uint64_t OrigOffset = Offset; ++ uint64_t TySize = getContext().getTypeSize(Ty); ++ uint64_t Align = getContext().getTypeAlign(Ty) / 8; ++ ++ Align = std::min(std::max(Align, (uint64_t)MinABIStackAlignInBytes), ++ (uint64_t)StackAlignInBytes); ++ unsigned CurrOffset = llvm::alignTo(Offset, Align); ++ Offset = CurrOffset + llvm::alignTo(TySize, Align * 8) / 8; ++ ++ ABIArgInfo ArgInfo = ++ ABIArgInfo::getDirect(HandleAggregates(Ty, TySize), 0, ++ getPaddingType(OrigOffset, CurrOffset)); ++ ArgInfo.setInReg(true); ++ return ArgInfo; ++ } ++ ++ uint64_t NeededAlign = getContext().getTypeAlign(Ty); ++ // Determine the 
number of GPRs needed to pass the current argument ++ // according to the ABI. 2*GRLen-aligned varargs are passed in "aligned" ++ // register pairs, so may consume 3 registers. ++ int NeededArgGPRs = 1; ++ if (!IsFixed && NeededAlign == 2 * GRLen) ++ NeededArgGPRs = 2 + (ArgGPRsLeft % 2); ++ else if (Size > GRLen && Size <= 2 * GRLen) ++ NeededArgGPRs = 2; ++ ++ if (NeededArgGPRs > ArgGPRsLeft) { ++ NeededArgGPRs = ArgGPRsLeft; ++ } ++ ++ ArgGPRsLeft -= NeededArgGPRs; ++ ++ if (!isAggregateTypeForABI(Ty) && !Ty->isVectorType()) { ++ // Treat an enum type as its underlying type. ++ if (const EnumType *EnumTy = Ty->getAs()) ++ Ty = EnumTy->getDecl()->getIntegerType(); ++ ++ // All integral types are promoted to GRLen width, unless passed on the ++ // stack. ++ if (Size < GRLen && Ty->isIntegralOrEnumerationType()) { ++ return extendType(Ty); ++ } ++ ++ return ABIArgInfo::getDirect(); ++ } ++ ++ // Aggregates which are <= 2*GRLen will be passed in registers if possible, ++ // so coerce to integers. ++ if (Size <= 2 * GRLen) { ++ unsigned Alignment = getContext().getTypeAlign(Ty); ++ ++ // Use a single GRLen int if possible, 2*GRLen if 2*GRLen alignment is ++ // required, and a 2-element GRLen array if only GRLen alignment is required. ++ if (Size <= GRLen) { ++ return ABIArgInfo::getDirect( ++ llvm::IntegerType::get(getVMContext(), GRLen)); ++ } else if (Alignment == 2 * GRLen) { ++ return ABIArgInfo::getDirect( ++ llvm::IntegerType::get(getVMContext(), 2 * GRLen)); ++ } else { ++ return ABIArgInfo::getDirect(llvm::ArrayType::get( ++ llvm::IntegerType::get(getVMContext(), GRLen), 2)); ++ } ++ } ++ return getNaturalAlignIndirect(Ty, /*ByVal=*/false); ++} ++ ++ABIArgInfo LoongArchABIInfo::classifyReturnType(QualType RetTy) const { ++ if (RetTy->isVoidType()) ++ return ABIArgInfo::getIgnore(); ++ ++ int ArgGPRsLeft = 2; ++ int ArgFPRsLeft = FRLen ? 2 : 0; ++ ++ // The rules for return and argument types are the same, so defer to ++ // classifyArgumentType. ++ return classifyArgumentType(RetTy, /*IsFixed=*/true, ArgGPRsLeft, ++ ArgFPRsLeft); ++} ++ ++Address LoongArchABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, ++ QualType Ty) const { ++ CharUnits SlotSize = CharUnits::fromQuantity(GRLen / 8); ++ ++ // Empty records are ignored for parameter passing purposes. ++ if (isEmptyRecord(getContext(), Ty, true)) { ++ Address Addr = Address(CGF.Builder.CreateLoad(VAListAddr), ++ getVAListElementType(CGF), SlotSize); ++ Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty)); ++ return Addr; ++ } ++ ++ auto TInfo = getContext().getTypeInfoInChars(Ty); ++ ++ // Arguments bigger than 2*GRlen bytes are passed indirectly. ++ bool IsIndirect = TInfo.Width > 2 * SlotSize; ++ ++ return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TInfo, ++ SlotSize, /*AllowHigherAlign=*/true); ++} ++ ++ABIArgInfo LoongArchABIInfo::extendType(QualType Ty) const { ++ int TySize = getContext().getTypeSize(Ty); ++ // LP64 ABI requires unsigned 32 bit integers to be sign extended. 
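
A hedged summary of the size buckets implemented above for GRLen = 64, using invented example types:

  struct Small { long a; };        // 8 bytes  -> a single GRLen integer (i64)
  struct Pair  { long a, b; };     // 16 bytes -> [2 x i64], i.e. a pair of GPRs
  struct Large { long a, b, c; };  // 24 bytes -> passed indirectly (by address)

  long take(unsigned k, Pair p, Large q) {
    // k is a 32-bit scalar: it is promoted to GRLen width, and, as the comment
    // above notes, the unsigned case is sign-extended rather than zero-extended
    // under lp64.
    return (long)k + p.a + q.a;
  }
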
++ if (GRLen == 64 && Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32) ++ return ABIArgInfo::getSignExtend(Ty); ++ return ABIArgInfo::getExtend(Ty); ++} ++ ++namespace { ++class LoongArchTargetCodeGenInfo : public TargetCodeGenInfo { ++public: ++ LoongArchTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned GRLen, ++ unsigned FRLen) ++ : TargetCodeGenInfo(std::make_unique( ++ CGT, GRLen, FRLen)) {} ++ ++ void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, ++ CodeGen::CodeGenModule &CGM) const override { ++ return; ++ } ++}; ++} // namespace ++ + //===----------------------------------------------------------------------===// + // VE ABI Implementation. + // +@@ -11560,6 +12112,7 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { + + case llvm::Triple::le32: + return SetCGInfo(new PNaClTargetCodeGenInfo(Types)); ++ + case llvm::Triple::m68k: + return SetCGInfo(new M68kTargetCodeGenInfo(Types)); + case llvm::Triple::mips: +@@ -11677,6 +12230,9 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { + case llvm::Triple::msp430: + return SetCGInfo(new MSP430TargetCodeGenInfo(Types)); + ++ case llvm::Triple::loongarch64: ++ return SetCGInfo(new LoongArchTargetCodeGenInfo(Types, 64, 64)); ++ + case llvm::Triple::riscv32: + case llvm::Triple::riscv64: { + StringRef ABIStr = getTarget().getABI(); +diff --git a/clang/lib/Driver/CMakeLists.txt b/clang/lib/Driver/CMakeLists.txt +index 18c9b2d04..14c7053e0 100644 +--- a/clang/lib/Driver/CMakeLists.txt ++++ b/clang/lib/Driver/CMakeLists.txt +@@ -28,6 +28,7 @@ add_clang_library(clangDriver + ToolChains/Arch/AArch64.cpp + ToolChains/Arch/ARM.cpp + ToolChains/Arch/CSKY.cpp ++ ToolChains/Arch/LoongArch.cpp + ToolChains/Arch/M68k.cpp + ToolChains/Arch/Mips.cpp + ToolChains/Arch/PPC.cpp +diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp +index 3f29afd35..50970bd33 100644 +--- a/clang/lib/Driver/Driver.cpp ++++ b/clang/lib/Driver/Driver.cpp +@@ -626,6 +626,29 @@ static llvm::Triple computeTargetTriple(const Driver &D, + Target.setVendorName("intel"); + } + ++ // If target is LoongArch adjust the target triple ++ // accordingly to provided ABI name. ++ A = Args.getLastArg(options::OPT_mabi_EQ); ++ if (A && Target.isLoongArch()) { ++ StringRef ABIName = A->getValue(); ++ if (ABIName == "lp32") { ++ Target = Target.get32BitArchVariant(); ++ if (Target.getEnvironment() == llvm::Triple::GNUABI64 || ++ Target.getEnvironment() == llvm::Triple::GNUABILPX32) ++ Target.setEnvironment(llvm::Triple::GNU); ++ } else if (ABIName == "lpx32") { ++ Target = Target.get64BitArchVariant(); ++ if (Target.getEnvironment() == llvm::Triple::GNU || ++ Target.getEnvironment() == llvm::Triple::GNUABI64) ++ Target.setEnvironment(llvm::Triple::GNUABILPX32); ++ } else if (ABIName == "lp64") { ++ Target = Target.get64BitArchVariant(); ++ if (Target.getEnvironment() == llvm::Triple::GNU || ++ Target.getEnvironment() == llvm::Triple::GNUABILPX32) ++ Target.setEnvironment(llvm::Triple::GNUABI64); ++ } ++ } ++ + // If target is MIPS adjust the target triple + // accordingly to provided ABI name. 
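
Circling back to the va_arg lowering added in LoongArchABIInfo::EmitVAArg above, a minimal sketch of the expected lp64 behaviour (`Big` and `sum_varargs` are invented names):

  #include <cstdarg>

  struct Big { long a, b, c; };    // 24 bytes: larger than 2*GRLen

  long sum_varargs(int n, ...) {
    va_list ap;
    va_start(ap, n);
    long x = va_arg(ap, long);     // read straight from a GRLen-sized slot
    Big  b = va_arg(ap, Big);      // expected: the slot holds a pointer, the struct is copied through it
    va_end(ap);
    return x + b.a;
  }
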
+ A = Args.getLastArg(options::OPT_mabi_EQ); +diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp +new file mode 100644 +index 000000000..2c42db690 +--- /dev/null ++++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp +@@ -0,0 +1,211 @@ ++//===--- LoongArch.cpp - Tools Implementations -----------------------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArch.h" ++#include "ToolChains/CommonArgs.h" ++#include "clang/Driver/Driver.h" ++#include "clang/Driver/DriverDiagnostic.h" ++#include "clang/Driver/Options.h" ++#include "llvm/ADT/StringSwitch.h" ++#include "llvm/Option/ArgList.h" ++ ++using namespace clang::driver; ++using namespace clang::driver::tools; ++using namespace clang; ++using namespace llvm::opt; ++ ++// Get CPU and ABI names. They are not independent ++// so we have to calculate them together. ++void loongarch::getLoongArchCPUAndABI(const ArgList &Args, const llvm::Triple &Triple, ++ StringRef &CPUName, StringRef &ABIName) { ++ const char *DefLoongArch32CPU = "loongarch32"; ++ const char *DefLoongArch64CPU = "la464"; ++ ++ if (Arg *A = Args.getLastArg(clang::driver::options::OPT_march_EQ, ++ options::OPT_mcpu_EQ)) ++ CPUName = A->getValue(); ++ ++ if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) { ++ ABIName = A->getValue(); ++ // Convert a GNU style LoongArch ABI name to the name ++ // accepted by LLVM LoongArch backend. ++ ABIName = llvm::StringSwitch(ABIName) ++ .Case("32", "lp32") ++ .Case("64", "lp64") ++ .Default(ABIName); ++ } ++ ++ // Setup default CPU and ABI names. ++ if (CPUName.empty() && ABIName.empty()) { ++ switch (Triple.getArch()) { ++ default: ++ llvm_unreachable("Unexpected triple arch name"); ++ case llvm::Triple::loongarch32: ++ CPUName = DefLoongArch32CPU; ++ break; ++ case llvm::Triple::loongarch64: ++ CPUName = DefLoongArch64CPU; ++ break; ++ } ++ } ++ ++ if (ABIName.empty() && (Triple.getEnvironment() == llvm::Triple::GNUABILPX32)) ++ ABIName = "lpx32"; ++ ++ if (ABIName.empty()) { ++ ABIName = llvm::StringSwitch(CPUName) ++ .Case("loongarch32", "lp32") ++ .Cases("la264", "la364", "la464", "lp64") ++ .Default(""); ++ } ++ ++ if (ABIName.empty()) { ++ // Deduce ABI name from the target triple. ++ ABIName = Triple.isLoongArch32() ? "lp32" : "lp64"; ++ } ++ ++ if (CPUName.empty()) { ++ // Deduce CPU name from ABI name. ++ CPUName = llvm::StringSwitch(ABIName) ++ .Case("lp32", DefLoongArch32CPU) ++ .Cases("lpx32", "lp64", DefLoongArch64CPU) ++ .Default(""); ++ } ++ ++ // FIXME: Warn on inconsistent use of -march and -mabi. ++} ++ ++std::string loongarch::getLoongArchABILibSuffix(const ArgList &Args, ++ const llvm::Triple &Triple) { ++ StringRef CPUName, ABIName; ++ tools::loongarch::getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName); ++ return llvm::StringSwitch(ABIName) ++ .Case("lp32", "") ++ .Case("lpx32", "32") ++ .Case("lp64", "64"); ++} ++ ++// Convert ABI name to the GNU tools acceptable variant. ++StringRef loongarch::getGnuCompatibleLoongArchABIName(StringRef ABI) { ++ return llvm::StringSwitch(ABI) ++ .Case("lp32", "32") ++ .Case("lp64", "64") ++ .Default(ABI); ++} ++ ++// Select the LoongArch float ABI as determined by -msoft-float, -mhard-float, ++// and -mfloat-abi=. 
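
The CPU/ABI defaulting implemented in getLoongArchCPUAndABI above can be summarised by this standalone sketch (not driver code; the lpx32 ABI and the GNUABILPX32 environment are omitted for brevity, and the names below are invented):

  #include <string>

  struct CpuAbi { std::string cpu, abi; };

  CpuAbi deduce(bool is64Bit, std::string cpu, std::string abi) {
    if (cpu.empty() && abi.empty())
      cpu = is64Bit ? "la464" : "loongarch32";            // default CPU from the triple
    if (abi.empty())
      abi = (cpu == "loongarch32") ? "lp32" : "lp64";     // ABI deduced from the CPU
    if (cpu.empty())
      cpu = (abi == "lp32") ? "loongarch32" : "la464";    // CPU deduced from the ABI
    return {cpu, abi};
  }
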
++loongarch::FloatABI loongarch::getLoongArchFloatABI(const Driver &D, const ArgList &Args) { ++ loongarch::FloatABI ABI = loongarch::FloatABI::Invalid; ++ if (Arg *A = ++ Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float, ++ options::OPT_mfloat_abi_EQ)) { ++ if (A->getOption().matches(options::OPT_msoft_float)) ++ ABI = loongarch::FloatABI::Soft; ++ else if (A->getOption().matches(options::OPT_mhard_float)) ++ ABI = loongarch::FloatABI::Hard; ++ else { ++ ABI = llvm::StringSwitch(A->getValue()) ++ .Case("soft", loongarch::FloatABI::Soft) ++ .Case("hard", loongarch::FloatABI::Hard) ++ .Default(loongarch::FloatABI::Invalid); ++ if (ABI == loongarch::FloatABI::Invalid && !StringRef(A->getValue()).empty()) { ++ D.Diag(clang::diag::err_drv_invalid_mfloat_abi) << A->getAsString(Args); ++ ABI = loongarch::FloatABI::Hard; ++ } ++ } ++ } ++ ++ // If unspecified, choose the default based on the platform. ++ if (ABI == loongarch::FloatABI::Invalid) { ++ // Assume "hard", because it's a default value used by gcc. ++ // When we start to recognize specific target LoongArch processors, ++ // we will be able to select the default more correctly. ++ ABI = loongarch::FloatABI::Hard; ++ } ++ ++ assert(ABI != loongarch::FloatABI::Invalid && "must select an ABI"); ++ return ABI; ++} ++ ++void loongarch::getLoongArchTargetFeatures(const Driver &D, const llvm::Triple &Triple, ++ const ArgList &Args, ++ std::vector &Features) { ++ StringRef CPUName; ++ StringRef ABIName; ++ getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName); ++ ABIName = getGnuCompatibleLoongArchABIName(ABIName); ++ ++ // At final link time, LP32 and LPX32 with CPIC will have another section ++ // added to the binary which contains the stub functions to perform ++ // any fixups required for PIC code. ++ ++ bool IsLP64 = ABIName == "64"; ++ bool NonPIC = false; ++ ++ Arg *LastPICArg = Args.getLastArg(options::OPT_fPIC, options::OPT_fno_PIC, ++ options::OPT_fpic, options::OPT_fno_pic, ++ options::OPT_fPIE, options::OPT_fno_PIE, ++ options::OPT_fpie, options::OPT_fno_pie); ++ if (LastPICArg) { ++ Option O = LastPICArg->getOption(); ++ NonPIC = ++ (O.matches(options::OPT_fno_PIC) || O.matches(options::OPT_fno_pic) || ++ O.matches(options::OPT_fno_PIE) || O.matches(options::OPT_fno_pie)); ++ } ++ ++ if (IsLP64 && NonPIC) { ++ NonPIC = false; ++ } ++ ++ loongarch::FloatABI FloatABI = loongarch::getLoongArchFloatABI(D, Args); ++ if (FloatABI == loongarch::FloatABI::Soft) { ++ // FIXME: Note, this is a hack. We need to pass the selected float ++ // mode to the LoongArchTargetInfoBase to define appropriate macros there. ++ // Now it is the only method. ++ Features.push_back("+soft-float"); ++ } ++ ++ AddTargetFeature(Args, Features, options::OPT_msingle_float, ++ options::OPT_mdouble_float, "single-float"); ++ ++ AddTargetFeature(Args, Features, options::OPT_mlsx, options::OPT_mno_lsx, ++ "lsx"); ++ AddTargetFeature(Args, Features, options::OPT_mlasx, options::OPT_mno_lasx, ++ "lasx"); ++ ++ AddTargetFeature(Args, Features, options::OPT_munaligned_access, ++ options::OPT_mno_unaligned_access, "unaligned-access"); ++ ++ // Add the last -mfp32/-mfp64, if none are given and fp64 is default, ++ // pass fp64. 
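
A rough, standalone model of the feature strings the driver assembles here (explicit -mfp32/-mfp64 overrides are left out, and the function name is invented); note that "+lasx" additionally switches on LSX later, in handleTargetFeatures() shown earlier:

  #include <string>
  #include <vector>

  std::vector<std::string> loongarchFeaturesFor(bool softFloat, bool singleFloat,
                                                bool lsx, bool lasx) {
    std::vector<std::string> f;
    if (softFloat)    f.push_back("+soft-float");
    if (singleFloat)  f.push_back("+single-float");
    if (lsx)          f.push_back("+lsx");
    if (lasx)         f.push_back("+lasx");   // the target also enables LSX for this
    if (!singleFloat) f.push_back("+fp64");   // fp64 is the default unless -msingle-float
    return f;
  }
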
++ if (Arg *A = Args.getLastArg(options::OPT_mfp32, ++ options::OPT_mfp64)) { ++ if (A->getOption().matches(options::OPT_mfp32)) ++ Features.push_back("-fp64"); ++ else ++ Features.push_back("+fp64"); ++ } else if (loongarch::isFP64Default(Args)) { ++ Features.push_back("+fp64"); ++ } ++ ++} ++ ++bool loongarch::hasLoongArchAbiArg(const ArgList &Args, const char *Value) { ++ Arg *A = Args.getLastArg(options::OPT_mabi_EQ); ++ return A && (A->getValue() == StringRef(Value)); ++} ++ ++bool loongarch::isUCLibc(const ArgList &Args) { ++ Arg *A = Args.getLastArg(options::OPT_m_libc_Group); ++ return A && A->getOption().matches(options::OPT_muclibc); ++} ++ ++bool loongarch::isFP64Default(const ArgList &Args) { ++ return Args.getLastArg(options::OPT_msingle_float) ? false : true; ++} +diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.h b/clang/lib/Driver/ToolChains/Arch/LoongArch.h +new file mode 100644 +index 000000000..53664346f +--- /dev/null ++++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.h +@@ -0,0 +1,49 @@ ++//===--- LoongArch.h - LoongArch-specific Tool Helpers ----------------------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ARCH_LOONGARCH_H ++#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ARCH_LOONGARCH_H ++ ++#include "clang/Driver/Driver.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/ADT/Triple.h" ++#include "llvm/Option/Option.h" ++#include ++#include ++ ++namespace clang { ++namespace driver { ++namespace tools { ++ ++namespace loongarch { ++enum class FloatABI { ++ Invalid, ++ Soft, ++ Hard, ++}; ++ ++void getLoongArchCPUAndABI(const llvm::opt::ArgList &Args, ++ const llvm::Triple &Triple, StringRef &CPUName, ++ StringRef &ABIName); ++void getLoongArchTargetFeatures(const Driver &D, const llvm::Triple &Triple, ++ const llvm::opt::ArgList &Args, ++ std::vector &Features); ++StringRef getGnuCompatibleLoongArchABIName(StringRef ABI); ++loongarch::FloatABI getLoongArchFloatABI(const Driver &D, const llvm::opt::ArgList &Args); ++std::string getLoongArchABILibSuffix(const llvm::opt::ArgList &Args, ++ const llvm::Triple &Triple); ++bool hasLoongArchAbiArg(const llvm::opt::ArgList &Args, const char *Value); ++bool isUCLibc(const llvm::opt::ArgList &Args); ++bool isFP64Default(const llvm::opt::ArgList &Args); ++ ++} // end namespace loongarch ++} // end namespace target ++} // end namespace driver ++} // end namespace clang ++ ++#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ARCH_LOONGARCH_H +diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp +index 3704ed858..8381fb9bd 100644 +--- a/clang/lib/Driver/ToolChains/Clang.cpp ++++ b/clang/lib/Driver/ToolChains/Clang.cpp +@@ -11,6 +11,7 @@ + #include "Arch/AArch64.h" + #include "Arch/ARM.h" + #include "Arch/CSKY.h" ++#include "Arch/LoongArch.h" + #include "Arch/M68k.h" + #include "Arch/Mips.h" + #include "Arch/PPC.h" +@@ -317,6 +318,11 @@ static void getTargetFeatures(const Driver &D, const llvm::Triple &Triple, + arm::getARMTargetFeatures(D, Triple, Args, Features, ForAS); + break; + ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: ++ loongarch::getLoongArchTargetFeatures(D, Triple, Args, Features); ++ break; ++ + case llvm::Triple::ppc: + case llvm::Triple::ppcle: + 
case llvm::Triple::ppc64: +@@ -527,6 +533,8 @@ static bool useFramePointerForTargetByDefault(const ArgList &Args, + // XCore never wants frame pointers, regardless of OS. + // WebAssembly never wants frame pointers. + return false; ++ case llvm::Triple::loongarch64: ++ case llvm::Triple::loongarch32: + case llvm::Triple::ppc: + case llvm::Triple::ppcle: + case llvm::Triple::ppc64: +@@ -1794,6 +1802,11 @@ void Clang::RenderTargetOptions(const llvm::Triple &EffectiveTriple, + CmdArgs.push_back("-fallow-half-arguments-and-returns"); + break; + ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: ++ AddLoongArchTargetArgs(Args, CmdArgs); ++ break; ++ + case llvm::Triple::mips: + case llvm::Triple::mipsel: + case llvm::Triple::mips64: +@@ -1933,6 +1946,45 @@ void Clang::AddAArch64TargetArgs(const ArgList &Args, + AddUnalignedAccessWarning(CmdArgs); + } + ++void Clang::AddLoongArchTargetArgs(const ArgList &Args, ++ ArgStringList &CmdArgs) const { ++ const Driver &D = getToolChain().getDriver(); ++ StringRef CPUName; ++ StringRef ABIName; ++ const llvm::Triple &Triple = getToolChain().getTriple(); ++ loongarch::getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName); ++ ++ CmdArgs.push_back("-target-abi"); ++ CmdArgs.push_back(ABIName.data()); ++ ++ loongarch::FloatABI ABI = loongarch::getLoongArchFloatABI(D, Args); ++ if (ABI == loongarch::FloatABI::Soft) { ++ // Floating point operations and argument passing are soft. ++ CmdArgs.push_back("-msoft-float"); ++ CmdArgs.push_back("-mfloat-abi"); ++ CmdArgs.push_back("soft"); ++ } else { ++ // Floating point operations and argument passing are hard. ++ assert(ABI == loongarch::FloatABI::Hard && "Invalid float abi!"); ++ CmdArgs.push_back("-mfloat-abi"); ++ CmdArgs.push_back("hard"); ++ } ++ ++ if (Arg *A = Args.getLastArg(options::OPT_mcheck_zero_division, ++ options::OPT_mno_check_zero_division)) { ++ if (A->getOption().matches(options::OPT_mno_check_zero_division)) { ++ CmdArgs.push_back("-mllvm"); ++ CmdArgs.push_back("-mnocheck-zero-division"); ++ } ++ } ++ ++ llvm::Reloc::Model RelocationModel; ++ unsigned PICLevel; ++ bool IsPIE; ++ std::tie(RelocationModel, PICLevel, IsPIE) = ++ ParsePICArgs(getToolChain(), Args); ++} ++ + void Clang::AddMIPSTargetArgs(const ArgList &Args, + ArgStringList &CmdArgs) const { + const Driver &D = getToolChain().getDriver(); +@@ -7812,6 +7864,17 @@ const char *Clang::getDependencyFileName(const ArgList &Args, + + // Begin ClangAs + ++void ClangAs::AddLoongArchTargetArgs(const ArgList &Args, ++ ArgStringList &CmdArgs) const { ++ StringRef CPUName; ++ StringRef ABIName; ++ const llvm::Triple &Triple = getToolChain().getTriple(); ++ loongarch::getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName); ++ ++ CmdArgs.push_back("-target-abi"); ++ CmdArgs.push_back(ABIName.data()); ++} ++ + void ClangAs::AddMIPSTargetArgs(const ArgList &Args, + ArgStringList &CmdArgs) const { + StringRef CPUName; +@@ -8007,6 +8070,11 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, + default: + break; + ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: ++ AddLoongArchTargetArgs(Args, CmdArgs); ++ break; ++ + case llvm::Triple::mips: + case llvm::Triple::mipsel: + case llvm::Triple::mips64: +diff --git a/clang/lib/Driver/ToolChains/Clang.h b/clang/lib/Driver/ToolChains/Clang.h +index 5209c6687..e28012af1 100644 +--- a/clang/lib/Driver/ToolChains/Clang.h ++++ b/clang/lib/Driver/ToolChains/Clang.h +@@ -57,6 +57,8 @@ private: + bool KernelOrKext) const; + void AddARM64TargetArgs(const 
llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; ++ void AddLoongArchTargetArgs(const llvm::opt::ArgList &Args, ++ llvm::opt::ArgStringList &CmdArgs) const; + void AddMIPSTargetArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; + void AddPPCTargetArgs(const llvm::opt::ArgList &Args, +@@ -123,6 +125,8 @@ class LLVM_LIBRARY_VISIBILITY ClangAs : public Tool { + public: + ClangAs(const ToolChain &TC) + : Tool("clang::as", "clang integrated assembler", TC) {} ++ void AddLoongArchTargetArgs(const llvm::opt::ArgList &Args, ++ llvm::opt::ArgStringList &CmdArgs) const; + void AddMIPSTargetArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; + void AddX86TargetArgs(const llvm::opt::ArgList &Args, +diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp +index 443725f7d..a3778db38 100644 +--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp ++++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp +@@ -9,6 +9,7 @@ + #include "CommonArgs.h" + #include "Arch/AArch64.h" + #include "Arch/ARM.h" ++#include "Arch/LoongArch.h" + #include "Arch/M68k.h" + #include "Arch/Mips.h" + #include "Arch/PPC.h" +@@ -376,6 +377,14 @@ std::string tools::getCPUName(const Driver &D, const ArgList &Args, + return A->getValue(); + return ""; + ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: { ++ StringRef CPUName; ++ StringRef ABIName; ++ loongarch::getLoongArchCPUAndABI(Args, T, CPUName, ABIName); ++ return std::string(CPUName); ++ } ++ + case llvm::Triple::m68k: + return m68k::getM68kTargetCPU(Args); + +@@ -1378,6 +1387,18 @@ tools::ParsePICArgs(const ToolChain &ToolChain, const ArgList &Args) { + if ((ROPI || RWPI) && (PIC || PIE)) + ToolChain.getDriver().Diag(diag::err_drv_ropi_rwpi_incompatible_with_pic); + ++ if (Triple.isLoongArch()) { ++ StringRef CPUName; ++ StringRef ABIName; ++ loongarch::getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName); ++ // When targeting the LP64 ABI, PIC is the default. ++ if (ABIName == "lp64") ++ PIC = true; ++ // Unlike other architectures, LoongArch, even with -fPIC/-mxgot/multigot, ++ // does not use PIC level 2 for historical reasons. ++ IsPICLevelTwo = false; ++ } ++ + if (Triple.isMIPS()) { + StringRef CPUName; + StringRef ABIName; +diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp +index 665cdc313..aad574bbe 100644 +--- a/clang/lib/Driver/ToolChains/Gnu.cpp ++++ b/clang/lib/Driver/ToolChains/Gnu.cpp +@@ -9,6 +9,7 @@ + #include "Gnu.h" + #include "Arch/ARM.h" + #include "Arch/CSKY.h" ++#include "Arch/LoongArch.h" + #include "Arch/Mips.h" + #include "Arch/PPC.h" + #include "Arch/RISCV.h" +@@ -255,6 +256,10 @@ static const char *getLDMOption(const llvm::Triple &T, const ArgList &Args) { + case llvm::Triple::armeb: + case llvm::Triple::thumbeb: + return isArmBigEndian(T, Args) ? 
"armelfb_linux_eabi" : "armelf_linux_eabi"; ++ case llvm::Triple::loongarch32: ++ return "elf32loongarch"; ++ case llvm::Triple::loongarch64: ++ return "elf64loongarch"; + case llvm::Triple::m68k: + return "m68kelf"; + case llvm::Triple::ppc: +@@ -856,6 +861,63 @@ void tools::gnutools::Assembler::ConstructJob(Compilation &C, + + break; + } ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: { ++ StringRef CPUName; ++ StringRef ABIName; ++ loongarch::getLoongArchCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName); ++ ABIName = loongarch::getGnuCompatibleLoongArchABIName(ABIName); ++ ++ //FIXME: Currently gnu as doesn't support -march ++ //CmdArgs.push_back("-march=loongarch"); ++ //CmdArgs.push_back(CPUName.data()); ++ ++ //FIXME: modify loongarch::getGnuCompatibleLoongArchABIName() ++ CmdArgs.push_back("-mabi=lp64"); ++ //CmdArgs.push_back(ABIName.data()); ++ ++ // -mno-shared should be emitted unless -fpic, -fpie, -fPIC, -fPIE, ++ // or -mshared (not implemented) is in effect. ++ if (RelocationModel == llvm::Reloc::Static) ++ CmdArgs.push_back("-mno-shared"); ++ ++ // LLVM doesn't support -mplt yet and acts as if it is always given. ++ // However, -mplt has no effect with the LP64 ABI. ++ if (ABIName != "64") ++ CmdArgs.push_back("-call_nonpic"); ++ ++ break; ++ ++ // Add the last -mfp32/-mfp64. ++ if (Arg *A = Args.getLastArg(options::OPT_mfp32, ++ options::OPT_mfp64)) { ++ A->claim(); ++ A->render(Args, CmdArgs); ++ } ++ ++ if (Arg *A = Args.getLastArg(options::OPT_mlsx, options::OPT_mno_lsx)) { ++ // Do not use AddLastArg because not all versions of LoongArch assembler ++ // support -mlsx / -mno-lsx options. ++ if (A->getOption().matches(options::OPT_mlsx)) ++ CmdArgs.push_back(Args.MakeArgString("-mlsx")); ++ } ++ ++ if (Arg *A = Args.getLastArg(options::OPT_mlasx, options::OPT_mno_lasx)) { ++ // Do not use AddLastArg because not all versions of LoongArch assembler ++ // support -mlasx / -mno-lasx options. 
++ if (A->getOption().matches(options::OPT_mlasx)) ++ CmdArgs.push_back(Args.MakeArgString("-mlasx")); ++ } ++ ++ Args.AddLastArg(CmdArgs, options::OPT_mhard_float, ++ options::OPT_msoft_float); ++ ++ Args.AddLastArg(CmdArgs, options::OPT_mdouble_float, ++ options::OPT_msingle_float); ++ ++ AddAssemblerKPIC(getToolChain(), Args, CmdArgs); ++ break; ++ } + case llvm::Triple::mips: + case llvm::Triple::mipsel: + case llvm::Triple::mips64: +@@ -2294,6 +2356,10 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( + "s390x-linux-gnu", "s390x-unknown-linux-gnu", "s390x-ibm-linux-gnu", + "s390x-suse-linux", "s390x-redhat-linux"}; + ++ static const char *const LoongArch64LibDirs[] = {"/lib64", "/lib"}; ++ static const char *const LoongArch64Triples[] = { ++ "loongarch64-linux-gnu", "loongarch64-unknown-linux-gnu", ++ "loongarch64-loongson-linux-gnu", "loongarch64-redhat-linux"}; + + using std::begin; + using std::end; +@@ -2466,6 +2532,10 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( + BiarchTripleAliases.append(begin(X32Triples), end(X32Triples)); + } + break; ++ case llvm::Triple::loongarch64: ++ LibDirs.append(begin(LoongArch64LibDirs), end(LoongArch64LibDirs)); ++ TripleAliases.append(begin(LoongArch64Triples), end(LoongArch64Triples)); ++ break; + case llvm::Triple::m68k: + LibDirs.append(begin(M68kLibDirs), end(M68kLibDirs)); + TripleAliases.append(begin(M68kTriples), end(M68kTriples)); +@@ -2823,6 +2893,7 @@ bool Generic_GCC::isPICDefault() const { + switch (getArch()) { + case llvm::Triple::x86_64: + return getTriple().isOSWindows(); ++ case llvm::Triple::loongarch64: + case llvm::Triple::mips64: + case llvm::Triple::mips64el: + return true; +@@ -2863,6 +2934,8 @@ bool Generic_GCC::IsIntegratedAssemblerDefault() const { + case llvm::Triple::ppc64le: + case llvm::Triple::riscv32: + case llvm::Triple::riscv64: ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: + case llvm::Triple::sparc: + case llvm::Triple::sparcel: + case llvm::Triple::sparcv9: +diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp +index ceb1a982c..45adea6dd 100644 +--- a/clang/lib/Driver/ToolChains/Linux.cpp ++++ b/clang/lib/Driver/ToolChains/Linux.cpp +@@ -8,6 +8,7 @@ + + #include "Linux.h" + #include "Arch/ARM.h" ++#include "Arch/LoongArch.h" + #include "Arch/Mips.h" + #include "Arch/PPC.h" + #include "Arch/RISCV.h" +@@ -85,6 +86,11 @@ std::string Linux::getMultiarchTriple(const Driver &D, + case llvm::Triple::aarch64_be: + return "aarch64_be-linux-gnu"; + ++ case llvm::Triple::loongarch32: ++ return "loongarch32-linux-gnu"; ++ case llvm::Triple::loongarch64: ++ return "loongarch64-linux-gnu"; ++ + case llvm::Triple::m68k: + return "m68k-linux-gnu"; + +@@ -473,6 +479,11 @@ std::string Linux::getDynamicLinker(const ArgList &Args) const { + Loader = HF ? 
"ld-linux-armhf.so.3" : "ld-linux.so.3"; + break; + } ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: ++ LibDir = "lib" + tools::loongarch::getLoongArchABILibSuffix(Args, Triple); ++ Loader = "ld.so.1"; ++ break; + case llvm::Triple::m68k: + LibDir = "lib"; + Loader = "ld.so.1"; +@@ -741,6 +752,7 @@ SanitizerMask Linux::getSupportedSanitizers() const { + const bool IsRISCV64 = getTriple().getArch() == llvm::Triple::riscv64; + const bool IsSystemZ = getTriple().getArch() == llvm::Triple::systemz; + const bool IsHexagon = getTriple().getArch() == llvm::Triple::hexagon; ++ const bool IsLoongArch64 = getTriple().getArch() == llvm::Triple::loongarch64; + SanitizerMask Res = ToolChain::getSupportedSanitizers(); + Res |= SanitizerKind::Address; + Res |= SanitizerKind::PointerCompare; +@@ -751,19 +763,20 @@ SanitizerMask Linux::getSupportedSanitizers() const { + Res |= SanitizerKind::Memory; + Res |= SanitizerKind::Vptr; + Res |= SanitizerKind::SafeStack; +- if (IsX86_64 || IsMIPS64 || IsAArch64) ++ if (IsX86_64 || IsMIPS64 || IsAArch64 || IsLoongArch64) + Res |= SanitizerKind::DataFlow; + if (IsX86_64 || IsMIPS64 || IsAArch64 || IsX86 || IsArmArch || IsPowerPC64 || +- IsRISCV64 || IsSystemZ || IsHexagon) ++ IsRISCV64 || IsSystemZ || IsHexagon || IsLoongArch64) + Res |= SanitizerKind::Leak; +- if (IsX86_64 || IsMIPS64 || IsAArch64 || IsPowerPC64 || IsSystemZ) ++ if (IsX86_64 || IsMIPS64 || IsAArch64 || IsPowerPC64 || IsSystemZ || ++ IsLoongArch64) + Res |= SanitizerKind::Thread; + if (IsX86_64) + Res |= SanitizerKind::KernelMemory; + if (IsX86 || IsX86_64) + Res |= SanitizerKind::Function; + if (IsX86_64 || IsMIPS64 || IsAArch64 || IsX86 || IsMIPS || IsArmArch || +- IsPowerPC64 || IsHexagon) ++ IsPowerPC64 || IsHexagon || IsLoongArch64) + Res |= SanitizerKind::Scudo; + if (IsX86_64 || IsAArch64) { + Res |= SanitizerKind::HWAddress; +diff --git a/clang/lib/Driver/XRayArgs.cpp b/clang/lib/Driver/XRayArgs.cpp +index 63b575178..4e3ae3f25 100644 +--- a/clang/lib/Driver/XRayArgs.cpp ++++ b/clang/lib/Driver/XRayArgs.cpp +@@ -42,6 +42,8 @@ XRayArgs::XRayArgs(const ToolChain &TC, const ArgList &Args) { + case llvm::Triple::aarch64: + case llvm::Triple::hexagon: + case llvm::Triple::ppc64le: ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: + case llvm::Triple::mips: + case llvm::Triple::mipsel: + case llvm::Triple::mips64: +diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt +index 6e2060991..2ec170cc2 100644 +--- a/clang/lib/Headers/CMakeLists.txt ++++ b/clang/lib/Headers/CMakeLists.txt +@@ -68,6 +68,12 @@ set(hlsl_files + hlsl/hlsl_intrinsics.h + ) + ++set(loongarch_files ++ lasxintrin.h ++ larchintrin.h ++ lsxintrin.h ++ ) ++ + set(mips_msa_files + msa.h + ) +@@ -220,6 +226,7 @@ set(files + ${hexagon_files} + ${hip_files} + ${hlsl_files} ++ ${loongarch_files} + ${mips_msa_files} + ${opencl_files} + ${ppc_files} +@@ -381,6 +388,7 @@ add_dependencies("clang-resource-headers" + "hexagon-resource-headers" + "hip-resource-headers" + "hlsl-resource-headers" ++ "loongarch-resource-headers" + "mips-resource-headers" + "ppc-resource-headers" + "ppc-htm-resource-headers" +@@ -404,6 +412,7 @@ add_header_target("aarch64-resource-headers" "${aarch64_only_files};${aarch64_on + add_header_target("cuda-resource-headers" "${cuda_files};${cuda_wrapper_files}") + add_header_target("hexagon-resource-headers" "${hexagon_files}") + add_header_target("hip-resource-headers" "${hip_files}") ++add_header_target("loongarch-resource-headers" 
"${loongarch_files}") + add_header_target("mips-resource-headers" "${mips_msa_files}") + add_header_target("ppc-resource-headers" "${ppc_files};${ppc_wrapper_files}") + add_header_target("ppc-htm-resource-headers" "${ppc_htm_files}") +@@ -494,6 +503,12 @@ install( + EXCLUDE_FROM_ALL + COMPONENT hip-resource-headers) + ++install( ++ FILES ${loongarch_files} ++ DESTINATION ${header_install_dir} ++ EXCLUDE_FROM_ALL ++ COMPONENT loongarch-resource-headers) ++ + install( + FILES ${mips_msa_files} + DESTINATION ${header_install_dir} +diff --git a/clang/lib/Headers/larchintrin.h b/clang/lib/Headers/larchintrin.h +new file mode 100644 +index 000000000..b5acf218b +--- /dev/null ++++ b/clang/lib/Headers/larchintrin.h +@@ -0,0 +1,338 @@ ++//===----------- larchintrin.h - LoongArch BASE intrinsics ------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the LoongArch Base intrinsics ++// ++//===----------------------------------------------------------------------===// ++#ifndef __LOONGARCH_BASE_H ++#define __LOONGARCH_BASE_H ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++typedef struct drdtime{ ++ unsigned long dvalue; ++ unsigned long dtimeid; ++} __drdtime_t; ++ ++typedef struct rdtime{ ++ unsigned int value; ++ unsigned int timeid; ++} __rdtime_t; ++ ++/* Assembly instruction format: rd, csr_num */ ++/* Data types in instruction templates: unsigned int, uimm14_32 */ ++#define __csrrd_w(/*uimm14_32*/ _1) \ ++ ((unsigned int)__builtin_loongarch_csrrd_w(_1)) ++ ++/* Assembly instruction format: rd, csr_num */ ++/* Data types in instruction templates: unsigned int, uimm14_32 */ ++#define __csrwr_w(/*unsigned int*/ _1, /*uimm14_32*/ _2) \ ++ ((unsigned int)__builtin_loongarch_csrwr_w((unsigned int)(_1), (_2))) ++ ++/* Assembly instruction format: rd, rj, csr_num */ ++/* Data types in instruction templates: unsigned int, unsigned int, uimm14_32 */ ++#define __csrxchg_w(/*unsigned int*/ _1, /*unsigned int*/ _2, \ ++ /*uimm14_32*/ _3) \ ++ ((unsigned int)__builtin_loongarch_csrxchg_w((unsigned int)(_1), \ ++ (unsigned int)(_2), (_3))) ++ ++/* Assembly instruction format: rd, csr_num */ ++/* Data types in instruction templates: unsigned long int, uimm14 */ ++#define __csrrd_d(/*uimm14*/ _1) \ ++ ((unsigned long int)__builtin_loongarch_csrrd_d(_1)) ++ ++/* Assembly instruction format: rd, csr_num */ ++/* Data types in instruction templates: unsigned long int, uimm14 */ ++#define __csrwr_d(/*unsigned long int*/ _1, /*uimm14*/ _2) \ ++ ((unsigned long int)__builtin_loongarch_csrwr_d((unsigned long int)(_1), \ ++ (_2))) ++ ++/* Assembly instruction format: rd, rj, csr_num */ ++/* Data types in instruction templates: unsigned long int, unsigned long int, uimm14 */ ++#define __csrxchg_d(/*unsigned long int*/ _1, /*unsigned long int*/ _2, \ ++ /*uimm14*/ _3) \ ++ ((unsigned long int)__builtin_loongarch_csrxchg_d( \ ++ (unsigned long int)(_1), (unsigned long int)(_2), (_3))) ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned char, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++unsigned char __iocsrrd_b(unsigned int _1) ++{ ++ return (unsigned char)__builtin_loongarch_iocsrrd_b((unsigned int)_1); ++} ++ ++/* Assembly 
instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned short, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++unsigned short __iocsrrd_h(unsigned int _1) ++{ ++ return (unsigned short)__builtin_loongarch_iocsrrd_h((unsigned int)_1); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned int, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++unsigned int __iocsrrd_w(unsigned int _1) ++{ ++ return (unsigned int)__builtin_loongarch_iocsrrd_w((unsigned int)_1); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned long int, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++unsigned long int __iocsrrd_d(unsigned int _1) ++{ ++ return (unsigned long int)__builtin_loongarch_iocsrrd_d((unsigned int)_1); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned char, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __iocsrwr_b(unsigned char _1, unsigned int _2) ++{ ++ return (void)__builtin_loongarch_iocsrwr_b((unsigned char)_1, (unsigned int)_2); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned short, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __iocsrwr_h(unsigned short _1, unsigned int _2) ++{ ++ return (void)__builtin_loongarch_iocsrwr_h((unsigned short)_1, (unsigned int)_2); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned int, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __iocsrwr_w(unsigned int _1, unsigned int _2) ++{ ++ return (void)__builtin_loongarch_iocsrwr_w((unsigned int)_1, (unsigned int)_2); ++} ++ ++extern __inline unsigned int ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++ __cpucfg(unsigned int _1) { ++ return (unsigned int)__builtin_loongarch_cpucfg((unsigned int)_1); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned long int, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __iocsrwr_d(unsigned long int _1, unsigned int _2) ++{ ++ return (void)__builtin_loongarch_iocsrwr_d((unsigned long int)_1, (unsigned int)_2); ++} ++ ++/* Assembly instruction format: op, rj, si12 */ ++/* Data types in instruction templates: uimm5, unsigned int, simm12 */ ++#define __cacop_w(/*uimm5*/ _1, /*unsigned int*/ _2, /*simm12*/ _3) \ ++ ((void)__builtin_loongarch_cacop_w((_1), (unsigned int)(_2), (_3))) ++ ++/* Assembly instruction format: op, rj, si12 */ ++/* Data types in instruction templates: uimm5, unsigned long int, simm12 */ ++#define __cacop_d(/*uimm5*/ _1, /*unsigned long int*/ _2, /*simm12*/ _3) \ ++ ((void)__builtin_loongarch_cacop_d((_1), (unsigned long int)(_2), (_3))) ++ ++#define __rdtime_d __builtin_loongarch_rdtime_d ++#define __rdtimel_w __builtin_loongarch_rdtimel_w ++#define __rdtimeh_w __builtin_loongarch_rdtimeh_w ++ ++extern __inline __drdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__builtin_loongarch_rdtime_d (void) ++{ ++ __drdtime_t drdtime; ++ __asm__ volatile ( ++ "rdtime.d\t%[val],%[tid]\n\t" ++ : 
[val]"=&r"(drdtime.dvalue),[tid]"=&r"(drdtime.dtimeid) ++ : ++ ); ++ return drdtime; ++} ++ ++extern __inline __rdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__builtin_loongarch_rdtimeh_w (void) ++{ ++ __rdtime_t rdtime; ++ __asm__ volatile ( ++ "rdtimeh.w\t%[val],%[tid]\n\t" ++ : [val]"=&r"(rdtime.value),[tid]"=&r"(rdtime.timeid) ++ : ++ ); ++ return rdtime; ++} ++ ++extern __inline __rdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__builtin_loongarch_rdtimel_w (void) ++{ ++ __rdtime_t rdtime; ++ __asm__ volatile ( ++ "rdtimel.w\t%[val],%[tid]\n\t" ++ : [val]"=&r"(rdtime.value),[tid]"=&r"(rdtime.timeid) ++ : ++ ); ++ return rdtime; ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, char, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crc_w_b_w(char _1, int _2) ++{ ++ return (int)__builtin_loongarch_crc_w_b_w((char)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, short, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crc_w_h_w(short _1, int _2) ++{ ++ return (int)__builtin_loongarch_crc_w_h_w((short)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, int, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crc_w_w_w(int _1, int _2) ++{ ++ return (int)__builtin_loongarch_crc_w_w_w((int)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, long int, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crc_w_d_w(long int _1, int _2) ++{ ++ return (int)__builtin_loongarch_crc_w_d_w((long int)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, char, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crcc_w_b_w(char _1, int _2) ++{ ++ return (int)__builtin_loongarch_crcc_w_b_w((char)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, short, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crcc_w_h_w(short _1, int _2) ++{ ++ return (int)__builtin_loongarch_crcc_w_h_w((short)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, int, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crcc_w_w_w(int _1, int _2) ++{ ++ return (int)__builtin_loongarch_crcc_w_w_w((int)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, long int, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crcc_w_d_w(long int _1, int _2) ++{ ++ return (int)__builtin_loongarch_crcc_w_d_w((long int)_1, (int)_2); ++} ++ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __tlbclr() ++{ ++ return (void)__builtin_loongarch_tlbclr(); ++} ++ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __tlbflush() ++{ ++ return (void)__builtin_loongarch_tlbflush(); ++} ++ ++extern __inline __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) ++void __tlbfill() ++{ ++ return (void)__builtin_loongarch_tlbfill(); ++} ++ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __tlbrd() ++{ ++ return (void)__builtin_loongarch_tlbrd(); ++} ++ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __tlbwr() ++{ ++ return (void)__builtin_loongarch_tlbwr(); ++} ++ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __tlbsrch() ++{ ++ return (void)__builtin_loongarch_tlbsrch(); ++} ++ ++/* Assembly instruction format: code */ ++/* Data types in instruction templates: uimm15 */ ++#define __syscall(/*uimm15*/ _1) ((void)__builtin_loongarch_syscall(_1)) ++ ++/* Assembly instruction format: code */ ++/* Data types in instruction templates: uimm15 */ ++#define __break(/*uimm15*/ _1) ((void)__builtin_loongarch_break(_1)) ++ ++/* Assembly instruction format: hint */ ++/* Data types in instruction templates: uimm15 */ ++#define __dbar(/*uimm15*/ _1) ((void)__builtin_loongarch_dbar(_1)) ++ ++/* Assembly instruction format: hint */ ++/* Data types in instruction templates: uimm15 */ ++#define __ibar(/*uimm15*/ _1) ((void)__builtin_loongarch_ibar(_1)) ++ ++/* Assembly instruction format: rj, rk */ ++/* Data types in instruction templates: long int, long int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __asrtle_d(long int _1, long int _2) ++{ ++ return (void)__builtin_loongarch_asrtle_d((long int)_1, (long int)_2); ++} ++ ++/* Assembly instruction format: rj, rk */ ++/* Data types in instruction templates: long int, long int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __asrtgt_d(long int _1, long int _2) ++{ ++ return (void)__builtin_loongarch_asrtgt_d((long int)_1, (long int)_2); ++} ++ ++#define __movfcsr2gr(uimm5) \ ++({ \ ++ unsigned int rd; \ ++ __asm__ volatile ( \ ++ "movfcsr2gr %0, $fcsr" #uimm5 \ ++ : "=&r"(rd) \ ++ : \ ++ ); rd; \ ++}) ++ ++#define __movgr2fcsr(uimm5, rj) \ ++{ \ ++ __asm__ volatile ( \ ++ "movgr2fcsr $fcsr" #uimm5 ", %0" \ ++ : \ ++ : "r" (rj) \ ++ ); \ ++} ++ ++#ifdef __cplusplus ++} ++#endif ++#endif /* __LOONGARCH_BASE_H */ +diff --git a/clang/lib/Headers/lasxintrin.h b/clang/lib/Headers/lasxintrin.h +new file mode 100644 +index 000000000..c454b0c9e +--- /dev/null ++++ b/clang/lib/Headers/lasxintrin.h +@@ -0,0 +1,5337 @@ ++//===----------- lasxintrin.h - LoongArch LASX intrinsics ++//------------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the LoongArch LASX intrinsics. 
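/* A usage sketch for the barrier and FCSR helpers defined at the end of
 * larchintrin.h above (illustrative, not from the patch).  Hint value 0 for
 * __dbar/__ibar and FCSR number 0 are the usual choices but are assumptions
 * here. */
#include <larchintrin.h>

static unsigned int sync_sketch(void) {
  __dbar(0);                              /* full data barrier                  */
  __ibar(0);                              /* instruction barrier                */
  return __movfcsr2gr(0);                 /* read $fcsr0 into a GPR             */
}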
++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef _GCC_LOONGSON_ASXINTRIN_H ++#define _GCC_LOONGSON_ASXINTRIN_H 1 ++ ++#if defined(__loongarch_asx) ++ ++typedef signed char v32i8 __attribute__((vector_size(32), aligned(32))); ++typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1))); ++typedef unsigned char v32u8 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned char v32u8_b __attribute__((vector_size(32), aligned(1))); ++typedef short v16i16 __attribute__((vector_size(32), aligned(32))); ++typedef short v16i16_h __attribute__((vector_size(32), aligned(2))); ++typedef unsigned short v16u16 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned short v16u16_h __attribute__((vector_size(32), aligned(2))); ++typedef int v8i32 __attribute__((vector_size(32), aligned(32))); ++typedef int v8i32_w __attribute__((vector_size(32), aligned(4))); ++typedef unsigned int v8u32 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4))); ++typedef long long v4i64 __attribute__((vector_size(32), aligned(32))); ++typedef long long v4i64_d __attribute__((vector_size(32), aligned(8))); ++typedef unsigned long long v4u64 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned long long v4u64_d __attribute__((vector_size(32), aligned(8))); ++typedef float v8f32 __attribute__((vector_size(32), aligned(32))); ++typedef float v8f32_w __attribute__((vector_size(32), aligned(4))); ++typedef double v4f64 __attribute__((vector_size(32), aligned(32))); ++typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); ++ ++typedef double v4f64 __attribute__((vector_size(32), aligned(32))); ++typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); ++ ++typedef float __m256 __attribute__((__vector_size__(32), __may_alias__)); ++typedef long long __m256i __attribute__((__vector_size__(32), __may_alias__)); ++typedef double __m256d __attribute__((__vector_size__(32), __may_alias__)); ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsll_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsll_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsll_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsll_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsll_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsll_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsll_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsll_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. 
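/* A usage sketch for the 256-bit types and the element shifts above
 * (illustrative, not from the patch; requires compiling with LASX enabled).
 * Building vectors through the GNU vector-extension initializer on the v8i32
 * helper type is an assumption of this sketch, not something the header
 * prescribes. */
#include <lasxintrin.h>

static __m256i shift_sketch(void) {
  v8i32 a = {1, 2, 3, 4, 5, 6, 7, 8};
  v8i32 s = {1, 1, 1, 1, 2, 2, 2, 2};
  __m256i x = __lasx_xvsll_w((__m256i)a, (__m256i)s); /* per-lane variable shift */
  return __lasx_xvslli_w(x, 3);                       /* shift every lane by 3   */
}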
*/ ++#define __lasx_xvslli_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslli_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ ++#define __lasx_xvslli_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslli_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ ++#define __lasx_xvslli_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslli_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ ++#define __lasx_xvslli_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslli_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsra_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsra_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsra_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsra_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsra_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsra_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsra_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsra_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ ++#define __lasx_xvsrai_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrai_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ ++#define __lasx_xvsrai_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrai_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ ++#define __lasx_xvsrai_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrai_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ ++#define __lasx_xvsrai_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrai_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrar_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrar_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrar_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrar_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrar_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrar_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrar_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrar_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ ++#define __lasx_xvsrari_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrari_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ ++#define __lasx_xvsrari_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrari_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ ++#define __lasx_xvsrari_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrari_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ ++#define __lasx_xvsrari_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrari_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrl_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrl_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrl_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrl_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrl_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrl_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrl_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrl_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ ++#define __lasx_xvsrli_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrli_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. 
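/* A sketch contrasting the right-shift families above (illustrative, not from
 * the patch): xvsrai_* shifts arithmetically, xvsrli_* logically, and
 * xvsrari_* shifts arithmetically with rounding.  The input values and the
 * shift amount 2 are assumptions. */
#include <lasxintrin.h>

static void rshift_sketch(void) {
  v8i32 a = {-7, -6, -5, -4, 4, 5, 6, 7};
  __m256i arith = __lasx_xvsrai_w((__m256i)a, 2);  /* sign-extending            */
  __m256i logic = __lasx_xvsrli_w((__m256i)a, 2);  /* zero-filling              */
  __m256i round = __lasx_xvsrari_w((__m256i)a, 2); /* arithmetic + rounding bit */
  (void)arith; (void)logic; (void)round;
}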
*/ ++#define __lasx_xvsrli_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrli_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ ++#define __lasx_xvsrli_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrli_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ ++#define __lasx_xvsrli_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrli_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlr_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlr_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlr_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlr_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlr_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlr_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlr_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlr_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ ++#define __lasx_xvsrlri_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrlri_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ ++#define __lasx_xvsrlri_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrlri_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ ++#define __lasx_xvsrlri_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrlri_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ ++#define __lasx_xvsrlri_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrlri_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitclr_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitclr_b((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitclr_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitclr_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. 
*/ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitclr_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitclr_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitclr_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitclr_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ ++#define __lasx_xvbitclri_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitclri_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ ++#define __lasx_xvbitclri_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitclri_h((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI. */ ++#define __lasx_xvbitclri_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitclri_w((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ ++#define __lasx_xvbitclri_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitclri_d((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitset_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitset_b((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitset_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitset_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitset_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitset_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitset_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitset_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ ++#define __lasx_xvbitseti_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitseti_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ ++#define __lasx_xvbitseti_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitseti_h((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI. 
*/ ++#define __lasx_xvbitseti_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitseti_w((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ ++#define __lasx_xvbitseti_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitseti_d((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitrev_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitrev_b((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitrev_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitrev_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitrev_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitrev_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitrev_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitrev_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ ++#define __lasx_xvbitrevi_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitrevi_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ ++#define __lasx_xvbitrevi_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitrevi_h((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI. */ ++#define __lasx_xvbitrevi_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitrevi_w((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ ++#define __lasx_xvbitrevi_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitrevi_d((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. 
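/* A usage sketch for the per-element bit operations above (illustrative, not
 * from the patch): bit 0 of every byte is set, cleared and then flipped back.
 * The bit index 0 and the zero-initialized input are assumptions. */
#include <lasxintrin.h>

static __m256i bit_sketch(void) {
  v32u8 a = {0};                          /* every byte lane starts at 0x00     */
  __m256i v = (__m256i)a;
  v = __lasx_xvbitseti_b(v, 0);           /* set bit 0   -> 0x01 in each byte   */
  v = __lasx_xvbitclri_b(v, 0);           /* clear bit 0 -> 0x00                */
  return __lasx_xvbitrevi_b(v, 0);        /* flip bit 0  -> 0x01                */
}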
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ ++#define __lasx_xvaddi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvaddi_bu((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ ++#define __lasx_xvaddi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvaddi_hu((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ ++#define __lasx_xvaddi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvaddi_wu((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ ++#define __lasx_xvaddi_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvaddi_du((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ ++#define __lasx_xvsubi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsubi_bu((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ ++#define __lasx_xvsubi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsubi_hu((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ ++#define __lasx_xvsubi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsubi_wu((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. 
*/ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ ++#define __lasx_xvsubi_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsubi_du((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V32QI, V32QI, QI. */ ++#define __lasx_xvmaxi_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V16HI, V16HI, QI. */ ++#define __lasx_xvmaxi_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V8SI, V8SI, QI. */ ++#define __lasx_xvmaxi_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V4DI, V4DI, QI. */ ++#define __lasx_xvmaxi_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ ++#define __lasx_xvmaxi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_bu((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ ++#define __lasx_xvmaxi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_hu((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI. */ ++#define __lasx_xvmaxi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_wu((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ ++#define __lasx_xvmaxi_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_du((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V32QI, V32QI, QI. */ ++#define __lasx_xvmini_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V16HI, V16HI, QI. */ ++#define __lasx_xvmini_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V8SI, V8SI, QI. */ ++#define __lasx_xvmini_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V4DI, V4DI, QI. */ ++#define __lasx_xvmini_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. 
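/* A usage sketch for the element-wise max/min family above (illustrative, not
 * from the patch).  The _bu/_hu/_wu/_du forms compare unsigned lanes; the
 * signed immediate forms take an si5 constant (-16..15), and the bounds -8 and
 * 7 used here are assumptions. */
#include <lasxintrin.h>

static __m256i clamp_sketch(__m256i x) {
  x = __lasx_xvmaxi_w(x, -8);             /* lower-bound each 32-bit lane at -8 */
  return __lasx_xvmini_w(x, 7);           /* upper-bound each 32-bit lane at 7  */
}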
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ ++#define __lasx_xvmini_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_bu((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ ++#define __lasx_xvmini_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_hu((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI. */ ++#define __lasx_xvmini_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_wu((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ ++#define __lasx_xvmini_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_du((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvseq_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvseq_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvseq_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvseq_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvseq_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvseq_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvseq_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvseq_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, si5. 
*/ ++/* Data types in instruction templates: V32QI, V32QI, QI. */ ++#define __lasx_xvseqi_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvseqi_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V16HI, V16HI, QI. */ ++#define __lasx_xvseqi_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvseqi_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V8SI, V8SI, QI. */ ++#define __lasx_xvseqi_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvseqi_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V4DI, V4DI, QI. */ ++#define __lasx_xvseqi_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvseqi_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V32QI, V32QI, QI. */ ++#define __lasx_xvslti_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V16HI, V16HI, QI. */ ++#define __lasx_xvslti_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V8SI, V8SI, QI. */ ++#define __lasx_xvslti_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V4DI, V4DI, QI. */ ++#define __lasx_xvslti_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV16HI, UV16HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V32QI, UV32QI, UQI. */ ++#define __lasx_xvslti_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_bu((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, UV16HI, UQI. */ ++#define __lasx_xvslti_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_hu((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, UV8SI, UQI. */ ++#define __lasx_xvslti_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_wu((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V4DI, UV4DI, UQI. */ ++#define __lasx_xvslti_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_du((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V32QI, V32QI, QI. */ ++#define __lasx_xvslei_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V16HI, V16HI, QI. 
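/* A usage sketch for the comparison intrinsics above (illustrative, not from
 * the patch): each comparison yields a lane mask of all ones (true) or all
 * zeros (false).  Driving a select with plain &/|/~ on the mask relies on GNU
 * vector bitwise operators and is an assumption of this sketch. */
#include <lasxintrin.h>

static __m256i select_smaller(__m256i a, __m256i b) {
  __m256i lt = __lasx_xvslt_w(a, b);      /* all-ones where a < b (signed)      */
  return (lt & a) | (~lt & b);            /* pick the smaller lane from a or b  */
}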
*/ ++#define __lasx_xvslei_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V8SI, V8SI, QI. */ ++#define __lasx_xvslei_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V4DI, V4DI, QI. */ ++#define __lasx_xvslei_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V32QI, UV32QI, UQI. */ ++#define __lasx_xvslei_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_bu((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, UV16HI, UQI. */ ++#define __lasx_xvslei_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_hu((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, UV8SI, UQI. */ ++#define __lasx_xvslei_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_wu((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V4DI, UV4DI, UQI. */ ++#define __lasx_xvslei_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_du((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ ++#define __lasx_xvsat_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ ++#define __lasx_xvsat_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ ++#define __lasx_xvsat_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6. 
*/ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ ++#define __lasx_xvsat_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ ++#define __lasx_xvsat_bu(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_bu((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ ++#define __lasx_xvsat_hu(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_hu((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI. */ ++#define __lasx_xvsat_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_wu((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ ++#define __lasx_xvsat_du(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_du((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadda_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadda_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadda_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadda_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadda_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadda_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadda_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadda_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. 
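/* A usage sketch for the saturation and saturating-add intrinsics above
 * (illustrative, not from the patch): xvsadd_* adds with saturation instead of
 * wrapping, and xvsat_h(x, n) clamps every 16-bit lane to the signed
 * (n+1)-bit range.  The width 7, i.e. the int8 range, is an assumption. */
#include <lasxintrin.h>

static __m256i sat_sketch(__m256i a, __m256i b) {
  __m256i sum = __lasx_xvsadd_h(a, b);    /* saturating 16-bit add              */
  return __lasx_xvsat_h(sum, 7);          /* clamp each lane to [-128, 127]     */
}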
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmul_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmul_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmul_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmul_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmul_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmul_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmul_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmul_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. 
*/ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmadd_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmadd_b((v32i8)_1, (v32i8)_2, (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmadd_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmadd_h((v16i16)_1, (v16i16)_2, (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmadd_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmadd_w((v8i32)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmadd_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmadd_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsub_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmsub_b((v32i8)_1, (v32i8)_2, (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsub_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmsub_h((v16i16)_1, (v16i16)_2, (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsub_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmsub_w((v8i32)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsub_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmsub_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. 
*/ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_hu_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_hu_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV16HI, UV16HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_wu_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_wu_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_du_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_du_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_hu_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_hu_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_wu_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_wu_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_du_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_du_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ ++#define __lasx_xvrepl128vei_b(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrepl128vei_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ ++#define __lasx_xvrepl128vei_h(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrepl128vei_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui2. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ ++#define __lasx_xvrepl128vei_w(/*__m256i*/ _1, /*ui2*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrepl128vei_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui1. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ ++#define __lasx_xvrepl128vei_d(/*__m256i*/ _1, /*ui1*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrepl128vei_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickev_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickev_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickev_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickev_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. 
*/ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickev_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickev_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickev_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickev_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickod_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickod_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickod_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickod_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickod_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickod_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickod_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickod_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvh_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvh_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvh_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvh_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvh_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvh_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvh_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvh_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvl_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvl_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvl_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvl_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvl_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvl_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvl_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvl_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackev_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackev_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackev_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackev_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackev_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackev_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackev_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackev_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackod_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackod_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackod_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackod_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackod_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackod_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackod_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackod_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvshuf_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvshuf_b((v32i8)_1, (v32i8)_2, (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvshuf_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvshuf_h((v16i16)_1, (v16i16)_2, (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvshuf_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvshuf_w((v8i32)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvshuf_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvshuf_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvand_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvand_v((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ ++#define __lasx_xvandi_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvandi_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvor_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvor_v((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ ++#define __lasx_xvori_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvori_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvnor_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvnor_v((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. 
*/ ++#define __lasx_xvnori_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvnori_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvxor_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvxor_v((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ ++#define __lasx_xvxori_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvxori_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitsel_v(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvbitsel_v((v32u8)_1, (v32u8)_2, (v32u8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI, USI. */ ++#define __lasx_xvbitseli_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvbitseli_b((v32u8)(_1), (v32u8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V32QI, V32QI, USI. */ ++#define __lasx_xvshuf4i_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvshuf4i_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V16HI, V16HI, USI. */ ++#define __lasx_xvshuf4i_h(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvshuf4i_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V8SI, V8SI, USI. */ ++#define __lasx_xvshuf4i_w(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvshuf4i_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, rj. */ ++/* Data types in instruction templates: V32QI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplgr2vr_b(int _1) { ++ return (__m256i)__builtin_lasx_xvreplgr2vr_b((int)_1); ++} ++ ++/* Assembly instruction format: xd, rj. */ ++/* Data types in instruction templates: V16HI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplgr2vr_h(int _1) { ++ return (__m256i)__builtin_lasx_xvreplgr2vr_h((int)_1); ++} ++ ++/* Assembly instruction format: xd, rj. */ ++/* Data types in instruction templates: V8SI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplgr2vr_w(int _1) { ++ return (__m256i)__builtin_lasx_xvreplgr2vr_w((int)_1); ++} ++ ++/* Assembly instruction format: xd, rj. */ ++/* Data types in instruction templates: V4DI, DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplgr2vr_d(long int _1) { ++ return (__m256i)__builtin_lasx_xvreplgr2vr_d((long int)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpcnt_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvpcnt_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpcnt_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvpcnt_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpcnt_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvpcnt_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpcnt_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvpcnt_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclo_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclo_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclo_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclo_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclo_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclo_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclo_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclo_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclz_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclz_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclz_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclz_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclz_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclz_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclz_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclz_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfadd_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfadd_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfadd_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfadd_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfsub_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfsub_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfsub_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfsub_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmul_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmul_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmul_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmul_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfdiv_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfdiv_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfdiv_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfdiv_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcvt_h_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcvt_h_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfcvt_s_d(__m256d _1, __m256d _2) { ++ return (__m256)__builtin_lasx_xvfcvt_s_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmin_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmin_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. 
*/ ++/* Data types in instruction templates: V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmin_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmin_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmina_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmina_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmina_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmina_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmax_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmax_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmax_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmax_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmaxa_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmaxa_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmaxa_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmaxa_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfclass_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvfclass_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfclass_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvfclass_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfsqrt_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfsqrt_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfsqrt_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfsqrt_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrecip_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrecip_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrecip_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrecip_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrint_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrint_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrint_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrint_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrsqrt_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrsqrt_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrsqrt_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrsqrt_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvflogb_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvflogb_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvflogb_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvflogb_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfcvth_s_h(__m256i _1) { ++ return (__m256)__builtin_lasx_xvfcvth_s_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfcvth_d_s(__m256 _1) { ++ return (__m256d)__builtin_lasx_xvfcvth_d_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfcvtl_s_h(__m256i _1) { ++ return (__m256)__builtin_lasx_xvfcvtl_s_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfcvtl_d_s(__m256 _1) { ++ return (__m256d)__builtin_lasx_xvfcvtl_d_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftint_w_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftint_l_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV8SI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_wu_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftint_wu_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV4DI, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_lu_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftint_lu_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrz_w_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrz_l_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV8SI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_wu_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrz_wu_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV4DI, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_lu_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrz_lu_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvffint_s_w(__m256i _1) { ++ return (__m256)__builtin_lasx_xvffint_s_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvffint_d_l(__m256i _1) { ++ return (__m256d)__builtin_lasx_xvffint_d_l((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvffint_s_wu(__m256i _1) { ++ return (__m256)__builtin_lasx_xvffint_s_wu((v8u32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, UV4DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvffint_d_lu(__m256i _1) { ++ return (__m256d)__builtin_lasx_xvffint_d_lu((v4u64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, rk. */ ++/* Data types in instruction templates: V32QI, V32QI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve_b(__m256i _1, int _2) { ++ return (__m256i)__builtin_lasx_xvreplve_b((v32i8)_1, (int)_2); ++} ++ ++/* Assembly instruction format: xd, xj, rk. */ ++/* Data types in instruction templates: V16HI, V16HI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve_h(__m256i _1, int _2) { ++ return (__m256i)__builtin_lasx_xvreplve_h((v16i16)_1, (int)_2); ++} ++ ++/* Assembly instruction format: xd, xj, rk. */ ++/* Data types in instruction templates: V8SI, V8SI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve_w(__m256i _1, int _2) { ++ return (__m256i)__builtin_lasx_xvreplve_w((v8i32)_1, (int)_2); ++} ++ ++/* Assembly instruction format: xd, xj, rk. */ ++/* Data types in instruction templates: V4DI, V4DI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve_d(__m256i _1, int _2) { ++ return (__m256i)__builtin_lasx_xvreplve_d((v4i64)_1, (int)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ ++#define __lasx_xvpermi_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvpermi_w((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvandn_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvandn_v((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvneg_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvneg_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvneg_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvneg_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvneg_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvneg_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvneg_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvneg_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V16HI, V32QI, UQI. */ ++#define __lasx_xvsllwil_h_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_h_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V8SI, V16HI, UQI. */ ++#define __lasx_xvsllwil_w_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_w_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V4DI, V8SI, UQI. */ ++#define __lasx_xvsllwil_d_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_d_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: UV16HI, UV32QI, UQI. */ ++#define __lasx_xvsllwil_hu_bu(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_hu_bu((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. 
*/ ++/* Data types in instruction templates: UV8SI, UV16HI, UQI. */ ++#define __lasx_xvsllwil_wu_hu(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_wu_hu((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV4DI, UV8SI, UQI. */ ++#define __lasx_xvsllwil_du_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_du_wu((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsran_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsran_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsran_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsran_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsran_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsran_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_bu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_bu_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_hu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_hu_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_wu_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_wu_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. 
*/ ++/* Data types in instruction templates: V32QI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrarn_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrarn_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrarn_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrarn_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrarn_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrarn_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_bu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_bu_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_hu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_hu_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_wu_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_wu_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrln_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrln_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrln_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrln_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrln_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrln_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_bu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_bu_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_hu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_hu_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_wu_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_wu_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlrn_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlrn_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlrn_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlrn_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlrn_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlrn_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_bu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_bu_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_hu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_hu_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV4DI, UV4DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_wu_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_wu_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, UQI. */ ++#define __lasx_xvfrstpi_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvfrstpi_b((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, UQI. */ ++#define __lasx_xvfrstpi_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvfrstpi_h((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfrstp_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvfrstp_b((v32i8)_1, (v32i8)_2, (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfrstp_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvfrstp_h((v16i16)_1, (v16i16)_2, (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ ++#define __lasx_xvshuf4i_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvshuf4i_d((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ ++#define __lasx_xvbsrl_v(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbsrl_v((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ ++#define __lasx_xvbsll_v(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbsll_v((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ ++#define __lasx_xvextrins_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvextrins_b((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ ++#define __lasx_xvextrins_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvextrins_h((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ ++#define __lasx_xvextrins_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvextrins_w((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ ++#define __lasx_xvextrins_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvextrins_d((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskltz_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskltz_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskltz_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskltz_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskltz_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskltz_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskltz_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskltz_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsigncov_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsigncov_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsigncov_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsigncov_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsigncov_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsigncov_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsigncov_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsigncov_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmadd_s(__m256 _1, __m256 _2, __m256 _3) { ++ return (__m256)__builtin_lasx_xvfmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmadd_d(__m256d _1, __m256d _2, __m256d _3) { ++ return (__m256d)__builtin_lasx_xvfmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmsub_s(__m256 _1, __m256 _2, __m256 _3) { ++ return (__m256)__builtin_lasx_xvfmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa. 
*/ ++/* Data types in instruction templates: V4DF, V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmsub_d(__m256d _1, __m256d _2, __m256d _3) { ++ return (__m256d)__builtin_lasx_xvfmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfnmadd_s(__m256 _1, __m256 _2, __m256 _3) { ++ return (__m256)__builtin_lasx_xvfnmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfnmadd_d(__m256d _1, __m256d _2, __m256d _3) { ++ return (__m256d)__builtin_lasx_xvfnmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfnmsub_s(__m256 _1, __m256 _2, __m256 _3) { ++ return (__m256)__builtin_lasx_xvfnmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfnmsub_d(__m256d _1, __m256d _2, __m256d _3) { ++ return (__m256d)__builtin_lasx_xvfnmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrne_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrne_w_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrne_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrne_l_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrp_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrp_w_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrp_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrp_l_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrm_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrm_w_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrm_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrm_l_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftint_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvffint_s_l(__m256i _1, __m256i _2) { ++ return (__m256)__builtin_lasx_xvffint_s_l((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftintrz_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrp_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftintrp_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrm_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftintrm_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrne_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftintrne_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftinth_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftinth_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintl_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintl_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvffinth_d_w(__m256i _1) { ++ return (__m256d)__builtin_lasx_xvffinth_d_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvffintl_d_w(__m256i _1) { ++ return (__m256d)__builtin_lasx_xvffintl_d_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrzh_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrzh_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrzl_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrzl_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrph_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrph_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrpl_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrpl_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrmh_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrmh_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrml_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrml_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrneh_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrneh_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrnel_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrnel_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrintrne_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrintrne_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrintrne_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrintrne_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrintrz_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrintrz_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrintrz_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrintrz_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrintrp_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrintrp_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrintrp_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrintrp_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrintrm_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrintrm_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrintrm_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrintrm_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, rj, si12. */ ++/* Data types in instruction templates: V32QI, CVPOINTER, SI. */ ++#define __lasx_xvld(/*void **/ _1, /*si12*/ _2) \ ++ ((__m256i)__builtin_lasx_xvld((void const *)(_1), (_2))) ++ ++/* Assembly instruction format: xd, rj, si12. */ ++/* Data types in instruction templates: VOID, V32QI, CVPOINTER, SI. */ ++#define __lasx_xvst(/*__m256i*/ _1, /*void **/ _2, /*si12*/ _3) \ ++ ((void)__builtin_lasx_xvst((v32i8)(_1), (void *)(_2), (_3))) ++ ++/* Assembly instruction format: xd, rj, si8, idx. */ ++/* Data types in instruction templates: VOID, V32QI, CVPOINTER, SI, UQI. */ ++#define __lasx_xvstelm_b(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ ++ /*idx*/ _4) \ ++ ((void)__builtin_lasx_xvstelm_b((v32i8)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: xd, rj, si8, idx. */ ++/* Data types in instruction templates: VOID, V16HI, CVPOINTER, SI, UQI. */ ++#define __lasx_xvstelm_h(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ ++ /*idx*/ _4) \ ++ ((void)__builtin_lasx_xvstelm_h((v16i16)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: xd, rj, si8, idx. */ ++/* Data types in instruction templates: VOID, V8SI, CVPOINTER, SI, UQI. */ ++#define __lasx_xvstelm_w(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ ++ /*idx*/ _4) \ ++ ((void)__builtin_lasx_xvstelm_w((v8i32)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: xd, rj, si8, idx. */ ++/* Data types in instruction templates: VOID, V4DI, CVPOINTER, SI, UQI. */ ++#define __lasx_xvstelm_d(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ ++ /*idx*/ _4) \ ++ ((void)__builtin_lasx_xvstelm_d((v4i64)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, UQI. */ ++#define __lasx_xvinsve0_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui3*/ _3) \ ++ ((__m256i)__builtin_lasx_xvinsve0_w((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui2. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, UQI. 
*/ ++#define __lasx_xvinsve0_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui2*/ _3) \ ++ ((__m256i)__builtin_lasx_xvinsve0_d((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ ++#define __lasx_xvpickve_w(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvpickve_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui2. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ ++#define __lasx_xvpickve_d(/*__m256i*/ _1, /*ui2*/ _2) \ ++ ((__m256i)__builtin_lasx_xvpickve_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvorn_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvorn_v((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, i13. */ ++/* Data types in instruction templates: V4DI, HI. */ ++#define __lasx_xvldi(/*i13*/ _1) ((__m256i)__builtin_lasx_xvldi((_1))) ++ ++/* Assembly instruction format: xd, rj, rk. */ ++/* Data types in instruction templates: V32QI, CVPOINTER, DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvldx(void const *_1, long int _2) { ++ return (__m256i)__builtin_lasx_xvldx((void const *)_1, (long int)_2); ++} ++ ++/* Assembly instruction format: xd, rj, rk. 
*/ ++/* Data types in instruction templates: VOID, V32QI, CVPOINTER, DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void ++ __lasx_xvstx(__m256i _1, void *_2, long int _3) { ++ return (void)__builtin_lasx_xvstx((v32i8)_1, (void *)_2, (long int)_3); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvextl_qu_du(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvextl_qu_du((v4u64)_1); ++} ++ ++/* Assembly instruction format: xd, rj, ui3. */ ++/* Data types in instruction templates: V8SI, V8SI, SI, UQI. */ ++#define __lasx_xvinsgr2vr_w(/*__m256i*/ _1, /*int*/ _2, /*ui3*/ _3) \ ++ ((__m256i)__builtin_lasx_xvinsgr2vr_w((v8i32)(_1), (int)(_2), (_3))) ++ ++/* Assembly instruction format: xd, rj, ui2. */ ++/* Data types in instruction templates: V4DI, V4DI, DI, UQI. */ ++#define __lasx_xvinsgr2vr_d(/*__m256i*/ _1, /*long int*/ _2, /*ui2*/ _3) \ ++ ((__m256i)__builtin_lasx_xvinsgr2vr_d((v4i64)(_1), (long int)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_q(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_q((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_h_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_h_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_w_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_w_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_d_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_d_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_w_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_w_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_d_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_d_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_d_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_d_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_hu_bu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_hu_bu((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_wu_hu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_wu_hu((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_du_wu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_du_wu((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_wu_bu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_wu_bu((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_du_hu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_du_hu((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_du_bu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_du_bu((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ ++#define __lasx_xvpermi_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvpermi_q((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V4DI, V4DI, USI. */ ++#define __lasx_xvpermi_d(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvpermi_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvperm_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvperm_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, rj, si12. */ ++/* Data types in instruction templates: V32QI, CVPOINTER, SI. */ ++#define __lasx_xvldrepl_b(/*void **/ _1, /*si12*/ _2) \ ++ ((__m256i)__builtin_lasx_xvldrepl_b((void const *)(_1), (_2))) ++ ++/* Assembly instruction format: xd, rj, si11. */ ++/* Data types in instruction templates: V16HI, CVPOINTER, SI. */ ++#define __lasx_xvldrepl_h(/*void **/ _1, /*si11*/ _2) \ ++ ((__m256i)__builtin_lasx_xvldrepl_h((void const *)(_1), (_2))) ++ ++/* Assembly instruction format: xd, rj, si10. */ ++/* Data types in instruction templates: V8SI, CVPOINTER, SI. */ ++#define __lasx_xvldrepl_w(/*void **/ _1, /*si10*/ _2) \ ++ ((__m256i)__builtin_lasx_xvldrepl_w((void const *)(_1), (_2))) ++ ++/* Assembly instruction format: xd, rj, si9. */ ++/* Data types in instruction templates: V4DI, CVPOINTER, SI. */ ++#define __lasx_xvldrepl_d(/*void **/ _1, /*si9*/ _2) \ ++ ((__m256i)__builtin_lasx_xvldrepl_d((void const *)(_1), (_2))) ++ ++/* Assembly instruction format: rd, xj, ui3. */ ++/* Data types in instruction templates: SI, V8SI, UQI. */ ++#define __lasx_xvpickve2gr_w(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((int)__builtin_lasx_xvpickve2gr_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: rd, xj, ui3. */ ++/* Data types in instruction templates: USI, V8SI, UQI. */ ++#define __lasx_xvpickve2gr_wu(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((unsigned int)__builtin_lasx_xvpickve2gr_wu((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: rd, xj, ui2. */ ++/* Data types in instruction templates: DI, V4DI, UQI. */ ++#define __lasx_xvpickve2gr_d(/*__m256i*/ _1, /*ui2*/ _2) \ ++ ((long int)__builtin_lasx_xvpickve2gr_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: rd, xj, ui2. */ ++/* Data types in instruction templates: UDI, V4DI, UQI. */ ++#define __lasx_xvpickve2gr_du(/*__m256i*/ _1, /*ui2*/ _2) \ ++ ((unsigned long int)__builtin_lasx_xvpickve2gr_du((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. 
*/ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_d_wu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_d_wu_w((v8u32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_w_hu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_w_hu_h((v16u16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_h_bu_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_h_bu_b((v32u8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_d_wu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_d_wu_w((v8u32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_w_hu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_w_hu_h((v16u16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_h_bu_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_h_bu_b((v32u8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, V8SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_d_wu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_d_wu_w((v8u32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_w_hu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_w_hu_h((v16u16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_h_bu_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_h_bu_b((v32u8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_d_wu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_d_wu_w((v8u32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_w_hu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_w_hu_h((v16u16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_h_bu_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_h_bu_b((v32u8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_qu_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_qu_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_qu_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_qu_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_q_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_q_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_d_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_w_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_w_h((v8i32)_1, (v16i16)_2, ++ (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_h_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_h_b((v16i16)_1, (v32i8)_2, ++ (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_q_du(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_q_du((v4u64)_1, (v4u64)_2, ++ (v4u64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_d_wu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_d_wu((v4u64)_1, (v8u32)_2, ++ (v8u32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_w_hu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_w_hu((v8u32)_1, (v16u16)_2, ++ (v16u16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_h_bu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_h_bu((v16u16)_1, (v32u8)_2, ++ (v32u8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_q_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_q_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V8SI, V8SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_d_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_w_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_w_h((v8i32)_1, (v16i16)_2, ++ (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_h_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_h_b((v16i16)_1, (v32i8)_2, ++ (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_q_du(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_q_du((v4u64)_1, (v4u64)_2, ++ (v4u64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_d_wu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_d_wu((v4u64)_1, (v8u32)_2, ++ (v8u32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_w_hu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_w_hu((v8u32)_1, (v16u16)_2, ++ (v16u16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_h_bu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_h_bu((v16u16)_1, (v32u8)_2, ++ (v32u8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, UV4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_q_du_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_q_du_d((v4i64)_1, (v4u64)_2, ++ (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, UV8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_d_wu_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_d_wu_w((v4i64)_1, (v8u32)_2, ++ (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, UV16HI, V16HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_w_hu_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_w_hu_h((v8i32)_1, (v16u16)_2, ++ (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, UV32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_h_bu_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_h_bu_b((v16i16)_1, (v32u8)_2, ++ (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, UV4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_q_du_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_q_du_d((v4i64)_1, (v4u64)_2, ++ (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, UV8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_d_wu_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_d_wu_w((v4i64)_1, (v8u32)_2, ++ (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, UV16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_w_hu_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_w_hu_h((v8i32)_1, (v16u16)_2, ++ (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, UV32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_h_bu_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_h_bu_b((v16i16)_1, (v32u8)_2, ++ (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvrotr_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvrotr_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvrotr_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvrotr_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvrotr_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvrotr_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvrotr_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvrotr_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. 
*/ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_q(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_q((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_q(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_q((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_q_du_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_q_du_d((v4u64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_q_du_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_q_du_d((v4u64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_q_du_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_q_du_d((v4u64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_q_du_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_q_du_d((v4u64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskgez_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskgez_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsknz_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmsknz_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_h_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_h_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_w_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_w_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_d_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_d_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. 
*/ ++/* Data types in instruction templates: V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_q_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_q_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV16HI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_hu_bu(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_hu_bu((v32u8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV8SI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_wu_hu(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_wu_hu((v16u16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV4DI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_du_wu(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_du_wu((v8u32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_qu_du(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_qu_du((v4u64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ ++#define __lasx_xvrotri_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrotri_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ ++#define __lasx_xvrotri_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrotri_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ ++#define __lasx_xvrotri_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrotri_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ ++#define __lasx_xvrotri_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrotri_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvextl_q_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvextl_q_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ ++#define __lasx_xvsrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ ++#define __lasx_xvsrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ ++#define __lasx_xvsrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. 
*/ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ ++#define __lasx_xvsrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ ++#define __lasx_xvsrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ ++#define __lasx_xvsrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ ++#define __lasx_xvsrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ ++#define __lasx_xvsrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlrni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ ++#define __lasx_xvssrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ ++#define __lasx_xvssrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ ++#define __lasx_xvssrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ ++#define __lasx_xvssrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: UV32QI, UV32QI, V32QI, USI. */ ++#define __lasx_xvssrlni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV16HI, UV16HI, V16HI, USI. */ ++#define __lasx_xvssrlni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: UV8SI, UV8SI, V8SI, USI. */ ++#define __lasx_xvssrlni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: UV4DI, UV4DI, V4DI, USI. 
*/ ++#define __lasx_xvssrlni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ ++#define __lasx_xvssrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ ++#define __lasx_xvssrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ ++#define __lasx_xvssrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ ++#define __lasx_xvssrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: UV32QI, UV32QI, V32QI, USI. */ ++#define __lasx_xvssrlrni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV16HI, UV16HI, V16HI, USI. */ ++#define __lasx_xvssrlrni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: UV8SI, UV8SI, V8SI, USI. */ ++#define __lasx_xvssrlrni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: UV4DI, UV4DI, V4DI, USI. */ ++#define __lasx_xvssrlrni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ ++#define __lasx_xvsrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrani_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ ++#define __lasx_xvsrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrani_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ ++#define __lasx_xvsrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrani_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ ++#define __lasx_xvsrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrani_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4. 
*/ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ ++#define __lasx_xvsrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ ++#define __lasx_xvsrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ ++#define __lasx_xvsrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ ++#define __lasx_xvsrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ ++#define __lasx_xvssrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ ++#define __lasx_xvssrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ ++#define __lasx_xvssrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ ++#define __lasx_xvssrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: UV32QI, UV32QI, V32QI, USI. */ ++#define __lasx_xvssrani_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV16HI, UV16HI, V16HI, USI. */ ++#define __lasx_xvssrani_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: UV8SI, UV8SI, V8SI, USI. */ ++#define __lasx_xvssrani_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: UV4DI, UV4DI, V4DI, USI. */ ++#define __lasx_xvssrani_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_du_q((v4u64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. 
*/ ++#define __lasx_xvssrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ ++#define __lasx_xvssrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ ++#define __lasx_xvssrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ ++#define __lasx_xvssrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: UV32QI, UV32QI, V32QI, USI. */ ++#define __lasx_xvssrarni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV16HI, UV16HI, V16HI, USI. */ ++#define __lasx_xvssrarni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: UV8SI, UV8SI, V8SI, USI. */ ++#define __lasx_xvssrarni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: UV4DI, UV4DI, V4DI, USI. */ ++#define __lasx_xvssrarni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV32QI. */ ++#define __lasx_xbnz_b(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_b((v32u8)(_1))) ++ ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV4DI. */ ++#define __lasx_xbnz_d(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_d((v4u64)(_1))) ++ ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV16HI. */ ++#define __lasx_xbnz_h(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_h((v16u16)(_1))) ++ ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV32QI. */ ++#define __lasx_xbnz_v(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_v((v32u8)(_1))) ++ ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV8SI. */ ++#define __lasx_xbnz_w(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_w((v8u32)(_1))) ++ ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV32QI. */ ++#define __lasx_xbz_b(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_b((v32u8)(_1))) ++ ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV4DI. */ ++#define __lasx_xbz_d(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_d((v4u64)(_1))) ++ ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV16HI. 
*/ ++#define __lasx_xbz_h(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_h((v16u16)(_1))) ++ ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV32QI. */ ++#define __lasx_xbz_v(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_v((v32u8)(_1))) ++ ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV8SI. */ ++#define __lasx_xbz_w(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_w((v8u32)(_1))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_caf_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_caf_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_caf_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_caf_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_ceq_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_ceq_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_ceq_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_ceq_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cle_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cle_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cle_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cle_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_clt_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_clt_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_clt_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_clt_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cne_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cne_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. 
*/ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cne_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cne_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cor_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cor_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cor_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cor_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cueq_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cueq_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cueq_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cueq_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cule_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cule_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cule_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cule_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cult_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cult_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cult_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cult_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cun_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cun_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cune_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cune_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cune_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cune_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cun_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cun_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_saf_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_saf_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_saf_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_saf_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_seq_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_seq_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_seq_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_seq_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sle_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sle_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sle_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sle_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_slt_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_slt_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_slt_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_slt_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sne_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sne_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sne_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sne_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sor_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sor_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sor_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sor_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sueq_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sueq_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sueq_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sueq_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sule_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sule_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sule_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sule_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sult_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sult_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sult_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sult_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sun_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sun_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sune_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sune_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sune_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sune_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sun_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sun_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui2. */ ++/* Data types in instruction templates: V4DF, V4DF, UQI. */ ++#define __lasx_xvpickve_d_f(/*__m256d*/ _1, /*ui2*/ _2) \ ++ ((__m256d)__builtin_lasx_xvpickve_d_f((v4f64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V8SF, V8SF, UQI. */ ++#define __lasx_xvpickve_w_f(/*__m256*/ _1, /*ui3*/ _2) \ ++ ((__m256)__builtin_lasx_xvpickve_w_f((v8f32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, si10. */ ++/* Data types in instruction templates: V32QI, HI. */ ++#define __lasx_xvrepli_b(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_b((_1))) ++ ++/* Assembly instruction format: xd, si10. */ ++/* Data types in instruction templates: V4DI, HI. */ ++#define __lasx_xvrepli_d(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_d((_1))) ++ ++/* Assembly instruction format: xd, si10. */ ++/* Data types in instruction templates: V16HI, HI. */ ++#define __lasx_xvrepli_h(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_h((_1))) ++ ++/* Assembly instruction format: xd, si10. */ ++/* Data types in instruction templates: V8SI, HI. */ ++#define __lasx_xvrepli_w(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_w((_1))) ++ ++#endif /* defined(__loongarch_asx). */ ++#endif /* _GCC_LOONGSON_ASXINTRIN_H. */ +diff --git a/clang/lib/Headers/lsxintrin.h b/clang/lib/Headers/lsxintrin.h +new file mode 100644 +index 000000000..48344c209 +--- /dev/null ++++ b/clang/lib/Headers/lsxintrin.h +@@ -0,0 +1,5162 @@ ++//===----------- lsxintrin.h - LoongArch LSX intrinsics ------------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the LoongArch LSX intrinsics. 
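++//
++// Illustrative usage sketch (an assumption for documentation purposes, not
++// one of the generated wrappers below): on a target where __loongarch_sx is
++// defined, the wrappers can be called like ordinary functions, e.g.
++//
++//   #include <lsxintrin.h>
++//   // Hypothetical helper: shift each 32-bit lane of 'a' left by the
++//   // per-lane shift amounts held in 'b', using the __lsx_vsll_w wrapper
++//   // defined in this header.
++//   static inline __m128i shift_lanes_left(__m128i a, __m128i b) {
++//     return __lsx_vsll_w(a, b);
++//   }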
++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef _GCC_LOONGSON_SXINTRIN_H ++#define _GCC_LOONGSON_SXINTRIN_H 1 ++ ++#if defined(__loongarch_sx) ++typedef signed char v16i8 __attribute__((vector_size(16), aligned(16))); ++typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1))); ++typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1))); ++typedef short v8i16 __attribute__((vector_size(16), aligned(16))); ++typedef short v8i16_h __attribute__((vector_size(16), aligned(2))); ++typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2))); ++typedef int v4i32 __attribute__((vector_size(16), aligned(16))); ++typedef int v4i32_w __attribute__((vector_size(16), aligned(4))); ++typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4))); ++typedef long long v2i64 __attribute__((vector_size(16), aligned(16))); ++typedef long long v2i64_d __attribute__((vector_size(16), aligned(8))); ++typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8))); ++typedef float v4f32 __attribute__((vector_size(16), aligned(16))); ++typedef float v4f32_w __attribute__((vector_size(16), aligned(4))); ++typedef double v2f64 __attribute__((vector_size(16), aligned(16))); ++typedef double v2f64_d __attribute__((vector_size(16), aligned(8))); ++ ++typedef long long __m128i __attribute__((__vector_size__(16), __may_alias__)); ++typedef float __m128 __attribute__((__vector_size__(16), __may_alias__)); ++typedef double __m128d __attribute__((__vector_size__(16), __may_alias__)); ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsll_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsll_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsll_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsll_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsll_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsll_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsll_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsll_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ ++#define __lsx_vslli_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vslli_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. 
*/ ++#define __lsx_vslli_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vslli_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ ++#define __lsx_vslli_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslli_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ ++#define __lsx_vslli_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vslli_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsra_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsra_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsra_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsra_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsra_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsra_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsra_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsra_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ ++#define __lsx_vsrai_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrai_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ ++#define __lsx_vsrai_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrai_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ ++#define __lsx_vsrai_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrai_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ ++#define __lsx_vsrai_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrai_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrar_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrar_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrar_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrar_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrar_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrar_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrar_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrar_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ ++#define __lsx_vsrari_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrari_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ ++#define __lsx_vsrari_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrari_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ ++#define __lsx_vsrari_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrari_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ ++#define __lsx_vsrari_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrari_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrl_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrl_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrl_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrl_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrl_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrl_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrl_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrl_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ ++#define __lsx_vsrli_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrli_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ ++#define __lsx_vsrli_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrli_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ ++#define __lsx_vsrli_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrli_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. 
*/ ++#define __lsx_vsrli_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrli_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlr_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlr_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlr_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlr_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlr_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlr_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlr_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlr_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ ++#define __lsx_vsrlri_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrlri_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ ++#define __lsx_vsrlri_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrlri_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ ++#define __lsx_vsrlri_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrlri_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ ++#define __lsx_vsrlri_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrlri_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitclr_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitclr_b((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitclr_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitclr_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitclr_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitclr_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitclr_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitclr_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ ++#define __lsx_vbitclri_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitclri_b((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ ++#define __lsx_vbitclri_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitclri_h((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI. */ ++#define __lsx_vbitclri_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitclri_w((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ ++#define __lsx_vbitclri_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitclri_d((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitset_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitset_b((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitset_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitset_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitset_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitset_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitset_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitset_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ ++#define __lsx_vbitseti_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitseti_b((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ ++#define __lsx_vbitseti_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitseti_h((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI. */ ++#define __lsx_vbitseti_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitseti_w((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ ++#define __lsx_vbitseti_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitseti_d((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitrev_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitrev_b((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitrev_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitrev_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitrev_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitrev_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitrev_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitrev_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ ++#define __lsx_vbitrevi_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitrevi_b((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ ++#define __lsx_vbitrevi_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitrevi_h((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI. */ ++#define __lsx_vbitrevi_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitrevi_w((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ ++#define __lsx_vbitrevi_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitrevi_d((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. 
*/ ++#define __lsx_vaddi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vaddi_bu((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ ++#define __lsx_vaddi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vaddi_hu((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ ++#define __lsx_vaddi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vaddi_wu((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ ++#define __lsx_vaddi_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vaddi_du((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ ++#define __lsx_vsubi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsubi_bu((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ ++#define __lsx_vsubi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsubi_hu((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ ++#define __lsx_vsubi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsubi_wu((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ ++#define __lsx_vsubi_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsubi_du((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V16QI, V16QI, QI. */ ++#define __lsx_vmaxi_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V8HI, V8HI, QI. */ ++#define __lsx_vmaxi_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V4SI, V4SI, QI. */ ++#define __lsx_vmaxi_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V2DI, V2DI, QI. */ ++#define __lsx_vmaxi_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ ++#define __lsx_vmaxi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_bu((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ ++#define __lsx_vmaxi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_hu((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. 
*/ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI. */ ++#define __lsx_vmaxi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_wu((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ ++#define __lsx_vmaxi_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_du((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V16QI, V16QI, QI. */ ++#define __lsx_vmini_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V8HI, V8HI, QI. */ ++#define __lsx_vmini_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V4SI, V4SI, QI. */ ++#define __lsx_vmini_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V2DI, V2DI, QI. */ ++#define __lsx_vmini_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ ++#define __lsx_vmini_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_bu((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ ++#define __lsx_vmini_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_hu((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI. */ ++#define __lsx_vmini_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_wu((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ ++#define __lsx_vmini_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_du((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vseq_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vseq_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vseq_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vseq_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vseq_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vseq_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vseq_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vseq_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V16QI, V16QI, QI. */ ++#define __lsx_vseqi_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vseqi_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V8HI, V8HI, QI. */ ++#define __lsx_vseqi_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vseqi_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V4SI, V4SI, QI. */ ++#define __lsx_vseqi_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vseqi_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V2DI, V2DI, QI. 
*/ ++#define __lsx_vseqi_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vseqi_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V16QI, V16QI, QI. */ ++#define __lsx_vslti_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V8HI, V8HI, QI. */ ++#define __lsx_vslti_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V4SI, V4SI, QI. */ ++#define __lsx_vslti_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V2DI, V2DI, QI. */ ++#define __lsx_vslti_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V16QI, UV16QI, UQI. */ ++#define __lsx_vslti_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_bu((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, UV8HI, UQI. */ ++#define __lsx_vslti_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_hu((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, UV4SI, UQI. */ ++#define __lsx_vslti_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_wu((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V2DI, UV2DI, UQI. */ ++#define __lsx_vslti_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_du((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V16QI, V16QI, QI. */ ++#define __lsx_vslei_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V8HI, V8HI, QI. */ ++#define __lsx_vslei_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V4SI, V4SI, QI. */ ++#define __lsx_vslei_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V2DI, V2DI, QI. */ ++#define __lsx_vslei_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, UV16QI, UV16QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V16QI, UV16QI, UQI. */ ++#define __lsx_vslei_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_bu((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, UV8HI, UQI. */ ++#define __lsx_vslei_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_hu((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, UV4SI, UQI. */ ++#define __lsx_vslei_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_wu((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V2DI, UV2DI, UQI. */ ++#define __lsx_vslei_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_du((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ ++#define __lsx_vsat_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ ++#define __lsx_vsat_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ ++#define __lsx_vsat_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ ++#define __lsx_vsat_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ ++#define __lsx_vsat_bu(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_bu((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ ++#define __lsx_vsat_hu(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_hu((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI. 
*/ ++#define __lsx_vsat_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_wu((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ ++#define __lsx_vsat_du(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_du((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadda_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadda_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadda_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadda_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadda_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadda_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadda_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadda_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. 
*/ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. 
*/ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmul_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmul_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmul_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmul_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmul_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmul_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmul_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmul_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmadd_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmadd_b((v16i8)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmadd_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmadd_h((v8i16)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmadd_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmadd_w((v4i32)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmadd_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmadd_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsub_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmsub_b((v16i8)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsub_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmsub_h((v8i16)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsub_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmsub_w((v4i32)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsub_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmsub_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_hu_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_hu_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_wu_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_wu_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_du_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_du_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_hu_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_hu_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_wu_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_wu_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_du_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_du_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. 
*/ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, rk. */ ++/* Data types in instruction templates: V16QI, V16QI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplve_b(__m128i _1, int _2) { ++ return (__m128i)__builtin_lsx_vreplve_b((v16i8)_1, (int)_2); ++} ++ ++/* Assembly instruction format: vd, vj, rk. */ ++/* Data types in instruction templates: V8HI, V8HI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplve_h(__m128i _1, int _2) { ++ return (__m128i)__builtin_lsx_vreplve_h((v8i16)_1, (int)_2); ++} ++ ++/* Assembly instruction format: vd, vj, rk. */ ++/* Data types in instruction templates: V4SI, V4SI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplve_w(__m128i _1, int _2) { ++ return (__m128i)__builtin_lsx_vreplve_w((v4i32)_1, (int)_2); ++} ++ ++/* Assembly instruction format: vd, vj, rk. */ ++/* Data types in instruction templates: V2DI, V2DI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplve_d(__m128i _1, int _2) { ++ return (__m128i)__builtin_lsx_vreplve_d((v2i64)_1, (int)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ ++#define __lsx_vreplvei_b(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vreplvei_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ ++#define __lsx_vreplvei_h(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vreplvei_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui2. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ ++#define __lsx_vreplvei_w(/*__m128i*/ _1, /*ui2*/ _2) \ ++ ((__m128i)__builtin_lsx_vreplvei_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui1. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ ++#define __lsx_vreplvei_d(/*__m128i*/ _1, /*ui1*/ _2) \ ++ ((__m128i)__builtin_lsx_vreplvei_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickev_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickev_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickev_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickev_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. 
*/ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickev_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickev_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickev_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickev_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickod_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickod_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickod_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickod_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickod_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickod_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickod_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickod_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvh_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvh_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvh_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvh_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvh_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvh_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvh_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvh_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvl_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvl_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvl_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvl_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvl_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvl_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvl_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvl_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackev_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackev_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackev_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackev_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackev_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackev_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackev_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackev_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackod_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackod_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackod_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackod_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackod_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackod_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. 
*/ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackod_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackod_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vshuf_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vshuf_h((v8i16)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vshuf_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vshuf_w((v4i32)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vshuf_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vshuf_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vand_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vand_v((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ ++#define __lsx_vandi_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vandi_b((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vor_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vor_v((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ ++#define __lsx_vori_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vori_b((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vnor_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vnor_v((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ ++#define __lsx_vnori_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vnori_b((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vxor_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vxor_v((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. 
*/ ++#define __lsx_vxori_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vxori_b((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitsel_v(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vbitsel_v((v16u8)_1, (v16u8)_2, (v16u8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI, USI. */ ++#define __lsx_vbitseli_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vbitseli_b((v16u8)(_1), (v16u8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V16QI, V16QI, USI. */ ++#define __lsx_vshuf4i_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vshuf4i_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V8HI, V8HI, USI. */ ++#define __lsx_vshuf4i_h(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vshuf4i_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V4SI, V4SI, USI. */ ++#define __lsx_vshuf4i_w(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vshuf4i_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, rj. */ ++/* Data types in instruction templates: V16QI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplgr2vr_b(int _1) { ++ return (__m128i)__builtin_lsx_vreplgr2vr_b((int)_1); ++} ++ ++/* Assembly instruction format: vd, rj. */ ++/* Data types in instruction templates: V8HI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplgr2vr_h(int _1) { ++ return (__m128i)__builtin_lsx_vreplgr2vr_h((int)_1); ++} ++ ++/* Assembly instruction format: vd, rj. */ ++/* Data types in instruction templates: V4SI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplgr2vr_w(int _1) { ++ return (__m128i)__builtin_lsx_vreplgr2vr_w((int)_1); ++} ++ ++/* Assembly instruction format: vd, rj. */ ++/* Data types in instruction templates: V2DI, DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplgr2vr_d(long int _1) { ++ return (__m128i)__builtin_lsx_vreplgr2vr_d((long int)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpcnt_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vpcnt_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpcnt_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vpcnt_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpcnt_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vpcnt_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. 
*/ ++/* Data types in instruction templates: V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpcnt_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vpcnt_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclo_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclo_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclo_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclo_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclo_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclo_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclo_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclo_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclz_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclz_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclz_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclz_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclz_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclz_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclz_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclz_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: rd, vj, ui4. */ ++/* Data types in instruction templates: SI, V16QI, UQI. */ ++#define __lsx_vpickve2gr_b(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((int)__builtin_lsx_vpickve2gr_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui3. */ ++/* Data types in instruction templates: SI, V8HI, UQI. */ ++#define __lsx_vpickve2gr_h(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((int)__builtin_lsx_vpickve2gr_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui2. */ ++/* Data types in instruction templates: SI, V4SI, UQI. */ ++#define __lsx_vpickve2gr_w(/*__m128i*/ _1, /*ui2*/ _2) \ ++ ((int)__builtin_lsx_vpickve2gr_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui1. */ ++/* Data types in instruction templates: DI, V2DI, UQI. */ ++#define __lsx_vpickve2gr_d(/*__m128i*/ _1, /*ui1*/ _2) \ ++ ((long int)__builtin_lsx_vpickve2gr_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui4. 
*/ ++/* Data types in instruction templates: USI, V16QI, UQI. */ ++#define __lsx_vpickve2gr_bu(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((unsigned int)__builtin_lsx_vpickve2gr_bu((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui3. */ ++/* Data types in instruction templates: USI, V8HI, UQI. */ ++#define __lsx_vpickve2gr_hu(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((unsigned int)__builtin_lsx_vpickve2gr_hu((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui2. */ ++/* Data types in instruction templates: USI, V4SI, UQI. */ ++#define __lsx_vpickve2gr_wu(/*__m128i*/ _1, /*ui2*/ _2) \ ++ ((unsigned int)__builtin_lsx_vpickve2gr_wu((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui1. */ ++/* Data types in instruction templates: UDI, V2DI, UQI. */ ++#define __lsx_vpickve2gr_du(/*__m128i*/ _1, /*ui1*/ _2) \ ++ ((unsigned long int)__builtin_lsx_vpickve2gr_du((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, rj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, SI, UQI. */ ++#define __lsx_vinsgr2vr_b(/*__m128i*/ _1, /*int*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vinsgr2vr_b((v16i8)(_1), (int)(_2), (_3))) ++ ++/* Assembly instruction format: vd, rj, ui3. */ ++/* Data types in instruction templates: V8HI, V8HI, SI, UQI. */ ++#define __lsx_vinsgr2vr_h(/*__m128i*/ _1, /*int*/ _2, /*ui3*/ _3) \ ++ ((__m128i)__builtin_lsx_vinsgr2vr_h((v8i16)(_1), (int)(_2), (_3))) ++ ++/* Assembly instruction format: vd, rj, ui2. */ ++/* Data types in instruction templates: V4SI, V4SI, SI, UQI. */ ++#define __lsx_vinsgr2vr_w(/*__m128i*/ _1, /*int*/ _2, /*ui2*/ _3) \ ++ ((__m128i)__builtin_lsx_vinsgr2vr_w((v4i32)(_1), (int)(_2), (_3))) ++ ++/* Assembly instruction format: vd, rj, ui1. */ ++/* Data types in instruction templates: V2DI, V2DI, DI, UQI. */ ++#define __lsx_vinsgr2vr_d(/*__m128i*/ _1, /*long int*/ _2, /*ui1*/ _3) \ ++ ((__m128i)__builtin_lsx_vinsgr2vr_d((v2i64)(_1), (long int)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfadd_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfadd_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfadd_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfadd_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfsub_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfsub_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfsub_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfsub_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmul_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmul_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmul_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmul_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfdiv_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfdiv_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfdiv_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfdiv_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcvt_h_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcvt_h_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfcvt_s_d(__m128d _1, __m128d _2) { ++ return (__m128)__builtin_lsx_vfcvt_s_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmin_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmin_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmin_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmin_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmina_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmina_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmina_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmina_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmax_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmax_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmax_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmax_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmaxa_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmaxa_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmaxa_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmaxa_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfclass_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vfclass_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfclass_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vfclass_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfsqrt_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfsqrt_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfsqrt_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfsqrt_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrecip_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrecip_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrecip_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrecip_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrint_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrint_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrint_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrint_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrsqrt_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrsqrt_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. 
*/ ++/* Data types in instruction templates: V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrsqrt_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrsqrt_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vflogb_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vflogb_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vflogb_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vflogb_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfcvth_s_h(__m128i _1) { ++ return (__m128)__builtin_lsx_vfcvth_s_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfcvth_d_s(__m128 _1) { ++ return (__m128d)__builtin_lsx_vfcvth_d_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfcvtl_s_h(__m128i _1) { ++ return (__m128)__builtin_lsx_vfcvtl_s_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfcvtl_d_s(__m128 _1) { ++ return (__m128d)__builtin_lsx_vfcvtl_d_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftint_w_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftint_l_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV4SI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_wu_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftint_wu_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV2DI, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_lu_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftint_lu_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrz_w_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. 
*/ ++/* Data types in instruction templates: V2DI, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrz_l_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV4SI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_wu_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrz_wu_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV2DI, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_lu_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrz_lu_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vffint_s_w(__m128i _1) { ++ return (__m128)__builtin_lsx_vffint_s_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vffint_d_l(__m128i _1) { ++ return (__m128d)__builtin_lsx_vffint_d_l((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vffint_s_wu(__m128i _1) { ++ return (__m128)__builtin_lsx_vffint_s_wu((v4u32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vffint_d_lu(__m128i _1) { ++ return (__m128d)__builtin_lsx_vffint_d_lu((v2u64)_1); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vandn_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vandn_v((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vneg_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vneg_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vneg_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vneg_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vneg_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vneg_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vneg_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vneg_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V8HI, V16QI, UQI. */ ++#define __lsx_vsllwil_h_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_h_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V4SI, V8HI, UQI. */ ++#define __lsx_vsllwil_w_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_w_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V2DI, V4SI, UQI. */ ++#define __lsx_vsllwil_d_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_d_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui3. 
*/ ++/* Data types in instruction templates: UV8HI, UV16QI, UQI. */ ++#define __lsx_vsllwil_hu_bu(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_hu_bu((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: UV4SI, UV8HI, UQI. */ ++#define __lsx_vsllwil_wu_hu(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_wu_hu((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV2DI, UV4SI, UQI. */ ++#define __lsx_vsllwil_du_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_du_wu((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsran_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsran_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsran_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsran_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsran_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsran_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_bu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_bu_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_hu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_hu_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV2DI, UV2DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_wu_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_wu_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrarn_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrarn_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrarn_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrarn_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrarn_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrarn_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_bu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_bu_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_hu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_hu_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_wu_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_wu_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrln_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrln_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrln_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrln_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrln_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrln_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_bu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_bu_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_hu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_hu_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_wu_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_wu_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlrn_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlrn_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlrn_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlrn_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlrn_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlrn_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_bu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_bu_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV4SI, UV4SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_hu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_hu_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_wu_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_wu_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, UQI. */ ++#define __lsx_vfrstpi_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vfrstpi_b((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, UQI. */ ++#define __lsx_vfrstpi_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vfrstpi_h((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfrstp_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vfrstp_b((v16i8)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfrstp_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vfrstp_h((v8i16)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ ++#define __lsx_vshuf4i_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vshuf4i_d((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ ++#define __lsx_vbsrl_v(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbsrl_v((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ ++#define __lsx_vbsll_v(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbsll_v((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ ++#define __lsx_vextrins_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vextrins_b((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ ++#define __lsx_vextrins_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vextrins_h((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ ++#define __lsx_vextrins_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vextrins_w((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. 
*/ ++#define __lsx_vextrins_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vextrins_d((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskltz_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskltz_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskltz_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskltz_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskltz_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskltz_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskltz_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskltz_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsigncov_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsigncov_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsigncov_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsigncov_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsigncov_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsigncov_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsigncov_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsigncov_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmadd_s(__m128 _1, __m128 _2, __m128 _3) { ++ return (__m128)__builtin_lsx_vfmadd_s((v4f32)_1, (v4f32)_2, (v4f32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmadd_d(__m128d _1, __m128d _2, __m128d _3) { ++ return (__m128d)__builtin_lsx_vfmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF, V4SF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmsub_s(__m128 _1, __m128 _2, __m128 _3) { ++ return (__m128)__builtin_lsx_vfmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmsub_d(__m128d _1, __m128d _2, __m128d _3) { ++ return (__m128d)__builtin_lsx_vfmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfnmadd_s(__m128 _1, __m128 _2, __m128 _3) { ++ return (__m128)__builtin_lsx_vfnmadd_s((v4f32)_1, (v4f32)_2, (v4f32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfnmadd_d(__m128d _1, __m128d _2, __m128d _3) { ++ return (__m128d)__builtin_lsx_vfnmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfnmsub_s(__m128 _1, __m128 _2, __m128 _3) { ++ return (__m128)__builtin_lsx_vfnmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfnmsub_d(__m128d _1, __m128d _2, __m128d _3) { ++ return (__m128d)__builtin_lsx_vfnmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrne_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrne_w_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrne_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrne_l_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrp_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrp_w_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrp_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrp_l_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrm_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrm_w_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. 
*/ ++/* Data types in instruction templates: V2DI, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrm_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrm_l_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftint_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vffint_s_l(__m128i _1, __m128i _2) { ++ return (__m128)__builtin_lsx_vffint_s_l((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftintrz_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrp_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftintrp_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrm_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftintrm_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrne_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftintrne_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintl_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintl_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftinth_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftinth_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vffinth_d_w(__m128i _1) { ++ return (__m128d)__builtin_lsx_vffinth_d_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vffintl_d_w(__m128i _1) { ++ return (__m128d)__builtin_lsx_vffintl_d_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrzl_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrzl_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrzh_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrzh_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrpl_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrpl_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrph_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrph_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrml_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrml_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrmh_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrmh_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrnel_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrnel_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrneh_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrneh_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrintrne_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrintrne_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrintrne_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrintrne_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrintrz_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrintrz_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrintrz_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrintrz_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. 
*/ ++/* Data types in instruction templates: V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrintrp_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrintrp_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrintrp_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrintrp_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrintrm_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrintrm_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrintrm_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrintrm_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, rj, si8, idx. */ ++/* Data types in instruction templates: VOID, V16QI, CVPOINTER, SI, UQI. */ ++#define __lsx_vstelm_b(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ++ ((void)__builtin_lsx_vstelm_b((v16i8)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: vd, rj, si8, idx. */ ++/* Data types in instruction templates: VOID, V8HI, CVPOINTER, SI, UQI. */ ++#define __lsx_vstelm_h(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ++ ((void)__builtin_lsx_vstelm_h((v8i16)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: vd, rj, si8, idx. */ ++/* Data types in instruction templates: VOID, V4SI, CVPOINTER, SI, UQI. */ ++#define __lsx_vstelm_w(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ++ ((void)__builtin_lsx_vstelm_w((v4i32)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: vd, rj, si8, idx. */ ++/* Data types in instruction templates: VOID, V2DI, CVPOINTER, SI, UQI. */ ++#define __lsx_vstelm_d(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ++ ((void)__builtin_lsx_vstelm_d((v2i64)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_d_wu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_d_wu_w((v4u32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, V8HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_w_hu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_w_hu_h((v8u16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_h_bu_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_h_bu_b((v16u8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_d_wu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_d_wu_w((v4u32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_w_hu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_w_hu_h((v8u16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_h_bu_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_h_bu_b((v16u8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_q_du_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_q_du_d((v2u64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_q_du_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_q_du_d((v2u64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_d_wu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_d_wu_w((v4u32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, V8HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_w_hu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_w_hu_h((v8u16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_h_bu_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_h_bu_b((v16u8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_d_wu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_d_wu_w((v4u32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_w_hu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_w_hu_h((v8u16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_h_bu_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_h_bu_b((v16u8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_q_du_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_q_du_d((v2u64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, V2DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_q_du_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_q_du_d((v2u64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_qu_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_qu_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_qu_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_qu_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_d_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_w_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_h_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_d_wu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_w_hu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV16QI, UV16QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_h_bu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_d_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_w_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_h_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_d_wu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_w_hu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_h_bu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, UV4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_d_wu_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_d_wu_w((v2i64)_1, (v4u32)_2, ++ (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, UV8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_w_hu_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_w_hu_h((v4i32)_1, (v8u16)_2, ++ (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, UV16QI, V16QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_h_bu_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_h_bu_b((v8i16)_1, (v16u8)_2, ++ (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, UV4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_d_wu_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_d_wu_w((v2i64)_1, (v4u32)_2, ++ (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, UV8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_w_hu_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_w_hu_h((v4i32)_1, (v8u16)_2, ++ (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, UV16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_h_bu_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_h_bu_b((v8i16)_1, (v16u8)_2, ++ (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_q_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_q_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_q_du(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_q_du(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, UV2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_q_du_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_q_du_d((v2i64)_1, (v2u64)_2, ++ (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, UV2DI, V2DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_q_du_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_q_du_d((v2i64)_1, (v2u64)_2, ++ (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vrotr_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vrotr_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vrotr_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vrotr_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vrotr_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vrotr_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vrotr_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vrotr_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_q(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_q((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_q(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_q((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, rj, si12. */ ++/* Data types in instruction templates: V16QI, CVPOINTER, SI. */ ++#define __lsx_vldrepl_b(/*void **/ _1, /*si12*/ _2) \ ++ ((__m128i)__builtin_lsx_vldrepl_b((void const *)(_1), (_2))) ++ ++/* Assembly instruction format: vd, rj, si11. */ ++/* Data types in instruction templates: V8HI, CVPOINTER, SI. */ ++#define __lsx_vldrepl_h(/*void **/ _1, /*si11*/ _2) \ ++ ((__m128i)__builtin_lsx_vldrepl_h((void const *)(_1), (_2))) ++ ++/* Assembly instruction format: vd, rj, si10. */ ++/* Data types in instruction templates: V4SI, CVPOINTER, SI. */ ++#define __lsx_vldrepl_w(/*void **/ _1, /*si10*/ _2) \ ++ ((__m128i)__builtin_lsx_vldrepl_w((void const *)(_1), (_2))) ++ ++/* Assembly instruction format: vd, rj, si9. */ ++/* Data types in instruction templates: V2DI, CVPOINTER, SI. */ ++#define __lsx_vldrepl_d(/*void **/ _1, /*si9*/ _2) \ ++ ((__m128i)__builtin_lsx_vldrepl_d((void const *)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskgez_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskgez_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V16QI, V16QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsknz_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmsknz_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V8HI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_h_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_h_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_w_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_w_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_d_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_d_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_q_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_q_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV8HI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_hu_bu(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_hu_bu((v16u8)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV4SI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_wu_hu(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_wu_hu((v8u16)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV2DI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_du_wu(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_du_wu((v4u32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_qu_du(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_qu_du((v2u64)_1); ++} ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ ++#define __lsx_vrotri_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vrotri_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ ++#define __lsx_vrotri_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vrotri_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ ++#define __lsx_vrotri_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vrotri_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ ++#define __lsx_vrotri_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vrotri_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj. 
*/ ++/* Data types in instruction templates: V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vextl_q_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vextl_q_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ ++#define __lsx_vsrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ ++#define __lsx_vsrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ ++#define __lsx_vsrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ ++#define __lsx_vsrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ ++#define __lsx_vsrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlrni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ ++#define __lsx_vsrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlrni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ ++#define __lsx_vsrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlrni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ ++#define __lsx_vsrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlrni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ ++#define __lsx_vssrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ ++#define __lsx_vssrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ ++#define __lsx_vssrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ ++#define __lsx_vssrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4. 
*/ ++/* Data types in instruction templates: UV16QI, UV16QI, V16QI, USI. */ ++#define __lsx_vssrlni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV8HI, UV8HI, V8HI, USI. */ ++#define __lsx_vssrlni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: UV4SI, UV4SI, V4SI, USI. */ ++#define __lsx_vssrlni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: UV2DI, UV2DI, V2DI, USI. */ ++#define __lsx_vssrlni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ ++#define __lsx_vssrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ ++#define __lsx_vssrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ ++#define __lsx_vssrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ ++#define __lsx_vssrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: UV16QI, UV16QI, V16QI, USI. */ ++#define __lsx_vssrlrni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV8HI, UV8HI, V8HI, USI. */ ++#define __lsx_vssrlrni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: UV4SI, UV4SI, V4SI, USI. */ ++#define __lsx_vssrlrni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: UV2DI, UV2DI, V2DI, USI. */ ++#define __lsx_vssrlrni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ ++#define __lsx_vsrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrani_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. 
*/ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ ++#define __lsx_vsrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrani_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ ++#define __lsx_vsrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrani_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ ++#define __lsx_vsrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrani_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ ++#define __lsx_vsrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ ++#define __lsx_vsrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ ++#define __lsx_vsrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ ++#define __lsx_vsrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ ++#define __lsx_vssrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ ++#define __lsx_vssrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ ++#define __lsx_vssrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ ++#define __lsx_vssrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: UV16QI, UV16QI, V16QI, USI. */ ++#define __lsx_vssrani_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV8HI, UV8HI, V8HI, USI. */ ++#define __lsx_vssrani_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: UV4SI, UV4SI, V4SI, USI. 
*/ ++#define __lsx_vssrani_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: UV2DI, UV2DI, V2DI, USI. */ ++#define __lsx_vssrani_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_du_q((v2u64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ ++#define __lsx_vssrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ ++#define __lsx_vssrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ ++#define __lsx_vssrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ ++#define __lsx_vssrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: UV16QI, UV16QI, V16QI, USI. */ ++#define __lsx_vssrarni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV8HI, UV8HI, V8HI, USI. */ ++#define __lsx_vssrarni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: UV4SI, UV4SI, V4SI, USI. */ ++#define __lsx_vssrarni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: UV2DI, UV2DI, V2DI, USI. */ ++#define __lsx_vssrarni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ ++#define __lsx_vpermi_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vpermi_w((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, rj, si12. */ ++/* Data types in instruction templates: V16QI, CVPOINTER, SI. */ ++#define __lsx_vld(/*void **/ _1, /*si12*/ _2) \ ++ ((__m128i)__builtin_lsx_vld((void const *)(_1), (_2))) ++ ++/* Assembly instruction format: vd, rj, si12. */ ++/* Data types in instruction templates: VOID, V16QI, CVPOINTER, SI. */ ++#define __lsx_vst(/*__m128i*/ _1, /*void **/ _2, /*si12*/ _3) \ ++ ((void)__builtin_lsx_vst((v16i8)(_1), (void *)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vorn_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vorn_v((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, i13. */ ++/* Data types in instruction templates: V2DI, HI. */ ++#define __lsx_vldi(/*i13*/ _1) ((__m128i)__builtin_lsx_vldi((_1))) ++ ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vshuf_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vshuf_b((v16i8)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, rj, rk. */ ++/* Data types in instruction templates: V16QI, CVPOINTER, DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vldx(void const *_1, long int _2) { ++ return (__m128i)__builtin_lsx_vldx((void const *)_1, (long int)_2); ++} ++ ++/* Assembly instruction format: vd, rj, rk. */ ++/* Data types in instruction templates: VOID, V16QI, CVPOINTER, DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void ++ __lsx_vstx(__m128i _1, void *_2, long int _3) { ++ return (void)__builtin_lsx_vstx((v16i8)_1, (void *)_2, (long int)_3); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV2DI, UV2DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vextl_qu_du(__m128i _1) { ++ return (__m128i)__builtin_lsx_vextl_qu_du((v2u64)_1); ++} ++ ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV16QI. */ ++#define __lsx_bnz_b(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_b((v16u8)(_1))) ++ ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV2DI. */ ++#define __lsx_bnz_d(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_d((v2u64)(_1))) ++ ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV8HI. */ ++#define __lsx_bnz_h(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_h((v8u16)(_1))) ++ ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV16QI. */ ++#define __lsx_bnz_v(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_v((v16u8)(_1))) ++ ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV4SI. */ ++#define __lsx_bnz_w(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_w((v4u32)(_1))) ++ ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV16QI. */ ++#define __lsx_bz_b(/*__m128i*/ _1) ((int)__builtin_lsx_bz_b((v16u8)(_1))) ++ ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV2DI. */ ++#define __lsx_bz_d(/*__m128i*/ _1) ((int)__builtin_lsx_bz_d((v2u64)(_1))) ++ ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV8HI. */ ++#define __lsx_bz_h(/*__m128i*/ _1) ((int)__builtin_lsx_bz_h((v8u16)(_1))) ++ ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV16QI. */ ++#define __lsx_bz_v(/*__m128i*/ _1) ((int)__builtin_lsx_bz_v((v16u8)(_1))) ++ ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV4SI. */ ++#define __lsx_bz_w(/*__m128i*/ _1) ((int)__builtin_lsx_bz_w((v4u32)(_1))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_caf_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_caf_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_caf_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_caf_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_ceq_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_ceq_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_ceq_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_ceq_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cle_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cle_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cle_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cle_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_clt_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_clt_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_clt_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_clt_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cne_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cne_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cne_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cne_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cor_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cor_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cor_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cor_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cueq_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cueq_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cueq_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cueq_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cule_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cule_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cule_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cule_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cult_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cult_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cult_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cult_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cun_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cun_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cune_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cune_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cune_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cune_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cun_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cun_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_saf_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_saf_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_saf_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_saf_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_seq_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_seq_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_seq_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_seq_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sle_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sle_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sle_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sle_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_slt_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_slt_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_slt_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_slt_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sne_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sne_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sne_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sne_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sor_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sor_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sor_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sor_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sueq_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sueq_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sueq_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sueq_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sule_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sule_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sule_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sule_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sult_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sult_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sult_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sult_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sun_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sun_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sune_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sune_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sune_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sune_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sun_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sun_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, si10. */ ++/* Data types in instruction templates: V16QI, HI. */ ++#define __lsx_vrepli_b(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_b((_1))) ++ ++/* Assembly instruction format: vd, si10. */ ++/* Data types in instruction templates: V2DI, HI. 
*/ ++#define __lsx_vrepli_d(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_d((_1))) ++ ++/* Assembly instruction format: vd, si10. */ ++/* Data types in instruction templates: V8HI, HI. */ ++#define __lsx_vrepli_h(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_h((_1))) ++ ++/* Assembly instruction format: vd, si10. */ ++/* Data types in instruction templates: V4SI, HI. */ ++#define __lsx_vrepli_w(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_w((_1))) ++ ++#endif /* defined(__loongarch_sx) */ ++#endif /* _GCC_LOONGSON_SXINTRIN_H */ +diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp +index dae51d069..5504f9937 100644 +--- a/clang/lib/Sema/SemaChecking.cpp ++++ b/clang/lib/Sema/SemaChecking.cpp +@@ -1981,6 +1981,9 @@ bool Sema::CheckTSBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, + case llvm::Triple::riscv32: + case llvm::Triple::riscv64: + return CheckRISCVBuiltinFunctionCall(TI, BuiltinID, TheCall); ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: ++ return CheckLoongArchBuiltinFunctionCall(TI, BuiltinID, TheCall); + } + } + +@@ -4445,6 +4448,559 @@ bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, + return false; + } + ++// CheckLoongArchBuiltinFunctionCall - Checks the constant value passed to the ++// intrinsic is correct. ++// ++// FIXME: The size tests here should instead be tablegen'd along with the ++// definitions from include/clang/Basic/BuiltinsLoongArch.def. ++// FIXME: GCC is strict on signedness for some of these intrinsics, we should ++// be too. ++bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, ++ unsigned BuiltinID, ++ CallExpr *TheCall) { ++ unsigned i = 0, l = 0, u = 0, m = 0; ++ switch (BuiltinID) { ++ default: return false; ++ // LSX/LASX intrinsics. ++ // These intrinsics take an unsigned 3 bit immediate. ++ case LoongArch::BI__builtin_lsx_vbitclri_b: ++ case LoongArch::BI__builtin_lasx_xvbitclri_b: ++ case LoongArch::BI__builtin_lsx_vbitrevi_b: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_b: ++ case LoongArch::BI__builtin_lsx_vbitseti_b: ++ case LoongArch::BI__builtin_lasx_xvbitseti_b: ++ case LoongArch::BI__builtin_lsx_vsat_b: ++ case LoongArch::BI__builtin_lsx_vsat_bu: ++ case LoongArch::BI__builtin_lasx_xvsat_b: ++ case LoongArch::BI__builtin_lasx_xvsat_bu: ++ case LoongArch::BI__builtin_lsx_vslli_b: ++ case LoongArch::BI__builtin_lasx_xvslli_b: ++ case LoongArch::BI__builtin_lsx_vsrai_b: ++ case LoongArch::BI__builtin_lasx_xvsrai_b: ++ case LoongArch::BI__builtin_lsx_vsrari_b: ++ case LoongArch::BI__builtin_lasx_xvsrari_b: ++ case LoongArch::BI__builtin_lsx_vsrli_b: ++ case LoongArch::BI__builtin_lasx_xvsrli_b: ++ case LoongArch::BI__builtin_lsx_vsllwil_h_b: ++ case LoongArch::BI__builtin_lsx_vsllwil_hu_bu: ++ case LoongArch::BI__builtin_lasx_xvsllwil_h_b: ++ case LoongArch::BI__builtin_lasx_xvsllwil_hu_bu: ++ case LoongArch::BI__builtin_lsx_vrotri_b: ++ case LoongArch::BI__builtin_lasx_xvrotri_b: ++ case LoongArch::BI__builtin_lasx_xvsrlri_b: ++ case LoongArch::BI__builtin_lsx_vsrlri_b: ++ i = 1; ++ l = 0; ++ u = 7; ++ break; ++ // These intrinsics take an unsigned 4 bit immediate. 
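Illustration only, not part of the patch: throughout this function, `i` selects which call argument carries the immediate and `[l, u]` its allowed range; the switch presumably falls through to a single SemaBuiltinConstantArgRange(TheCall, i, l, u) call at the end of the function (not shown in this hunk). A minimal usage sketch of one ui4 intrinsic from the group that follows, assuming the header from this patch installs as lsxintrin.h and the file is built for LoongArch with -mlsx:

#include <lsxintrin.h>   /* assumed install name; the include guard above is _GCC_LOONGSON_SXINTRIN_H */

/* vsat.h takes a ui4 immediate, i.e. a compile-time constant in [0, 15];
   the case labels below select i = 1, l = 0, u = 15 for it, so a call such
   as __lsx_vsat_h(v, 16) is expected to be rejected with a constant-range
   diagnostic rather than silently truncated. */
__m128i clamp_halfwords(__m128i v) {
  return __lsx_vsat_h(v, 7);
}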
++ case LoongArch::BI__builtin_lsx_vbitclri_h: ++ case LoongArch::BI__builtin_lasx_xvbitclri_h: ++ case LoongArch::BI__builtin_lsx_vbitrevi_h: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_h: ++ case LoongArch::BI__builtin_lsx_vbitseti_h: ++ case LoongArch::BI__builtin_lasx_xvbitseti_h: ++ case LoongArch::BI__builtin_lsx_vsat_h: ++ case LoongArch::BI__builtin_lsx_vsat_hu: ++ case LoongArch::BI__builtin_lasx_xvsat_h: ++ case LoongArch::BI__builtin_lasx_xvsat_hu: ++ case LoongArch::BI__builtin_lsx_vslli_h: ++ case LoongArch::BI__builtin_lasx_xvslli_h: ++ case LoongArch::BI__builtin_lsx_vsrai_h: ++ case LoongArch::BI__builtin_lasx_xvsrai_h: ++ case LoongArch::BI__builtin_lsx_vsrari_h: ++ case LoongArch::BI__builtin_lasx_xvsrari_h: ++ case LoongArch::BI__builtin_lsx_vsrli_h: ++ case LoongArch::BI__builtin_lasx_xvsrli_h: ++ case LoongArch::BI__builtin_lsx_vsllwil_w_h: ++ case LoongArch::BI__builtin_lsx_vsllwil_wu_hu: ++ case LoongArch::BI__builtin_lasx_xvsllwil_w_h: ++ case LoongArch::BI__builtin_lasx_xvsllwil_wu_hu: ++ case LoongArch::BI__builtin_lsx_vrotri_h: ++ case LoongArch::BI__builtin_lasx_xvrotri_h: ++ case LoongArch::BI__builtin_lasx_xvsrlri_h: ++ case LoongArch::BI__builtin_lsx_vsrlri_h: ++ i = 1; ++ l = 0; ++ u = 15; ++ break; ++ case LoongArch::BI__builtin_lsx_vssrarni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrarni_bu_h: ++ case LoongArch::BI__builtin_lasx_xvssrarni_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrarni_bu_h: ++ case LoongArch::BI__builtin_lsx_vssrani_b_h: ++ case LoongArch::BI__builtin_lsx_vssrani_bu_h: ++ case LoongArch::BI__builtin_lasx_xvssrani_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrani_bu_h: ++ case LoongArch::BI__builtin_lsx_vsrarni_b_h: ++ case LoongArch::BI__builtin_lasx_xvsrarni_b_h: ++ case LoongArch::BI__builtin_lsx_vsrlni_b_h: ++ case LoongArch::BI__builtin_lasx_xvsrlni_b_h: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrlni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrlni_bu_h: ++ case LoongArch::BI__builtin_lasx_xvssrlni_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrlni_bu_h: ++ case LoongArch::BI__builtin_lsx_vssrlrni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrlrni_bu_h: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_bu_h: ++ case LoongArch::BI__builtin_lsx_vsrani_b_h: ++ case LoongArch::BI__builtin_lasx_xvsrani_b_h: ++ i = 2; ++ l = 0; ++ u = 15; ++ break; ++ // These intrinsics take an unsigned 5 bit immediate. ++ // The first block of intrinsics actually have an unsigned 5 bit field, ++ // not a df/n field. 
++ case LoongArch::BI__builtin_lsx_vslei_bu: ++ case LoongArch::BI__builtin_lsx_vslei_hu: ++ case LoongArch::BI__builtin_lsx_vslei_wu: ++ case LoongArch::BI__builtin_lsx_vslei_du: ++ case LoongArch::BI__builtin_lasx_xvslei_bu: ++ case LoongArch::BI__builtin_lasx_xvslei_hu: ++ case LoongArch::BI__builtin_lasx_xvslei_wu: ++ case LoongArch::BI__builtin_lasx_xvslei_du: ++ case LoongArch::BI__builtin_lsx_vslti_bu: ++ case LoongArch::BI__builtin_lsx_vslti_hu: ++ case LoongArch::BI__builtin_lsx_vslti_wu: ++ case LoongArch::BI__builtin_lsx_vslti_du: ++ case LoongArch::BI__builtin_lasx_xvslti_bu: ++ case LoongArch::BI__builtin_lasx_xvslti_hu: ++ case LoongArch::BI__builtin_lasx_xvslti_wu: ++ case LoongArch::BI__builtin_lasx_xvslti_du: ++ case LoongArch::BI__builtin_lsx_vmaxi_bu: ++ case LoongArch::BI__builtin_lsx_vmaxi_hu: ++ case LoongArch::BI__builtin_lsx_vmaxi_wu: ++ case LoongArch::BI__builtin_lsx_vmaxi_du: ++ case LoongArch::BI__builtin_lasx_xvmaxi_bu: ++ case LoongArch::BI__builtin_lasx_xvmaxi_hu: ++ case LoongArch::BI__builtin_lasx_xvmaxi_wu: ++ case LoongArch::BI__builtin_lasx_xvmaxi_du: ++ case LoongArch::BI__builtin_lsx_vmini_bu: ++ case LoongArch::BI__builtin_lsx_vmini_hu: ++ case LoongArch::BI__builtin_lsx_vmini_wu: ++ case LoongArch::BI__builtin_lsx_vmini_du: ++ case LoongArch::BI__builtin_lasx_xvmini_bu: ++ case LoongArch::BI__builtin_lasx_xvmini_hu: ++ case LoongArch::BI__builtin_lasx_xvmini_wu: ++ case LoongArch::BI__builtin_lasx_xvmini_du: ++ case LoongArch::BI__builtin_lsx_vaddi_bu: ++ case LoongArch::BI__builtin_lsx_vaddi_hu: ++ case LoongArch::BI__builtin_lsx_vaddi_wu: ++ case LoongArch::BI__builtin_lsx_vaddi_du: ++ case LoongArch::BI__builtin_lasx_xvaddi_bu: ++ case LoongArch::BI__builtin_lasx_xvaddi_hu: ++ case LoongArch::BI__builtin_lasx_xvaddi_wu: ++ case LoongArch::BI__builtin_lasx_xvaddi_du: ++ case LoongArch::BI__builtin_lsx_vbitclri_w: ++ case LoongArch::BI__builtin_lasx_xvbitclri_w: ++ case LoongArch::BI__builtin_lsx_vbitrevi_w: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_w: ++ case LoongArch::BI__builtin_lsx_vbitseti_w: ++ case LoongArch::BI__builtin_lasx_xvbitseti_w: ++ case LoongArch::BI__builtin_lsx_vsat_w: ++ case LoongArch::BI__builtin_lsx_vsat_wu: ++ case LoongArch::BI__builtin_lasx_xvsat_w: ++ case LoongArch::BI__builtin_lasx_xvsat_wu: ++ case LoongArch::BI__builtin_lsx_vslli_w: ++ case LoongArch::BI__builtin_lasx_xvslli_w: ++ case LoongArch::BI__builtin_lsx_vsrai_w: ++ case LoongArch::BI__builtin_lasx_xvsrai_w: ++ case LoongArch::BI__builtin_lsx_vsrari_w: ++ case LoongArch::BI__builtin_lasx_xvsrari_w: ++ case LoongArch::BI__builtin_lsx_vsrli_w: ++ case LoongArch::BI__builtin_lasx_xvsrli_w: ++ case LoongArch::BI__builtin_lsx_vsllwil_d_w: ++ case LoongArch::BI__builtin_lsx_vsllwil_du_wu: ++ case LoongArch::BI__builtin_lasx_xvsllwil_d_w: ++ case LoongArch::BI__builtin_lasx_xvsllwil_du_wu: ++ case LoongArch::BI__builtin_lsx_vsrlri_w: ++ case LoongArch::BI__builtin_lasx_xvsrlri_w: ++ case LoongArch::BI__builtin_lsx_vrotri_w: ++ case LoongArch::BI__builtin_lasx_xvrotri_w: ++ case LoongArch::BI__builtin_lsx_vsubi_bu: ++ case LoongArch::BI__builtin_lsx_vsubi_hu: ++ case LoongArch::BI__builtin_lasx_xvsubi_bu: ++ case LoongArch::BI__builtin_lasx_xvsubi_hu: ++ case LoongArch::BI__builtin_lasx_xvsubi_wu: ++ case LoongArch::BI__builtin_lasx_xvsubi_du: ++ case LoongArch::BI__builtin_lsx_vbsrl_v: ++ case LoongArch::BI__builtin_lsx_vbsll_v: ++ case LoongArch::BI__builtin_lasx_xvbsrl_v: ++ case LoongArch::BI__builtin_lasx_xvbsll_v: ++ case LoongArch::BI__builtin_lsx_vsubi_wu: 
++ case LoongArch::BI__builtin_lsx_vsubi_du: ++ i = 1; ++ l = 0; ++ u = 31; ++ break; ++ case LoongArch::BI__builtin_lsx_vssrarni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrarni_hu_w: ++ case LoongArch::BI__builtin_lasx_xvssrarni_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrarni_hu_w: ++ case LoongArch::BI__builtin_lsx_vssrani_h_w: ++ case LoongArch::BI__builtin_lsx_vssrani_hu_w: ++ case LoongArch::BI__builtin_lasx_xvssrani_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrani_hu_w: ++ case LoongArch::BI__builtin_lsx_vsrarni_h_w: ++ case LoongArch::BI__builtin_lasx_xvsrarni_h_w: ++ case LoongArch::BI__builtin_lsx_vsrani_h_w: ++ case LoongArch::BI__builtin_lasx_xvsrani_h_w: ++ case LoongArch::BI__builtin_lsx_vfrstpi_b: ++ case LoongArch::BI__builtin_lsx_vfrstpi_h: ++ case LoongArch::BI__builtin_lasx_xvfrstpi_b: ++ case LoongArch::BI__builtin_lasx_xvfrstpi_h: ++ case LoongArch::BI__builtin_lsx_vsrlni_h_w: ++ case LoongArch::BI__builtin_lasx_xvsrlni_h_w: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrlni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrlni_hu_w: ++ case LoongArch::BI__builtin_lasx_xvssrlni_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrlni_hu_w: ++ case LoongArch::BI__builtin_lsx_vssrlrni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrlrni_hu_w: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_hu_w: ++ i = 2; ++ l = 0; ++ u = 31; ++ break; ++ case LoongArch::BI__builtin_lasx_xvstelm_b: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 31); ++ // These intrinsics take an unsigned 6 bit immediate. ++ case LoongArch::BI__builtin_lsx_vbitclri_d: ++ case LoongArch::BI__builtin_lasx_xvbitclri_d: ++ case LoongArch::BI__builtin_lsx_vbitrevi_d: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_d: ++ case LoongArch::BI__builtin_lsx_vbitseti_d: ++ case LoongArch::BI__builtin_lasx_xvbitseti_d: ++ case LoongArch::BI__builtin_lsx_vsat_d: ++ case LoongArch::BI__builtin_lsx_vsat_du: ++ case LoongArch::BI__builtin_lasx_xvsat_d: ++ case LoongArch::BI__builtin_lasx_xvsat_du: ++ case LoongArch::BI__builtin_lsx_vslli_d: ++ case LoongArch::BI__builtin_lasx_xvslli_d: ++ case LoongArch::BI__builtin_lsx_vsrai_d: ++ case LoongArch::BI__builtin_lasx_xvsrai_d: ++ case LoongArch::BI__builtin_lsx_vsrli_d: ++ case LoongArch::BI__builtin_lasx_xvsrli_d: ++ case LoongArch::BI__builtin_lsx_vsrari_d: ++ case LoongArch::BI__builtin_lasx_xvsrari_d: ++ case LoongArch::BI__builtin_lsx_vrotri_d: ++ case LoongArch::BI__builtin_lasx_xvrotri_d: ++ case LoongArch::BI__builtin_lasx_xvsrlri_d: ++ case LoongArch::BI__builtin_lsx_vsrlri_d: ++ i = 1; ++ l = 0; ++ u = 63; ++ break; ++ case LoongArch::BI__builtin_lsx_vssrarni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrarni_wu_d: ++ case LoongArch::BI__builtin_lasx_xvssrarni_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrarni_wu_d: ++ case LoongArch::BI__builtin_lsx_vssrani_w_d: ++ case LoongArch::BI__builtin_lsx_vssrani_wu_d: ++ case LoongArch::BI__builtin_lasx_xvssrani_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrani_wu_d: ++ case LoongArch::BI__builtin_lsx_vsrarni_w_d: ++ case LoongArch::BI__builtin_lasx_xvsrarni_w_d: ++ case LoongArch::BI__builtin_lsx_vsrlni_w_d: ++ case LoongArch::BI__builtin_lasx_xvsrlni_w_d: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrlni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrlni_wu_d: ++ case LoongArch::BI__builtin_lasx_xvssrlni_w_d: ++ case 
LoongArch::BI__builtin_lasx_xvssrlni_wu_d: ++ case LoongArch::BI__builtin_lsx_vssrlrni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrlrni_wu_d: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_wu_d: ++ case LoongArch::BI__builtin_lsx_vsrani_w_d: ++ case LoongArch::BI__builtin_lasx_xvsrani_w_d: ++ i = 2; ++ l = 0; ++ u = 63; ++ break; ++ // These intrinsics take an unsigned 7 bit immediate. ++ case LoongArch::BI__builtin_lsx_vssrarni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrarni_du_q: ++ case LoongArch::BI__builtin_lasx_xvssrarni_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrarni_du_q: ++ case LoongArch::BI__builtin_lsx_vssrani_d_q: ++ case LoongArch::BI__builtin_lsx_vssrani_du_q: ++ case LoongArch::BI__builtin_lasx_xvssrani_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrani_du_q: ++ case LoongArch::BI__builtin_lsx_vsrarni_d_q: ++ case LoongArch::BI__builtin_lasx_xvsrarni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrlni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrlni_du_q: ++ case LoongArch::BI__builtin_lasx_xvssrlni_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrlni_du_q: ++ case LoongArch::BI__builtin_lsx_vssrlrni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrlrni_du_q: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_du_q: ++ case LoongArch::BI__builtin_lsx_vsrani_d_q: ++ case LoongArch::BI__builtin_lasx_xvsrani_d_q: ++ case LoongArch::BI__builtin_lasx_xvsrlni_d_q: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_d_q: ++ case LoongArch::BI__builtin_lsx_vsrlni_d_q: ++ i = 2; ++ l = 0; ++ u = 127; ++ break; ++ // These intrinsics take a signed 5 bit immediate. ++ case LoongArch::BI__builtin_lsx_vseqi_b: ++ case LoongArch::BI__builtin_lsx_vseqi_h: ++ case LoongArch::BI__builtin_lsx_vseqi_w: ++ case LoongArch::BI__builtin_lsx_vseqi_d: ++ case LoongArch::BI__builtin_lasx_xvseqi_b: ++ case LoongArch::BI__builtin_lasx_xvseqi_h: ++ case LoongArch::BI__builtin_lasx_xvseqi_w: ++ case LoongArch::BI__builtin_lasx_xvseqi_d: ++ case LoongArch::BI__builtin_lsx_vslti_b: ++ case LoongArch::BI__builtin_lsx_vslti_h: ++ case LoongArch::BI__builtin_lsx_vslti_w: ++ case LoongArch::BI__builtin_lsx_vslti_d: ++ case LoongArch::BI__builtin_lasx_xvslti_b: ++ case LoongArch::BI__builtin_lasx_xvslti_h: ++ case LoongArch::BI__builtin_lasx_xvslti_w: ++ case LoongArch::BI__builtin_lasx_xvslti_d: ++ case LoongArch::BI__builtin_lsx_vslei_b: ++ case LoongArch::BI__builtin_lsx_vslei_h: ++ case LoongArch::BI__builtin_lsx_vslei_w: ++ case LoongArch::BI__builtin_lsx_vslei_d: ++ case LoongArch::BI__builtin_lasx_xvslei_b: ++ case LoongArch::BI__builtin_lasx_xvslei_h: ++ case LoongArch::BI__builtin_lasx_xvslei_w: ++ case LoongArch::BI__builtin_lasx_xvslei_d: ++ case LoongArch::BI__builtin_lsx_vmaxi_b: ++ case LoongArch::BI__builtin_lsx_vmaxi_h: ++ case LoongArch::BI__builtin_lsx_vmaxi_w: ++ case LoongArch::BI__builtin_lsx_vmaxi_d: ++ case LoongArch::BI__builtin_lasx_xvmaxi_b: ++ case LoongArch::BI__builtin_lasx_xvmaxi_h: ++ case LoongArch::BI__builtin_lasx_xvmaxi_w: ++ case LoongArch::BI__builtin_lasx_xvmaxi_d: ++ case LoongArch::BI__builtin_lsx_vmini_b: ++ case LoongArch::BI__builtin_lsx_vmini_h: ++ case LoongArch::BI__builtin_lsx_vmini_w: ++ case LoongArch::BI__builtin_lasx_xvmini_b: ++ case LoongArch::BI__builtin_lasx_xvmini_h: ++ case LoongArch::BI__builtin_lasx_xvmini_w: ++ case LoongArch::BI__builtin_lasx_xvmini_d: ++ case LoongArch::BI__builtin_lsx_vmini_d: ++ i = 1; ++ l = -16; ++ u = 15; ++ break; ++ // These intrinsics take a signed 
9 bit immediate. ++ case LoongArch::BI__builtin_lasx_xvldrepl_d: ++ case LoongArch::BI__builtin_lsx_vldrepl_d: ++ i = 1; ++ l = -256; ++ u = 255; ++ break; ++ // These intrinsics take an unsigned 8 bit immediate. ++ case LoongArch::BI__builtin_lsx_vandi_b: ++ case LoongArch::BI__builtin_lasx_xvandi_b: ++ case LoongArch::BI__builtin_lsx_vnori_b: ++ case LoongArch::BI__builtin_lasx_xvnori_b: ++ case LoongArch::BI__builtin_lsx_vori_b: ++ case LoongArch::BI__builtin_lasx_xvori_b: ++ case LoongArch::BI__builtin_lsx_vshuf4i_b: ++ case LoongArch::BI__builtin_lsx_vshuf4i_h: ++ case LoongArch::BI__builtin_lsx_vshuf4i_w: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_b: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_h: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_w: ++ case LoongArch::BI__builtin_lasx_xvxori_b: ++ case LoongArch::BI__builtin_lasx_xvpermi_d: ++ case LoongArch::BI__builtin_lsx_vxori_b: ++ i = 1; ++ l = 0; ++ u = 255; ++ break; ++ case LoongArch::BI__builtin_lsx_vbitseli_b: ++ case LoongArch::BI__builtin_lasx_xvbitseli_b: ++ case LoongArch::BI__builtin_lsx_vshuf4i_d: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_d: ++ case LoongArch::BI__builtin_lsx_vextrins_b: ++ case LoongArch::BI__builtin_lsx_vextrins_h: ++ case LoongArch::BI__builtin_lsx_vextrins_w: ++ case LoongArch::BI__builtin_lsx_vextrins_d: ++ case LoongArch::BI__builtin_lasx_xvextrins_b: ++ case LoongArch::BI__builtin_lasx_xvextrins_h: ++ case LoongArch::BI__builtin_lasx_xvextrins_w: ++ case LoongArch::BI__builtin_lasx_xvextrins_d: ++ case LoongArch::BI__builtin_lasx_xvpermi_q: ++ case LoongArch::BI__builtin_lsx_vpermi_w: ++ case LoongArch::BI__builtin_lasx_xvpermi_w: ++ i = 2; ++ l = 0; ++ u = 255; ++ break; ++ // df/n format ++ // These intrinsics take an unsigned 4 bit immediate. ++ case LoongArch::BI__builtin_lsx_vpickve2gr_b: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_bu: ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_b: ++ case LoongArch::BI__builtin_lsx_vreplvei_b: ++ i = 1; ++ l = 0; ++ u = 15; ++ break; ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_b: ++ i = 2; ++ l = 0; ++ u = 15; ++ break; ++ case LoongArch::BI__builtin_lasx_xvstelm_h: ++ case LoongArch::BI__builtin_lsx_vstelm_b: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 15); ++ // These intrinsics take an unsigned 3 bit immediate. ++ case LoongArch::BI__builtin_lsx_vpickve2gr_h: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_hu: ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_h: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_w: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_wu: ++ case LoongArch::BI__builtin_lasx_xvpickve_w: ++ case LoongArch::BI__builtin_lsx_vreplvei_h: ++ i = 1; ++ l = 0; ++ u = 7; ++ break; ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_h: ++ case LoongArch::BI__builtin_lasx_xvinsgr2vr_w: ++ case LoongArch::BI__builtin_lasx_xvinsve0_w: ++ i = 2; ++ l = 0; ++ u = 7; ++ break; ++ case LoongArch::BI__builtin_lasx_xvstelm_w: ++ case LoongArch::BI__builtin_lsx_vstelm_h: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 7); ++ // These intrinsics take an unsigned 2 bit immediate. 
++ case LoongArch::BI__builtin_lsx_vpickve2gr_w: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_wu: ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_w: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_d: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_du: ++ case LoongArch::BI__builtin_lasx_xvpickve_d: ++ case LoongArch::BI__builtin_lsx_vreplvei_w: ++ i = 1; ++ l = 0; ++ u = 3; ++ break; ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_w: ++ case LoongArch::BI__builtin_lasx_xvinsve0_d: ++ case LoongArch::BI__builtin_lasx_xvinsgr2vr_d: ++ i = 2; ++ l = 0; ++ u = 3; ++ break; ++ case LoongArch::BI__builtin_lasx_xvstelm_d: ++ case LoongArch::BI__builtin_lsx_vstelm_w: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 3); ++ // These intrinsics take an unsigned 1 bit immediate. ++ case LoongArch::BI__builtin_lsx_vpickve2gr_d: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_du: ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_d: ++ case LoongArch::BI__builtin_lsx_vreplvei_d: ++ i = 1; ++ l = 0; ++ u = 1; ++ break; ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_d: ++ i = 2; ++ l = 0; ++ u = 1; ++ break; ++ case LoongArch::BI__builtin_lsx_vstelm_d: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 1); ++ // Memory offsets and immediate loads. ++ // These intrinsics take a signed 10 bit immediate. ++ case LoongArch::BI__builtin_lasx_xvldrepl_w: ++ case LoongArch::BI__builtin_lsx_vldrepl_w: ++ i = 1; ++ l = -512; ++ u = 511; ++ break; ++ case LoongArch::BI__builtin_lasx_xvldrepl_h: ++ case LoongArch::BI__builtin_lsx_vldrepl_h: ++ i = 1; ++ l = -1024; ++ u = 1023; ++ break; ++ case LoongArch::BI__builtin_lasx_xvldrepl_b: ++ case LoongArch::BI__builtin_lsx_vldrepl_b: ++ i = 1; ++ l = -2048; ++ u = 2047; ++ break; ++ case LoongArch::BI__builtin_lasx_xvld: ++ case LoongArch::BI__builtin_lsx_vld: ++ i = 1; ++ l = -2048; ++ u = 2047; ++ break; ++ case LoongArch::BI__builtin_lsx_vst: ++ case LoongArch::BI__builtin_lasx_xvst: ++ i = 2; ++ l = -2048; ++ u = 2047; ++ break; ++ case LoongArch::BI__builtin_lasx_xvldi: ++ case LoongArch::BI__builtin_lsx_vldi: ++ i = 0; ++ l = -4096; ++ u = 4095; ++ break; ++ // These intrinsics take an unsigned 5 bit immediate and a signed 12 bit immediate. ++ case LoongArch::BI__builtin_loongarch_cacop_w: ++ case LoongArch::BI__builtin_loongarch_cacop_d: ++ return SemaBuiltinConstantArgRange(TheCall, 0, 0, 31) || ++ SemaBuiltinConstantArgRange(TheCall, 2, -2048, 2047); ++ // These intrinsics take an unsigned 14 bit immediate. ++ case LoongArch::BI__builtin_loongarch_csrrd_w: ++ case LoongArch::BI__builtin_loongarch_csrrd_d: ++ i = 0; ++ l = 0; ++ u = 16383; ++ break; ++ case LoongArch::BI__builtin_loongarch_csrwr_w: ++ case LoongArch::BI__builtin_loongarch_csrwr_d: ++ i = 1; ++ l = 0; ++ u = 16383; ++ break; ++ case LoongArch::BI__builtin_loongarch_csrxchg_w: ++ case LoongArch::BI__builtin_loongarch_csrxchg_d: ++ i = 2; ++ l = 0; ++ u = 16383; ++ break; ++ // These intrinsics take an unsigned 15 bit immediate. 
++ case LoongArch::BI__builtin_loongarch_dbar: ++ case LoongArch::BI__builtin_loongarch_ibar: ++ case LoongArch::BI__builtin_loongarch_syscall: ++ case LoongArch::BI__builtin_loongarch_break: i = 0; l = 0; u = 32767; break; ++ } ++ ++ if (!m) ++ return SemaBuiltinConstantArgRange(TheCall, i, l, u); ++ ++ return SemaBuiltinConstantArgRange(TheCall, i, l, u) || ++ SemaBuiltinConstantArgMultiple(TheCall, i, m); ++} ++ + bool Sema::CheckSystemZBuiltinFunctionCall(unsigned BuiltinID, + CallExpr *TheCall) { + if (BuiltinID == SystemZ::BI__builtin_tabort) { +diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d.c b/clang/test/CodeGen/LoongArch/abi-lp64d.c +new file mode 100644 +index 000000000..aa8f63094 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/abi-lp64d.c +@@ -0,0 +1,471 @@ ++// RUN: %clang_cc1 -triple loongarch64 -target-abi lp64 \ ++ // RUN: -emit-llvm %s -o - | FileCheck %s ++ ++/// This test checks the calling convention of the lp64d ABI. ++ ++#include ++#include ++ ++/// Part 0: C Data Types and Alignment. ++ ++/// `char` datatype is signed by default. ++/// In most cases, the unsigned integer data types are zero-extended when stored ++/// in general-purpose register, and the signed integer data types are ++/// sign-extended. However, in the LP64D ABI, unsigned 32-bit types, such as ++/// unsigned int, are stored in general-purpose registers as proper sign ++/// extensions of their 32-bit values. ++ ++// CHECK-LABEL: define{{.*}} zeroext i1 @check_bool() ++_Bool check_bool() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} signext i8 @check_char() ++char check_char() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} signext i16 @check_short() ++short check_short() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} signext i32 @check_int() ++int check_int() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} i64 @check_long() ++long check_long() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} i64 @check_longlong() ++long long check_longlong() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} zeroext i8 @check_uchar() ++unsigned char check_uchar() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} zeroext i16 @check_ushort() ++unsigned short check_ushort() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} signext i32 @check_uint() ++unsigned int check_uint() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} i64 @check_ulong() ++unsigned long check_ulong() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} i64 @check_ulonglong() ++unsigned long long check_ulonglong() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} float @check_float() ++float check_float() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} double @check_double() ++double check_double() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} fp128 @check_longdouble() ++long double check_longdouble() { return 0; } ++ ++/// Part 1: Scalar arguments and return value. ++ ++/// The lp64d abi says: ++/// 1. 1 < WOA <= GRLEN ++/// a. Argument is passed in a single argument register, or on the stack by ++/// value if none is available. ++/// i. If the argument is floating-point type, the argument is passed in FAR. if ++/// no FAR is available, it’s passed in GAR. If no GAR is available, it’s ++/// passed on the stack. When passed in registers or on the stack, ++/// floating-point types narrower than GRLEN bits are widened to GRLEN bits, ++/// with the upper bits undefined. ++/// ii. If the argument is integer or pointer type, the argument is passed in ++/// GAR. If no GAR is available, it’s passed on the stack. 
When passed in ++/// registers or on the stack, the unsigned integer scalars narrower than GRLEN ++/// bits are zero-extended to GRLEN bits, and the signed integer scalars are ++/// sign-extended. ++/// 2. GRLEN < WOA ≤ 2 × GRLEN ++/// a. The argument is passed in a pair of GAR, with the low-order GRLEN bits in ++/// the lower-numbered register and the high-order GRLEN bits in the ++/// higher-numbered register. If exactly one register is available, the ++/// low-order GRLEN bits are passed in the register and the high-order GRLEN ++/// bits are passed on the stack. If no GAR is available, it’s passed on the ++/// stack. ++ ++/// Note that most of these conventions are handled at the llvm side, so here we ++/// only check the correctness of argument (or return value)'s sign/zero ++/// extension attribute. ++ ++// CHECK-LABEL: define{{.*}} signext i32 @f_scalar(i1{{.*}} zeroext %a, i8{{.*}} signext %b, i8{{.*}} zeroext %c, i16{{.*}} signext %d, i16{{.*}} zeroext %e, i32{{.*}} signext %f, i32{{.*}} signext %g, i64{{.*}} %h, i1{{.*}} zeroext %i, i8{{.*}} signext %j, i8{{.*}} zeroext %k, i16{{.*}} signext %l, i16{{.*}} zeroext %m, i32{{.*}} signext %n, i32{{.*}} signext %o, i64{{.*}} %p) ++int f_scalar(_Bool a, int8_t b, uint8_t c, int16_t d, uint16_t e, int32_t f, ++ uint32_t g, int64_t h, /* begin of stack passing -> */ _Bool i, ++ int8_t j, uint8_t k, int16_t l, uint16_t m, int32_t n, ++ uint32_t o, int64_t p) { ++ return 0; ++} ++ ++/// Part 2: Structure arguments and return value. ++ ++/// The lp64d abi says: ++/// Empty structures are ignored by C compilers which support them as a ++/// non-standard extension(same as union arguments and return values). Bits ++/// unused due to padding, and bits past the end of a structure whose size in ++/// bits is not divisible by GRLEN, are undefined. And the layout of the ++/// structure on the stack is consistent with that in memory. ++ ++/// Check empty structs are ignored. ++ ++struct empty_s {}; ++ ++// CHECK-LABEL: define{{.*}} void @f_empty_s() ++struct empty_s f_empty_s(struct empty_s x) { ++ return x; ++} ++ ++/// 1. 0 < WOA ≤ GRLEN ++/// a. The structure has only fixed-point members. If there is an available GAR, ++/// the structure is passed through the GAR by value passing; If no GAR is ++/// available, it’s passed on the stack. ++ ++struct i16x4_s { ++ int16_t a, b, c, d; ++}; ++ ++// CHECK-LABEL: define{{.*}} i64 @f_i16x4_s(i64 %x.coerce) ++struct i16x4_s f_i16x4_s(struct i16x4_s x) { ++ return x; ++} ++ ++/// b. The structure has only floating-point members: ++/// i. One floating-point member. The argument is passed in a FAR; If no FAR is ++/// available, the value is passed in a GAR; if no GAR is available, the value ++/// is passed on the stack. ++ ++struct f32x1_s { ++ float a; ++}; ++ ++struct f64x1_s { ++ double a; ++}; ++ ++// CHECK-LABEL: define{{.*}} float @f_f32x1_s(float %0) ++struct f32x1_s f_f32x1_s(struct f32x1_s x) { ++ return x; ++} ++ ++// CHECK-LABEL: define{{.*}} double @f_f64x1_s(double %0) ++struct f64x1_s f_f64x1_s(struct f64x1_s x) { ++ return x; ++} ++ ++/// ii. Two floating-point members. The argument is passed in a pair of ++/// available FAR, with the low-order float member bits in the lower-numbered ++/// FAR and the high-order float member bits in the higher-numbered FAR. If the ++/// number of available FAR is less than 2, it’s passed in a GAR, and passed on ++/// the stack if no GAR is available. 
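/// As an informal illustration of the rules above (the $a0-$a7 and $fa0-$fa7
/// argument register names are assumed from the LoongArch psABI; this test
/// itself never names physical registers), direct calls such as
///
///   f_i16x4_s((struct i16x4_s){1, 2, 3, 4}); /* whole struct in one GAR ($a0) */
///   f_f32x1_s((struct f32x1_s){1.0f});       /* lone float member in $fa0 */
///
/// keep a small fixed-point aggregate in a single GAR and a lone
/// floating-point member in a single FAR, while the two-float structure
/// defined below is expected to occupy $fa0 and $fa1 when both are free. The
/// CHECK lines only verify the IR-level coercion; the final register
/// assignment is made by the LoongArch backend.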
++ ++struct f32x2_s { ++ float a, b; ++}; ++ ++// CHECK-LABEL: define{{.*}} { float, float } @f_f32x2_s(float %0, float %1) ++struct f32x2_s f_f32x2_s(struct f32x2_s x) { ++ return x; ++} ++ ++/// c. The structure has both fixed-point and floating-point members, i.e. the ++/// structure has one float member and... ++/// i. Multiple fixed-point members. If there are available GAR, the structure ++/// is passed in a GAR, and passed on the stack if no GAR is available. ++ ++struct f32x1_i16x2_s { ++ float a; ++ int16_t b, c; ++}; ++ ++// CHECK-LABEL: define{{.*}} i64 @f_f32x1_i16x2_s(i64 %x.coerce) ++struct f32x1_i16x2_s f_f32x1_i16x2_s(struct f32x1_i16x2_s x) { ++ return x; ++} ++ ++/// ii. Only one fixed-point member. If one FAR and one GAR are available, the ++/// floating-point member of the structure is passed in the FAR, and the integer ++/// member of the structure is passed in the GAR; If no floating-point register ++/// but one GAR is available, it’s passed in GAR; If no GAR is available, it’s ++/// passed on the stack. ++ ++struct f32x1_i32x1_s { ++ float a; ++ int32_t b; ++}; ++ ++// CHECK-LABEL: define{{.*}} { float, i32 } @f_f32x1_i32x1_s(float %0, i32 %1) ++struct f32x1_i32x1_s f_f32x1_i32x1_s(struct f32x1_i32x1_s x) { ++ return x; ++} ++ ++/// 2. GRLEN < WOA ≤ 2 × GRLEN ++/// a. Only fixed-point members. ++/// i. The argument is passed in a pair of available GAR, with the low-order ++/// bits in the lower-numbered GAR and the high-order bits in the ++/// higher-numbered GAR. If only one GAR is available, the low-order bits are in ++/// the GAR and the high-order bits are on the stack, and passed on the stack if ++/// no GAR is available. ++ ++struct i64x2_s { ++ int64_t a, b; ++}; ++ ++// CHECK-LABEL: define{{.*}} [2 x i64] @f_i64x2_s([2 x i64] %x.coerce) ++struct i64x2_s f_i64x2_s(struct i64x2_s x) { ++ return x; ++} ++ ++/// b. Only floating-point members. ++/// i. The structure has one long double member or one double member and two ++/// adjacent float members or 3-4 float members. The argument is passed in a ++/// pair of available GAR, with the low-order bits in the lower-numbered GAR and ++/// the high-order bits in the higher-numbered GAR. If only one GAR is ++/// available, the low-order bits are in the GAR and the high-order bits are on ++/// the stack, and passed on the stack if no GAR is available. ++ ++struct f128x1_s { ++ long double a; ++}; ++ ++// CHECK-LABEL: define{{.*}} i128 @f_f128x1_s(i128 %x.coerce) ++struct f128x1_s f_f128x1_s(struct f128x1_s x) { ++ return x; ++} ++ ++struct f64x1_f32x2_s { ++ double a; ++ float b, c; ++}; ++ ++// CHECK-LABEL: define{{.*}} [2 x i64] @f_f64x1_f32x2_s([2 x i64] %x.coerce) ++struct f64x1_f32x2_s f_f64x1_f32x2_s(struct f64x1_f32x2_s x) { ++ return x; ++} ++ ++struct f32x3_s { ++ float a, b, c; ++}; ++ ++// CHECK-LABEL: define{{.*}} [2 x i64] @f_f32x3_s([2 x i64] %x.coerce) ++struct f32x3_s f_f32x3_s(struct f32x3_s x) { ++ return x; ++} ++ ++struct f32x4_s { ++ float a, b, c, d; ++}; ++ ++// CHECK-LABEL: define{{.*}} [2 x i64] @f_f32x4_s([2 x i64] %x.coerce) ++struct f32x4_s f_f32x4_s(struct f32x4_s x) { ++ return x; ++} ++ ++/// ii. The structure with two double members is passed in a pair of available ++/// FARs. If no a pair of available FARs, it’s passed in GARs. A structure with ++/// one double member and one float member is same. 
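/// Informal note (a consequence of the rule above, not exercised by the
/// single-argument functions in this test): the FAR-pair rule only applies
/// while two FARs remain free. In a hypothetical declaration such as
///
///   void g(double, double, double, double,
///          double, double, double, double, struct f64x2_s);
///
/// (using the two-double structure defined just below), the eight leading
/// doubles already occupy $fa0-$fa7, so the trailing f64x2_s argument falls
/// back to GARs (or to the stack once the GARs are exhausted as well),
/// exactly as described above.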
++ ++struct f64x2_s { ++ double a, b; ++}; ++ ++// CHECK-LABEL: define{{.*}} { double, double } @f_f64x2_s(double %0, double %1) ++struct f64x2_s f_f64x2_s(struct f64x2_s x) { ++ return x; ++} ++ ++/// c. Both fixed-point and floating-point members. ++/// i. The structure has one double member and only one fixed-point member. ++/// A. If one FAR and one GAR are available, the floating-point member of the ++/// structure is passed in the FAR, and the integer member of the structure is ++/// passed in the GAR; If no floating-point registers but two GARs are ++/// available, it’s passed in the two GARs; If only one GAR is available, the ++/// low-order bits are in the GAR and the high-order bits are on the stack; And ++/// it’s passed on the stack if no GAR is available. ++ ++struct f64x1_i64x1_s { ++ double a; ++ int64_t b; ++}; ++ ++// CHECK-LABEL: define{{.*}} { double, i64 } @f_f64x1_i64x1_s(double %0, i64 %1) ++struct f64x1_i64x1_s f_f64x1_i64x1_s(struct f64x1_i64x1_s x) { ++ return x; ++} ++ ++/// ii. Others ++/// A. The argument is passed in a pair of available GAR, with the low-order ++/// bits in the lower-numbered GAR and the high-order bits in the ++/// higher-numbered GAR. If only one GAR is available, the low-order bits are in ++/// the GAR and the high-order bits are on the stack, and passed on the stack if ++/// no GAR is available. ++ ++struct f64x1_i32x2_s { ++ double a; ++ int32_t b, c; ++}; ++ ++// CHECK-LABEL: define{{.*}} [2 x i64] @f_f64x1_i32x2_s([2 x i64] %x.coerce) ++struct f64x1_i32x2_s f_f64x1_i32x2_s(struct f64x1_i32x2_s x) { ++ return x; ++} ++ ++struct f32x2_i32x2_s { ++ float a, b; ++ int32_t c, d; ++}; ++ ++// CHECK-LABEL: define{{.*}} [2 x i64] @f_f32x2_i32x2_s([2 x i64] %x.coerce) ++struct f32x2_i32x2_s f_f32x2_i32x2_s(struct f32x2_i32x2_s x) { ++ return x; ++} ++ ++/// 3. WOA > 2 × GRLEN ++/// a. It’s passed by reference and are replaced in the argument list with the ++/// address. If there is an available GAR, the reference is passed in the GAR, ++/// and passed on the stack if no GAR is available. ++ ++struct i64x4_s { ++ int64_t a, b, c, d; ++}; ++ ++// CHECK-LABEL: define{{.*}} void @f_i64x4_s(ptr{{.*}} sret(%struct.i64x4_s){{.*}} %agg.result, ptr{{.*}} %x) ++struct i64x4_s f_i64x4_s(struct i64x4_s x) { ++ return x; ++} ++ ++struct f64x4_s { ++ double a, b, c, d; ++}; ++ ++// CHECK-LABEL: define{{.*}} void @f_f64x4_s(ptr{{.*}} sret(%struct.f64x4_s){{.*}} %agg.result, ptr{{.*}} %x) ++struct f64x4_s f_f64x4_s(struct f64x4_s x) { ++ return x; ++} ++ ++/// Part 3: Union arguments and return value. ++ ++/// Check empty unions are ignored. ++ ++union empty_u {}; ++ ++// CHECK-LABEL: define{{.*}} void @f_empty_u() ++union empty_u f_empty_u(union empty_u x) { ++ return x; ++} ++ ++/// Union is passed in GAR or stack. ++/// 1. 0 < WOA ≤ GRLEN ++/// a. The argument is passed in a GAR, or on the stack by value if no GAR is ++/// available. ++ ++union i32_f32_u { ++ int32_t a; ++ float b; ++}; ++ ++// CHECK-LABEL: define{{.*}} i64 @f_i32_f32_u(i64 %x.coerce) ++union i32_f32_u f_i32_f32_u(union i32_f32_u x) { ++ return x; ++} ++ ++union i64_f64_u { ++ int64_t a; ++ double b; ++}; ++ ++// CHECK-LABEL: define{{.*}} i64 @f_i64_f64_u(i64 %x.coerce) ++union i64_f64_u f_i64_f64_u(union i64_f64_u x) { ++ return x; ++} ++ ++/// 2. GRLEN < WOA ≤ 2 × GRLEN ++/// a. The argument is passed in a pair of available GAR, with the low-order ++/// bits in the lower-numbered GAR and the high-order bits in the ++/// higher-numbered GAR. 
If only one GAR is available, the low-order bits are in ++/// the GAR and the high-order bits are on the stack. The arguments are passed ++/// on the stack when no GAR is available. ++ ++union i128_f128_u { ++ __int128_t a; ++ long double b; ++}; ++ ++// CHECK-LABEL: define{{.*}} i128 @f_i128_f128_u(i128 %x.coerce) ++union i128_f128_u f_i128_f128_u(union i128_f128_u x) { ++ return x; ++} ++ ++/// 3. WOA > 2 × GRLEN ++/// a. It’s passed by reference and is replaced in the argument list with the ++/// address. If there is an available GAR, the reference is passed in the GAR, ++/// and passed on the stack if no GAR is available. ++ ++union i64_arr3_u { ++ int64_t a[3]; ++}; ++ ++// CHECK-LABEL: define{{.*}} void @f_i64_arr3_u(ptr{{.*}} sret(%union.i64_arr3_u){{.*}} %agg.result, ptr{{.*}} %x) ++union i64_arr3_u f_i64_arr3_u(union i64_arr3_u x) { ++ return x; ++} ++ ++/// Part 4: Complex number arguments and return value. ++ ++/// A complex floating-point number, or a structure containing just one complex ++/// floating-point number, is passed as though it were a structure containing ++/// two floating-point reals. ++ ++// CHECK-LABEL: define{{.*}} { float, float } @f_floatcomplex(float{{.*}} %x.coerce0, float{{.*}} %x.coerce1) ++float __complex__ f_floatcomplex(float __complex__ x) { return x; } ++ ++// CHECK-LABEL: define{{.*}} { double, double } @f_doublecomplex(double{{.*}} %x.coerce0, double{{.*}} %x.coerce1) ++double __complex__ f_doublecomplex(double __complex__ x) { return x; } ++ ++struct floatcomplex_s { ++ float __complex__ c; ++}; ++// CHECK-LABEL: define{{.*}} { float, float } @f_floatcomplex_s(float %0, float %1) ++struct floatcomplex_s f_floatcomplex_s(struct floatcomplex_s x) { ++ return x; ++} ++ ++struct doublecomplex_s { ++ double __complex__ c; ++}; ++// CHECK-LABEL: define{{.*}} { double, double } @f_doublecomplex_s(double %0, double %1) ++struct doublecomplex_s f_doublecomplex_s(struct doublecomplex_s x) { ++ return x; ++} ++ ++/// Part 5: Variadic arguments. ++ ++/// Variadic arguments are passed in GARs in the same manner as named arguments. ++ ++int f_va_callee(int, ...); ++ ++// CHECK-LABEL: define{{.*}} void @f_va_caller() ++// CHECK: call signext i32 (i32, ...) @f_va_callee(i32{{.*}} signext 1, i32{{.*}} signext 2, i64{{.*}} 3, double{{.*}} 4.000000e+00, double{{.*}} 5.000000e+00, i64 {{.*}}, i64 {{.*}}, i64 {{.*}}) ++void f_va_caller(void) { ++ f_va_callee(1, 2, 3LL, 4.0f, 5.0, (struct i16x4_s){6, 7, 8, 9}, ++ (struct i64x2_s){10, 11}); ++} ++ ++// CHECK-LABEL: define signext i32 @f_va_int(ptr{{.*}} %fmt, ...) ++// CHECK: entry: ++// CHECK: %fmt.addr = alloca ptr, align 8 ++// CHECK: %va = alloca ptr, align 8 ++// CHECK: %v = alloca i32, align 4 ++// CHECK: store ptr %fmt, ptr %fmt.addr, align 8 ++// CHECK: call void @llvm.va_start(ptr %va) ++// CHECK: %argp.cur = load ptr, ptr %va, align 8 ++// CHECK: %argp.next = getelementptr inbounds i8, ptr %argp.cur, i64 8 ++// CHECK: store ptr %argp.next, ptr %va, align 8 ++// CHECK: %0 = load i32, ptr %argp.cur, align 8 ++// CHECK: store i32 %0, ptr %v, align 4 ++// CHECK: call void @llvm.va_end(ptr %va) ++// CHECK: %1 = load i32, ptr %v, align 4 ++// CHECK: ret i32 %1 ++// CHECK: } ++int f_va_int(char *fmt, ...)
{ ++ __builtin_va_list va; ++ __builtin_va_start(va, fmt); ++ int v = __builtin_va_arg(va, int); ++ __builtin_va_end(va); ++ return v; ++} +diff --git a/clang/test/CodeGen/LoongArch/inlineasm-float-double-in-gpr.c b/clang/test/CodeGen/LoongArch/inlineasm-float-double-in-gpr.c +new file mode 100644 +index 000000000..e4a03d782 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/inlineasm-float-double-in-gpr.c +@@ -0,0 +1,49 @@ ++// RUN: %clang_cc1 -triple loongarch64 -O2 -emit-llvm %s -o - \ ++// RUN: | FileCheck %s ++ ++float f; ++double d; ++ ++// CHECK-LABEL: @reg_float( ++// CHECK: [[FLT_ARG:%.*]] = load float, ptr @f ++// CHECK: call void asm sideeffect "", "r"(float [[FLT_ARG]]) ++// CHECK: ret void ++void reg_float() { ++ float a = f; ++ asm volatile("" ++ : ++ : "r"(a)); ++} ++ ++// CHECK-LABEL: @r4_float( ++// CHECK: [[FLT_ARG:%.*]] = load float, ptr @f ++// CHECK: call void asm sideeffect "", "{$r4}"(float [[FLT_ARG]]) ++// CHECK: ret void ++void r4_float() { ++ register float a asm("$r4") = f; ++ asm volatile("" ++ : ++ : "r"(a)); ++} ++ ++// CHECK-LABEL: @reg_double( ++// CHECK: [[DBL_ARG:%.*]] = load double, ptr @d ++// CHECK: call void asm sideeffect "", "r"(double [[DBL_ARG]]) ++// CHECK: ret void ++void reg_double() { ++ double a = d; ++ asm volatile("" ++ : ++ : "r"(a)); ++} ++ ++// CHECK-LABEL: @r4_double( ++// CHECK: [[DBL_ARG:%.*]] = load double, ptr @d ++// CHECK: call void asm sideeffect "", "{$r4}"(double [[DBL_ARG]]) ++// CHECK: ret void ++void r4_double() { ++ register double a asm("$r4") = d; ++ asm volatile("" ++ : ++ : "r"(a)); ++} +diff --git a/clang/test/CodeGen/builtins-loongarch-base.c b/clang/test/CodeGen/builtins-loongarch-base.c +new file mode 100644 +index 000000000..cdff582fa +--- /dev/null ++++ b/clang/test/CodeGen/builtins-loongarch-base.c +@@ -0,0 +1,409 @@ ++// REQUIRES: loongarch-registered-target ++// RUN: %clang_cc1 -triple loongarch64-linux-gnu -emit-llvm %s -o - | FileCheck %s ++ ++#include ++ ++typedef char i8; ++typedef unsigned char u8; ++typedef short i16; ++typedef unsigned short u16; ++typedef int i32; ++typedef unsigned int u32; ++ ++#if __LONG_MAX__ == __LONG_LONG_MAX__ ++typedef long int i64; ++typedef unsigned long int u64; ++#else ++typedef long long i64; ++typedef unsigned long long u64; ++#endif ++ ++__drdtime_t drdtime; ++__rdtime_t rdtime; ++ ++void cpucfg(){ ++ ++ u32 u32_r, u32_a; ++ // __cpucfg ++ // rd, rj ++ // unsigned int, unsigned int ++ u32_r= __builtin_loongarch_cpucfg(u32_a); // CHECK: call i32 @llvm.loongarch.cpucfg ++ ++} ++ ++void csrrd_w() { ++ ++ u32 u32_r; ++ // __csrrd_w ++ // rd, csr_num ++ // unsigned int, uimm14_32 ++ u32_r = __builtin_loongarch_csrrd_w(1); // CHECK: call i32 @llvm.loongarch.csrrd.w ++} ++ ++void csrrd_d() { ++ ++ u64 u64_r; ++ // __csrrd_d ++ // rd, csr_num ++ // unsigned long int, uimm14 ++ u64_r = __builtin_loongarch_csrrd_d(1); // CHECK: call i64 @llvm.loongarch.csrrd.d ++} ++ ++void csrwr_w() { ++ ++ u32 u32_r, u32_a; ++ // __csrwr_w ++ // rd, csr_num ++ // unsigned int, uimm14_32 ++ u32_r = __builtin_loongarch_csrwr_w(u32_a, 1); // CHECK: call i32 @llvm.loongarch.csrwr.w ++} ++ ++void csrwr_d() { ++ ++ u64 u64_r, u64_a; ++ // __csrwr_d ++ // rd, csr_num ++ // unsigned long int, uimm14 ++ u64_r = __builtin_loongarch_csrwr_d(u64_a, 1); // CHECK: call i64 @llvm.loongarch.csrwr.d ++} ++ ++void csrxchg_w() { ++ ++ u32 u32_r, u32_a, u32_b; ++ // __csrxchg_w ++ // rd, rj, csr_num ++ // unsigned int, unsigned int, uimm14_32 ++ u32_r = __builtin_loongarch_csrxchg_w(u32_a, u32_b, 1); // CHECK: call 
i32 @llvm.loongarch.csrxchg.w ++} ++ ++void csrxchg_d() { ++ ++ u64 u64_r, u64_a, u64_b; ++ // __csrxchg_d ++ // rd, rj, csr_num ++ // unsigned long int, unsigned long int, uimm14 ++ u64_r = __builtin_loongarch_csrxchg_d(u64_a, u64_b, 1); // CHECK: call i64 @llvm.loongarch.csrxchg.d ++} ++ ++void iocsrrd_b(){ ++ ++ u32 u32_a; ++ u8 u8_r; ++ // __iocsrrd_b ++ // rd, rj ++ // unsigned char, unsigned int ++ u8_r=__builtin_loongarch_iocsrrd_b(u32_a); // CHECK: call i32 @llvm.loongarch.iocsrrd.b ++ ++} ++ ++void iocsrrd_h(){ ++ ++ u32 u32_a; ++ u16 u16_r; ++ // __iocsrrd_h ++ // rd, rj ++ // unsigned short, unsigned int ++ u16_r=__builtin_loongarch_iocsrrd_h(u32_a); // CHECK: call i32 @llvm.loongarch.iocsrrd.h ++ ++} ++ ++void iocsrrd_w(){ ++ ++ u32 u32_r, u32_a; ++ // __iocsrrd_w ++ // rd, rj ++ // unsigned int, unsigned int ++ u32_r=__builtin_loongarch_iocsrrd_w(u32_a); // CHECK: call i32 @llvm.loongarch.iocsrrd.w ++ ++} ++ ++void iocsrrd_d(){ ++ ++ u32 u32_a; ++ u64 u64_r; ++ // __iocsrrd_d ++ // rd, rj ++ // unsigned long int, unsigned int ++ u64_r=__builtin_loongarch_iocsrrd_d(u32_a); // CHECK: call i64 @llvm.loongarch.iocsrrd.d ++ ++} ++ ++void iocsrwr_b(){ ++ ++ u32 u32_a; ++ u8 u8_a; ++ // __iocsrwr_b ++ // rd, rj ++ // unsigned char, unsigned int ++ __builtin_loongarch_iocsrwr_b(u8_a, u32_a); // CHECK: void @llvm.loongarch.iocsrwr.b ++ ++} ++ ++void iocsrwr_h(){ ++ ++ u32 u32_a; ++ u16 u16_a; ++ // __iocsrwr_h ++ // rd, rj ++ // unsigned short, unsigned int ++ __builtin_loongarch_iocsrwr_h(u16_a, u32_a); // CHECK: void @llvm.loongarch.iocsrwr.h ++ ++} ++ ++void iocsrwr_w(){ ++ ++ u32 u32_a, u32_b; ++ // __iocsrwr_w ++ // rd, rj ++ // unsigned int, unsigned int ++ __builtin_loongarch_iocsrwr_w(u32_a, u32_b); // CHECK: void @llvm.loongarch.iocsrwr.w ++ ++} ++ ++void iocsrwr_d(){ ++ ++ u32 u32_a; ++ u64 u64_a; ++ // __iocsrwr_d ++ // rd, rj ++ // unsigned long int, unsigned int ++ __builtin_loongarch_iocsrwr_d(u64_a, u32_a); // CHECK: void @llvm.loongarch.iocsrwr.d ++ ++} ++ ++void cacop_w() { ++ ++ i32 i32_a; ++ // __cacop_w ++ // op, rj, si12 ++ // uimm5, unsigned int, simm12 ++ __builtin_loongarch_cacop_w(1, i32_a, 2); // CHECK: void @llvm.loongarch.cacop.w ++} ++ ++void cacop_d() { ++ ++ i64 i64_a; ++ // __cacop_d ++ // op, rj, si12 ++ // uimm5, unsigned long int, simm12 ++ __builtin_loongarch_cacop_d(1, i64_a, 2); // CHECK: void @llvm.loongarch.cacop.d ++} ++ ++void rdtime_d(){ ++ ++ drdtime= __builtin_loongarch_rdtime_d(); // CHECK: call { i64, i64 } asm sideeffect "rdtime.d\09$0,$1\0A\09", "=&r,=&r"() ++ ++} ++ ++void rdtimeh_w(){ ++ ++ rdtime= __builtin_loongarch_rdtimeh_w(); // CHECK: call { i32, i32 } asm sideeffect "rdtimeh.w\09$0,$1\0A\09", "=&r,=&r"() ++ ++} ++ ++void rdtimel_w(){ ++ ++ rdtime= __builtin_loongarch_rdtimel_w(); // CHECK: call { i32, i32 } asm sideeffect "rdtimel.w\09$0,$1\0A\09", "=&r,=&r"() ++ ++} ++ ++void crc_w_b_w(){ ++ ++ i32 i32_r, i32_a; ++ i8 i8_a; ++ // __crc_w_b_w ++ // rd, rj, rk ++ // int, char, int ++ i32_r=__builtin_loongarch_crc_w_b_w(i8_a, i32_a); // CHECK: call i32 @llvm.loongarch.crc.w.b.w ++ ++} ++ ++void crc_w_h_w(){ ++ ++ i32 i32_r, i32_a; ++ i16 i16_a; ++ // __crc_w_h_w ++ // rd, rj, rk ++ // int, short, int ++ i32_r=__builtin_loongarch_crc_w_h_w(i16_a, i32_a); // CHECK: call i32 @llvm.loongarch.crc.w.h.w ++ ++} ++ ++void crc_w_w_w(){ ++ ++ i32 i32_r, i32_a, i32_b; ++ // __crc_w_w_w ++ // rd, rj, rk ++ // int, int, int ++ i32_r=__builtin_loongarch_crc_w_w_w(i32_a, i32_b); // CHECK: call i32 @llvm.loongarch.crc.w.w.w ++ ++} ++ ++void 
crc_w_d_w(){ ++ ++ i32 i32_r, i32_a; ++ i64 i64_a; ++ // __crc_w_d_w ++ // rd, rj, rk ++ // int, long int, int ++ i32_r=__builtin_loongarch_crc_w_d_w(i64_a, i32_a); // CHECK: call i32 @llvm.loongarch.crc.w.d.w ++ ++} ++ ++void crcc_w_b_w(){ ++ ++ i32 i32_r, i32_a; ++ i8 i8_a; ++ // __crcc_w_b_w ++ // rd, rj, rk ++ // int, char, int ++ i32_r=__builtin_loongarch_crcc_w_b_w(i8_a, i32_a); // CHECK: call i32 @llvm.loongarch.crcc.w.b.w ++ ++} ++ ++void crcc_w_h_w(){ ++ ++ i32 i32_r, i32_a; ++ i16 i16_a; ++ // __crcc_w_h_w ++ // rd, rj, rk ++ // int, short, int ++ i32_r=__builtin_loongarch_crcc_w_h_w(i16_a, i32_a); // CHECK: call i32 @llvm.loongarch.crcc.w.h.w ++ ++} ++ ++void crcc_w_w_w(){ ++ ++ i32 i32_r, i32_a, i32_b; ++ // __crcc_w_w_w ++ // rd, rj, rk ++ // int, int, int ++ i32_r=__builtin_loongarch_crcc_w_w_w(i32_a, i32_b); // CHECK: call i32 @llvm.loongarch.crcc.w.w.w ++ ++} ++ ++void crcc_w_d_w(){ ++ ++ i32 i32_r, i32_a; ++ i64 i64_a; ++ // __crcc_w_d_w ++ // rd, rj, rk ++ // int, long int, int ++ i32_r=__builtin_loongarch_crcc_w_d_w(i64_a, i32_a); // CHECK: call i32 @llvm.loongarch.crcc.w.d.w ++ ++} ++ ++void tlbclr(){ ++ ++ // __tlbclr ++ __builtin_loongarch_tlbclr(); // CHECK: call void @llvm.loongarch.tlbclr ++ ++} ++ ++void tlbflush(){ ++ ++ // __tlbflush ++ __builtin_loongarch_tlbflush(); // CHECK: call void @llvm.loongarch.tlbflush ++ ++} ++ ++void tlbfill(){ ++ ++ // __tlbfill ++ __builtin_loongarch_tlbfill(); // CHECK: call void @llvm.loongarch.tlbfill ++ ++} ++ ++void tlbrd(){ ++ ++ // __tlbrd ++ __builtin_loongarch_tlbrd(); // CHECK: call void @llvm.loongarch.tlbrd ++ ++} ++ ++void tlbwr(){ ++ ++ // __tlbwr ++ __builtin_loongarch_tlbwr(); // CHECK: call void @llvm.loongarch.tlbwr ++ ++} ++ ++void tlbsrch(){ ++ ++ // __tlbsrch ++ __builtin_loongarch_tlbsrch(); // CHECK: call void @llvm.loongarch.tlbsrch ++ ++} ++ ++void syscall(){ ++ ++ // __syscall ++ // Code ++ // uimm15 ++ __builtin_loongarch_syscall(1); // CHECK: call void @llvm.loongarch.syscall ++ ++} ++ ++void break_builtin(){ ++ ++ // __break ++ // Code ++ // uimm15 ++ __builtin_loongarch_break(1); // CHECK: call void @llvm.loongarch.break ++ ++} ++ ++void asrtle_d(){ ++ ++ i64 i64_a, i64_b; ++ // __asrtle_d ++ // rj, rk ++ // long int, long int ++ __builtin_loongarch_asrtle_d(i64_a, i64_b); // CHECK: call void @llvm.loongarch.asrtle.d ++ ++} ++ ++void asrtgt_d(){ ++ ++ i64 i64_a, i64_b; ++ // __asrtgt_d ++ // rj, rk ++ // long int, long int ++ __builtin_loongarch_asrtgt_d(i64_a, i64_b); // CHECK: call void @llvm.loongarch.asrtgt.d ++ ++} ++ ++void dbar(){ ++ ++ // __dbar ++ // hint ++ // uimm15 ++ __builtin_loongarch_dbar(0); // CHECK: call void @llvm.loongarch.dbar ++ ++} ++ ++void ibar(){ ++ ++ // __ibar ++ // hint ++ // uimm15 ++ __builtin_loongarch_ibar(0); // CHECK: call void @llvm.loongarch.ibar ++ ++} ++ ++void movfcsr2gr(){ ++ ++ u32 u32_r; ++ // __movfcsr2gr ++ u32_r=__movfcsr2gr(0); // CHECK: call i32 asm sideeffect "movfcsr2gr $0, $$fcsr0", "=&r"() ++ ++} ++ ++ ++void movgr2fcsr() { ++ ++ u32 u32_a; ++ // __movgr2fcsr ++ __movgr2fcsr(0, u32_a); // CHECK: call void asm sideeffect "movgr2fcsr $$fcsr0, $0", "r"(i32 %0) ++ ++} +diff --git a/clang/test/CodeGen/builtins-loongarch-lasx-error.c b/clang/test/CodeGen/builtins-loongarch-lasx-error.c +new file mode 100644 +index 000000000..99f2687e4 +--- /dev/null ++++ b/clang/test/CodeGen/builtins-loongarch-lasx-error.c +@@ -0,0 +1,266 @@ ++// REQUIRES: loongarch-registered-target ++// RUN: %clang_cc1 -triple loongarch64-unknown-linux-gnu -fsyntax-only %s \ ++// RUN: 
-target-feature +lasx \ ++// RUN: -verify -o - 2>&1 ++ ++#include ++ ++void test() { ++ v32i8 v32i8_a = (v32i8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; ++ v32i8 v32i8_b = (v32i8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ++ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; ++ v32i8 v32i8_c = (v32i8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, ++ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33}; ++ v32i8 v32i8_r; ++ ++ v16i16 v16i16_a = (v16i16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16i16 v16i16_b = (v16i16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16i16 v16i16_c = (v16i16){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16i16 v16i16_r; ++ ++ v8i32 v8i32_a = (v8i32){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8i32 v8i32_b = (v8i32){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8i32 v8i32_c = (v8i32){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8i32 v8i32_r; ++ ++ v4i64 v4i64_a = (v4i64){0, 1, 2, 3}; ++ v4i64 v4i64_b = (v4i64){1, 2, 3, 4}; ++ v4i64 v4i64_c = (v4i64){2, 3, 4, 5}; ++ v4i64 v4i64_r; ++ ++ v32u8 v32u8_a = (v32u8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; ++ v32u8 v32u8_b = (v32u8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ++ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; ++ v32u8 v32u8_c = (v32u8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, ++ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33}; ++ v32u8 v32u8_r; ++ ++ v16u16 v16u16_a = (v16u16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16u16 v16u16_b = (v16u16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16u16 v16u16_c = (v16u16){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16u16 v16u16_r; ++ ++ v8u32 v8u32_a = (v8u32){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8u32 v8u32_b = (v8u32){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8u32 v8u32_c = (v8u32){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8u32 v8u32_r; ++ ++ v4u64 v4u64_a = (v4u64){0, 1, 2, 3}; ++ v4u64 v4u64_b = (v4u64){1, 2, 3, 4}; ++ v4u64 v4u64_c = (v4u64){2, 3, 4, 5}; ++ v4u64 v4u64_r; ++ ++ v8f32 v8f32_a = (v8f32){0.5, 1, 2, 3, 4, 5, 6, 7}; ++ v8f32 v8f32_b = (v8f32){1.5, 2, 3, 4, 5, 6, 7, 8}; ++ v8f32 v8f32_c = (v8f32){2.5, 3, 4, 5, 6, 7, 8, 9}; ++ v8f32 v8f32_r; ++ v4f64 v4f64_a = (v4f64){0.5, 1, 2, 3}; ++ v4f64 v4f64_b = (v4f64){1.5, 2, 3, 4}; ++ v4f64 v4f64_c = (v4f64){2.5, 3, 4, 5}; ++ v4f64 v4f64_r; ++ ++ int i32_r; ++ int i32_a = 1; ++ int i32_b = 2; ++ unsigned int u32_r; ++ unsigned int u32_a = 1; ++ unsigned int u32_b = 2; ++ long long i64_r; ++ long long i64_a = 1; ++ long long i64_b = 2; ++ long long i64_c = 3; ++ unsigned long long u64_r; ++ unsigned long long u64_a = 1; ++ unsigned long long u64_b = 2; ++ unsigned long long u64_c = 3; ++ ++ v32i8_r = __lasx_xvslli_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvslli_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvslli_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvslli_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvsrai_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvsrai_h(v16i16_a, 16); // expected-error 
{{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvsrai_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsrai_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvsrari_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvsrari_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvsrari_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsrari_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvsrli_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvsrli_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvsrli_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsrli_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvsrlri_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvsrlri_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvsrlri_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsrlri_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32u8_r = __lasx_xvbitclri_b(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16u16_r = __lasx_xvbitclri_h(v16u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u32_r = __lasx_xvbitclri_w(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvbitclri_d(v4u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32u8_r = __lasx_xvbitseti_b(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16u16_r = __lasx_xvbitseti_h(v16u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u32_r = __lasx_xvbitseti_w(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvbitseti_d(v4u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32u8_r = __lasx_xvbitrevi_b(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16u16_r = __lasx_xvbitrevi_h(v16u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u32_r = __lasx_xvbitrevi_w(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvbitrevi_d(v4u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvaddi_bu(v32i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i16_r = __lasx_xvaddi_hu(v16i16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvaddi_wu(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = 
__lasx_xvaddi_du(v4i64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvsubi_bu(v32i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i16_r = __lasx_xvsubi_hu(v16i16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvsubi_wu(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsubi_du(v4i64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvmaxi_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i16_r = __lasx_xvmaxi_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i32_r = __lasx_xvmaxi_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i64_r = __lasx_xvmaxi_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v32u8_r = __lasx_xvmaxi_bu(v32u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16u16_r = __lasx_xvmaxi_hu(v16u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u32_r = __lasx_xvmaxi_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvmaxi_du(v4u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvmini_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i16_r = __lasx_xvmini_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i32_r = __lasx_xvmini_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i64_r = __lasx_xvmini_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v32u8_r = __lasx_xvmini_bu(v32u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16u16_r = __lasx_xvmini_hu(v16u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u32_r = __lasx_xvmini_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvmini_du(v4u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvseqi_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i16_r = __lasx_xvseqi_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i32_r = __lasx_xvseqi_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i64_r = __lasx_xvseqi_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v32i8_r = __lasx_xvslti_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i16_r = __lasx_xvslti_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i32_r = __lasx_xvslti_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i64_r = __lasx_xvslti_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v32i8_r = __lasx_xvslti_bu(v32u8_a, 32); // 
expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i16_r = __lasx_xvslti_hu(v16u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvslti_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvslti_du(v4u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvslei_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i16_r = __lasx_xvslei_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i32_r = __lasx_xvslei_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i64_r = __lasx_xvslei_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v32i8_r = __lasx_xvslei_bu(v32u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i16_r = __lasx_xvslei_hu(v16u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvslei_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvslei_du(v4u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvsat_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvsat_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvsat_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsat_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32u8_r = __lasx_xvsat_bu(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16u16_r = __lasx_xvsat_hu(v16u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u32_r = __lasx_xvsat_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvsat_du(v4u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvrepl128vei_b(v32i8_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvrepl128vei_h(v16i16_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i32_r = __lasx_xvrepl128vei_w(v8i32_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v4i64_r = __lasx_xvrepl128vei_d(v4i64_a, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ v32u8_r = __lasx_xvandi_b(v32u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32u8_r = __lasx_xvori_b(v32u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32u8_r = __lasx_xvnori_b(v32u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32u8_r = __lasx_xvxori_b(v32u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32u8_r = __lasx_xvbitseli_b(v32u8_a, v32u8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32i8_r = __lasx_xvshuf4i_b(v32i8_a, 256); // expected-error {{argument value 256 is outside the valid range 
[0, 255]}} ++ v16i16_r = __lasx_xvshuf4i_h(v16i16_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v8i32_r = __lasx_xvshuf4i_w(v8i32_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v8i32_r = __lasx_xvpermi_w(v8i32_a, v8i32_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i16_r = __lasx_xvsllwil_h_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i32_r = __lasx_xvsllwil_w_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i64_r = __lasx_xvsllwil_d_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16u16_r = __lasx_xvsllwil_hu_bu(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8u32_r = __lasx_xvsllwil_wu_hu(v16u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4u64_r = __lasx_xvsllwil_du_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvfrstpi_b(v32i8_a, v32i8_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i16_r = __lasx_xvfrstpi_h(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvshuf4i_d(v4i64_a, v4i64_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32i8_r = __lasx_xvbsrl_v(v32i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvbsll_v(v32i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvextrins_b(v32i8_a, v32i8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i16_r = __lasx_xvextrins_h(v16i16_a, v16i16_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v8i32_r = __lasx_xvextrins_w(v8i32_a, v8i32_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v4i64_r = __lasx_xvextrins_d(v4i64_a, v4i64_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32i8_r = __lasx_xvld(&v32i8_a, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ __lasx_xvst(v32i8_a, &v32i8_b, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ __lasx_xvstelm_b(v32i8_a, &v32i8_b, 0, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ __lasx_xvstelm_h(v16i16_a, &v16i16_b, 0, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ __lasx_xvstelm_w(v8i32_a, &v8i32_b, 0, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ __lasx_xvstelm_d(v4i64_a, &v4i64_b, 0, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v8i32_r = __lasx_xvinsve0_w(v8i32_a, v8i32_b, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i64_r = __lasx_xvinsve0_d(v4i64_a, v4i64_b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v8i32_r = __lasx_xvpickve_w(v8i32_b, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i64_r = __lasx_xvpickve_d(v4i64_b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v4i64_r = __lasx_xvldi(-4097); // 
expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}} ++ v8i32_r = __lasx_xvinsgr2vr_w(v8i32_a, i32_b, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i64_r = __lasx_xvinsgr2vr_d(v4i64_a, i64_b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v32i8_r = __lasx_xvpermi_q(v32i8_a, v32i8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v4i64_r = __lasx_xvpermi_d(v4i64_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32i8_r = __lasx_xvldrepl_b(&v32i8_a, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ v16i16_r = __lasx_xvldrepl_h(&v16i16_a, -1025); // expected-error {{argument value -1025 is outside the valid range [-1024, 1023]}} ++ v8i32_r = __lasx_xvldrepl_w(&v8i32_a, -513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ v4i64_r = __lasx_xvldrepl_d(&v4i64_a, -257); // expected-error {{argument value -257 is outside the valid range [-256, 255]}} ++ i32_r = __lasx_xvpickve2gr_w(v8i32_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ u32_r = __lasx_xvpickve2gr_wu(v8i32_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ i64_r = __lasx_xvpickve2gr_d(v4i64_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ u64_r = __lasx_xvpickve2gr_du(v4i64_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v32i8_r = __lasx_xvrotri_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvrotri_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvrotri_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvrotri_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvsrlni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvsrlni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvsrlni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvsrlni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvsrlrni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvsrlrni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvsrlrni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvsrlrni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvssrlni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvssrlni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvssrlni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvssrlni_d_q(v4i64_a, 
v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32u8_r = __lasx_xvssrlni_bu_h(v32u8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16u16_r = __lasx_xvssrlni_hu_w(v16u16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u32_r = __lasx_xvssrlni_wu_d(v8u32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4u64_r = __lasx_xvssrlni_du_q(v4u64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvssrlrni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvssrlrni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvssrlrni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvssrlrni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32u8_r = __lasx_xvssrlrni_bu_h(v32u8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16u16_r = __lasx_xvssrlrni_hu_w(v16u16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u32_r = __lasx_xvssrlrni_wu_d(v8u32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4u64_r = __lasx_xvssrlrni_du_q(v4u64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvsrani_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvsrani_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvsrani_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvsrani_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvsrarni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvsrarni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvsrarni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvsrarni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvssrani_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvssrani_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvssrani_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvssrani_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32u8_r = __lasx_xvssrani_bu_h(v32u8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16u16_r = __lasx_xvssrani_hu_w(v16u16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ 
v8u32_r = __lasx_xvssrani_wu_d(v8u32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
++ v4u64_r = __lasx_xvssrani_du_q(v4u64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
++ v32i8_r = __lasx_xvssrarni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
++ v16i16_r = __lasx_xvssrarni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
++ v8i32_r = __lasx_xvssrarni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
++ v4i64_r = __lasx_xvssrarni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
++ v32u8_r = __lasx_xvssrarni_bu_h(v32u8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
++ v16u16_r = __lasx_xvssrarni_hu_w(v16u16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
++ v8u32_r = __lasx_xvssrarni_wu_d(v8u32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
++ v4u64_r = __lasx_xvssrarni_du_q(v4u64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
++}
+diff --git a/clang/test/CodeGen/builtins-loongarch-lasx.c b/clang/test/CodeGen/builtins-loongarch-lasx.c
+new file mode 100644
+index 000000000..0d6a54cb0
+--- /dev/null
++++ b/clang/test/CodeGen/builtins-loongarch-lasx.c
+@@ -0,0 +1,3772 @@
++// REQUIRES: loongarch-registered-target
++// RUN: %clang_cc1 -triple loongarch64-unknown-linux-gnu -emit-llvm %s \
++// RUN: -target-feature +lasx \
++// RUN: -o - | FileCheck %s
++
++#include <lasxintrin.h>
++
++#define ui1_b 1
++#define ui2 1
++#define ui2_b ui2
++#define ui3 4
++#define ui3_b ui3
++#define ui4 7
++#define ui4_b ui4
++#define ui5 25
++#define ui5_b ui5
++#define ui6 44
++#define ui6_b ui6
++#define ui7 100
++#define ui7_b ui7
++#define ui8 127 //200
++#define ui8_b ui8
++#define si5_b -4
++#define si8 -100
++#define si9 0
++#define si10 0
++#define si11 0
++#define si12 0
++#define i10 500
++#define i13 4000
++#define mode 0
++#define idx1 1
++#define idx2 2
++#define idx3 4
++#define idx4 8
++
++void test(void) {
++ v32i8 v32i8_a = (v32i8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
++ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
++ v32i8 v32i8_b = (v32i8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
++ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32};
++ v32i8 v32i8_c = (v32i8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
++ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33};
++ v32i8 v32i8_r;
++
++ v16i16 v16i16_a = (v16i16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
++ v16i16 v16i16_b = (v16i16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
++ v16i16 v16i16_c = (v16i16){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17};
++ v16i16 v16i16_r;
++
++ v8i32 v8i32_a = (v8i32){0, 1, 2, 3, 4, 5, 6, 7};
++ v8i32 v8i32_b = (v8i32){1, 2, 3, 4, 5, 6, 7, 8};
++ v8i32 v8i32_c = (v8i32){2, 3, 4, 5, 6, 7, 8, 9};
++ v8i32 v8i32_r;
++
++ v4i64 v4i64_a = (v4i64){0, 1, 2, 3};
++ v4i64 v4i64_b = (v4i64){1, 2, 3, 4};
++ v4i64 v4i64_c = (v4i64){2, 3, 4, 5};
++ v4i64 v4i64_r;
++
++ v32u8 v32u8_a = (v32u8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
++ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
++ v32u8 v32u8_b =
(v32u8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ++ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; ++ v32u8 v32u8_c = (v32u8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, ++ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33}; ++ v32u8 v32u8_r; ++ ++ v16u16 v16u16_a = (v16u16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16u16 v16u16_b = (v16u16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16u16 v16u16_c = (v16u16){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16u16 v16u16_r; ++ ++ v8u32 v8u32_a = (v8u32){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8u32 v8u32_b = (v8u32){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8u32 v8u32_c = (v8u32){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8u32 v8u32_r; ++ ++ v4u64 v4u64_a = (v4u64){0, 1, 2, 3}; ++ v4u64 v4u64_b = (v4u64){1, 2, 3, 4}; ++ v4u64 v4u64_c = (v4u64){2, 3, 4, 5}; ++ v4u64 v4u64_r; ++ ++ v8f32 v8f32_a = (v8f32){0.5, 1, 2, 3, 4, 5, 6, 7}; ++ v8f32 v8f32_b = (v8f32){1.5, 2, 3, 4, 5, 6, 7, 8}; ++ v8f32 v8f32_c = (v8f32){2.5, 3, 4, 5, 6, 7, 8, 9}; ++ v8f32 v8f32_r; ++ v4f64 v4f64_a = (v4f64){0.5, 1, 2, 3}; ++ v4f64 v4f64_b = (v4f64){1.5, 2, 3, 4}; ++ v4f64 v4f64_c = (v4f64){2.5, 3, 4, 5}; ++ v4f64 v4f64_r; ++ ++ int i32_r; ++ int i32_a = 1; ++ int i32_b = 2; ++ unsigned int u32_r; ++ unsigned int u32_a = 1; ++ unsigned int u32_b = 2; ++ long long i64_r; ++ long long i64_a = 1; ++ long long i64_b = 2; ++ long long i64_c = 3; ++ long int i64_d = 0; ++ unsigned long long u64_r; ++ unsigned long long u64_a = 1; ++ unsigned long long u64_b = 2; ++ unsigned long long u64_c = 3; ++ ++ // __lasx_xvsll_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsll_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsll.b( ++ ++ // __lasx_xvsll_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsll_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsll.h( ++ ++ // __lasx_xvsll_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsll_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsll.w( ++ ++ // __lasx_xvsll_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsll_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsll.d( ++ ++ // __lasx_xvslli_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvslli_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslli.b( ++ ++ // __lasx_xvslli_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvslli_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslli.h( ++ ++ // __lasx_xvslli_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvslli_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslli.w( ++ ++ // __lasx_xvslli_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvslli_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslli.d( ++ ++ // __lasx_xvsra_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsra_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsra.b( ++ ++ // __lasx_xvsra_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsra_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsra.h( ++ ++ // __lasx_xvsra_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsra_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsra.w( ++ ++ // __lasx_xvsra_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = 
__lasx_xvsra_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsra.d( ++ ++ // __lasx_xvsrai_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvsrai_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrai.b( ++ ++ // __lasx_xvsrai_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsrai_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrai.h( ++ ++ // __lasx_xvsrai_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsrai_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrai.w( ++ ++ // __lasx_xvsrai_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsrai_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrai.d( ++ ++ // __lasx_xvsrar_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsrar_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrar.b( ++ ++ // __lasx_xvsrar_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsrar_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrar.h( ++ ++ // __lasx_xvsrar_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsrar_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrar.w( ++ ++ // __lasx_xvsrar_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsrar_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrar.d( ++ ++ // __lasx_xvsrari_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvsrari_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrari.b( ++ ++ // __lasx_xvsrari_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsrari_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrari.h( ++ ++ // __lasx_xvsrari_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsrari_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrari.w( ++ ++ // __lasx_xvsrari_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsrari_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrari.d( ++ ++ // __lasx_xvsrl_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsrl_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrl.b( ++ ++ // __lasx_xvsrl_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsrl_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrl.h( ++ ++ // __lasx_xvsrl_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsrl_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrl.w( ++ ++ // __lasx_xvsrl_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsrl_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrl.d( ++ ++ // __lasx_xvsrli_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvsrli_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrli.b( ++ ++ // __lasx_xvsrli_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsrli_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrli.h( ++ ++ // __lasx_xvsrli_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsrli_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrli.w( ++ ++ // __lasx_xvsrli_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsrli_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrli.d( ++ ++ // __lasx_xvsrlr_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI 
++ v32i8_r = __lasx_xvsrlr_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b( ++ ++ // __lasx_xvsrlr_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsrlr_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h( ++ ++ // __lasx_xvsrlr_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsrlr_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w( ++ ++ // __lasx_xvsrlr_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsrlr_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d( ++ ++ // __lasx_xvsrlri_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvsrlri_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b( ++ ++ // __lasx_xvsrlri_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsrlri_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h( ++ ++ // __lasx_xvsrlri_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsrlri_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w( ++ ++ // __lasx_xvsrlri_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsrlri_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d( ++ ++ // __lasx_xvbitclr_b ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvbitclr_b(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b( ++ ++ // __lasx_xvbitclr_h ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvbitclr_h(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h( ++ ++ // __lasx_xvbitclr_w ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvbitclr_w(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w( ++ ++ // __lasx_xvbitclr_d ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvbitclr_d(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d( ++ ++ // __lasx_xvbitclri_b ++ // xd, xj, ui3 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvbitclri_b(v32u8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b( ++ ++ // __lasx_xvbitclri_h ++ // xd, xj, ui4 ++ // UV16HI, UV16HI, UQI ++ v16u16_r = __lasx_xvbitclri_h(v16u16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h( ++ ++ // __lasx_xvbitclri_w ++ // xd, xj, ui5 ++ // UV8SI, UV8SI, UQI ++ v8u32_r = __lasx_xvbitclri_w(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w( ++ ++ // __lasx_xvbitclri_d ++ // xd, xj, ui6 ++ // UV4DI, UV4DI, UQI ++ v4u64_r = __lasx_xvbitclri_d(v4u64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d( ++ ++ // __lasx_xvbitset_b ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvbitset_b(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitset.b( ++ ++ // __lasx_xvbitset_h ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvbitset_h(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitset.h( ++ ++ // __lasx_xvbitset_w ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvbitset_w(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitset.w( ++ ++ // __lasx_xvbitset_d ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvbitset_d(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitset.d( ++ ++ // __lasx_xvbitseti_b ++ // xd, xj, ui3 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = 
__lasx_xvbitseti_b(v32u8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b( ++ ++ // __lasx_xvbitseti_h ++ // xd, xj, ui4 ++ // UV16HI, UV16HI, UQI ++ v16u16_r = __lasx_xvbitseti_h(v16u16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h( ++ ++ // __lasx_xvbitseti_w ++ // xd, xj, ui5 ++ // UV8SI, UV8SI, UQI ++ v8u32_r = __lasx_xvbitseti_w(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w( ++ ++ // __lasx_xvbitseti_d ++ // xd, xj, ui6 ++ // UV4DI, UV4DI, UQI ++ v4u64_r = __lasx_xvbitseti_d(v4u64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d( ++ ++ // __lasx_xvbitrev_b ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvbitrev_b(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b( ++ ++ // __lasx_xvbitrev_h ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvbitrev_h(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h( ++ ++ // __lasx_xvbitrev_w ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvbitrev_w(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w( ++ ++ // __lasx_xvbitrev_d ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvbitrev_d(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d( ++ ++ // __lasx_xvbitrevi_b ++ // xd, xj, ui3 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvbitrevi_b(v32u8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b( ++ ++ // __lasx_xvbitrevi_h ++ // xd, xj, ui4 ++ // UV16HI, UV16HI, UQI ++ v16u16_r = __lasx_xvbitrevi_h(v16u16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h( ++ ++ // __lasx_xvbitrevi_w ++ // xd, xj, ui5 ++ // UV8SI, UV8SI, UQI ++ v8u32_r = __lasx_xvbitrevi_w(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w( ++ ++ // __lasx_xvbitrevi_d ++ // xd, xj, ui6 ++ // UV4DI, UV4DI, UQI ++ v4u64_r = __lasx_xvbitrevi_d(v4u64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d( ++ ++ // __lasx_xvadd_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvadd_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvadd.b( ++ ++ // __lasx_xvadd_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvadd_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvadd.h( ++ ++ // __lasx_xvadd_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvadd_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvadd.w( ++ ++ // __lasx_xvadd_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvadd_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvadd.d( ++ ++ // __lasx_xvaddi_bu ++ // xd, xj, ui5 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvaddi_bu(v32i8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu( ++ ++ // __lasx_xvaddi_hu ++ // xd, xj, ui5 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvaddi_hu(v16i16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu( ++ ++ // __lasx_xvaddi_wu ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvaddi_wu(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu( ++ ++ // __lasx_xvaddi_du ++ // xd, xj, ui5 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvaddi_du(v4i64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddi.du( ++ ++ // __lasx_xvsub_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsub_b(v32i8_a, v32i8_b); // 
CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsub.b( ++ ++ // __lasx_xvsub_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsub_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsub.h( ++ ++ // __lasx_xvsub_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsub_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsub.w( ++ ++ // __lasx_xvsub_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsub_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsub.d( ++ ++ // __lasx_xvsubi_bu ++ // xd, xj, ui5 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvsubi_bu(v32i8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu( ++ ++ // __lasx_xvsubi_hu ++ // xd, xj, ui5 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsubi_hu(v16i16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu( ++ ++ // __lasx_xvsubi_wu ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsubi_wu(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu( ++ ++ // __lasx_xvsubi_du ++ // xd, xj, ui5 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsubi_du(v4i64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubi.du( ++ ++ // __lasx_xvmax_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmax_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmax.b( ++ ++ // __lasx_xvmax_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmax_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmax.h( ++ ++ // __lasx_xvmax_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmax_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmax.w( ++ ++ // __lasx_xvmax_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmax_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmax.d( ++ ++ // __lasx_xvmaxi_b ++ // xd, xj, si5 ++ // V32QI, V32QI, QI ++ v32i8_r = __lasx_xvmaxi_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b( ++ ++ // __lasx_xvmaxi_h ++ // xd, xj, si5 ++ // V16HI, V16HI, QI ++ v16i16_r = __lasx_xvmaxi_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h( ++ ++ // __lasx_xvmaxi_w ++ // xd, xj, si5 ++ // V8SI, V8SI, QI ++ v8i32_r = __lasx_xvmaxi_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w( ++ ++ // __lasx_xvmaxi_d ++ // xd, xj, si5 ++ // V4DI, V4DI, QI ++ v4i64_r = __lasx_xvmaxi_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d( ++ ++ // __lasx_xvmax_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvmax_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmax.bu( ++ ++ // __lasx_xvmax_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvmax_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmax.hu( ++ ++ // __lasx_xvmax_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvmax_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmax.wu( ++ ++ // __lasx_xvmax_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmax_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmax.du( ++ ++ // __lasx_xvmaxi_bu ++ // xd, xj, ui5 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvmaxi_bu(v32u8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu( ++ ++ // __lasx_xvmaxi_hu ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, UQI ++ v16u16_r = 
__lasx_xvmaxi_hu(v16u16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu( ++ ++ // __lasx_xvmaxi_wu ++ // xd, xj, ui5 ++ // UV8SI, UV8SI, UQI ++ v8u32_r = __lasx_xvmaxi_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu( ++ ++ // __lasx_xvmaxi_du ++ // xd, xj, ui5 ++ // UV4DI, UV4DI, UQI ++ v4u64_r = __lasx_xvmaxi_du(v4u64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du( ++ ++ // __lasx_xvmin_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmin_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmin.b( ++ ++ // __lasx_xvmin_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmin_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmin.h( ++ ++ // __lasx_xvmin_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmin_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmin.w( ++ ++ // __lasx_xvmin_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmin_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmin.d( ++ ++ // __lasx_xvmini_b ++ // xd, xj, si5 ++ // V32QI, V32QI, QI ++ v32i8_r = __lasx_xvmini_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmini.b( ++ ++ // __lasx_xvmini_h ++ // xd, xj, si5 ++ // V16HI, V16HI, QI ++ v16i16_r = __lasx_xvmini_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmini.h( ++ ++ // __lasx_xvmini_w ++ // xd, xj, si5 ++ // V8SI, V8SI, QI ++ v8i32_r = __lasx_xvmini_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmini.w( ++ ++ // __lasx_xvmini_d ++ // xd, xj, si5 ++ // V4DI, V4DI, QI ++ v4i64_r = __lasx_xvmini_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmini.d( ++ ++ // __lasx_xvmin_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvmin_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmin.bu( ++ ++ // __lasx_xvmin_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvmin_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmin.hu( ++ ++ // __lasx_xvmin_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvmin_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmin.wu( ++ ++ // __lasx_xvmin_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmin_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmin.du( ++ ++ // __lasx_xvmini_bu ++ // xd, xj, ui5 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvmini_bu(v32u8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmini.bu( ++ ++ // __lasx_xvmini_hu ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, UQI ++ v16u16_r = __lasx_xvmini_hu(v16u16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmini.hu( ++ ++ // __lasx_xvmini_wu ++ // xd, xj, ui5 ++ // UV8SI, UV8SI, UQI ++ v8u32_r = __lasx_xvmini_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmini.wu( ++ ++ // __lasx_xvmini_du ++ // xd, xj, ui5 ++ // UV4DI, UV4DI, UQI ++ v4u64_r = __lasx_xvmini_du(v4u64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmini.du( ++ ++ // __lasx_xvseq_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvseq_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvseq.b( ++ ++ // __lasx_xvseq_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvseq_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvseq.h( ++ ++ // __lasx_xvseq_w ++ // xd, xj, 
xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvseq_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvseq.w( ++ ++ // __lasx_xvseq_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvseq_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvseq.d( ++ ++ // __lasx_xvseqi_b ++ // xd, xj, si5 ++ // V32QI, V32QI, QI ++ v32i8_r = __lasx_xvseqi_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvseqi.b( ++ ++ // __lasx_xvseqi_h ++ // xd, xj, si5 ++ // V16HI, V16HI, QI ++ v16i16_r = __lasx_xvseqi_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvseqi.h( ++ ++ // __lasx_xvseqi_w ++ // xd, xj, si5 ++ // V8SI, V8SI, QI ++ v8i32_r = __lasx_xvseqi_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvseqi.w( ++ ++ // __lasx_xvseqi_d ++ // xd, xj, si5 ++ // V4DI, V4DI, QI ++ v4i64_r = __lasx_xvseqi_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvseqi.d( ++ ++ // __lasx_xvslt_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvslt_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslt.b( ++ ++ // __lasx_xvslt_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvslt_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslt.h( ++ ++ // __lasx_xvslt_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvslt_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslt.w( ++ ++ // __lasx_xvslt_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvslt_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslt.d( ++ ++ // __lasx_xvslti_b ++ // xd, xj, si5 ++ // V32QI, V32QI, QI ++ v32i8_r = __lasx_xvslti_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslti.b( ++ ++ // __lasx_xvslti_h ++ // xd, xj, si5 ++ // V16HI, V16HI, QI ++ v16i16_r = __lasx_xvslti_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslti.h( ++ ++ // __lasx_xvslti_w ++ // xd, xj, si5 ++ // V8SI, V8SI, QI ++ v8i32_r = __lasx_xvslti_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslti.w( ++ ++ // __lasx_xvslti_d ++ // xd, xj, si5 ++ // V4DI, V4DI, QI ++ v4i64_r = __lasx_xvslti_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslti.d( ++ ++ // __lasx_xvslt_bu ++ // xd, xj, xk ++ // V32QI, UV32QI, UV32QI ++ v32i8_r = __lasx_xvslt_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslt.bu( ++ ++ // __lasx_xvslt_hu ++ // xd, xj, xk ++ // V16HI, UV16HI, UV16HI ++ v16i16_r = __lasx_xvslt_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslt.hu( ++ ++ // __lasx_xvslt_wu ++ // xd, xj, xk ++ // V8SI, UV8SI, UV8SI ++ v8i32_r = __lasx_xvslt_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslt.wu( ++ ++ // __lasx_xvslt_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvslt_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslt.du( ++ ++ // __lasx_xvslti_bu ++ // xd, xj, ui5 ++ // V32QI, UV32QI, UQI ++ v32i8_r = __lasx_xvslti_bu(v32u8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslti.bu( ++ ++ // __lasx_xvslti_hu ++ // xd, xj, ui5 ++ // V16HI, UV16HI, UQI ++ v16i16_r = __lasx_xvslti_hu(v16u16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslti.hu( ++ ++ // __lasx_xvslti_wu ++ // xd, xj, ui5 ++ // V8SI, UV8SI, UQI ++ v8i32_r = __lasx_xvslti_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslti.wu( ++ ++ // __lasx_xvslti_du ++ 
// xd, xj, ui5 ++ // V4DI, UV4DI, UQI ++ v4i64_r = __lasx_xvslti_du(v4u64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslti.du( ++ ++ // __lasx_xvsle_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsle_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsle.b( ++ ++ // __lasx_xvsle_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsle_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsle.h( ++ ++ // __lasx_xvsle_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsle_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsle.w( ++ ++ // __lasx_xvsle_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsle_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsle.d( ++ ++ // __lasx_xvslei_b ++ // xd, xj, si5 ++ // V32QI, V32QI, QI ++ v32i8_r = __lasx_xvslei_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslei.b( ++ ++ // __lasx_xvslei_h ++ // xd, xj, si5 ++ // V16HI, V16HI, QI ++ v16i16_r = __lasx_xvslei_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslei.h( ++ ++ // __lasx_xvslei_w ++ // xd, xj, si5 ++ // V8SI, V8SI, QI ++ v8i32_r = __lasx_xvslei_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslei.w( ++ ++ // __lasx_xvslei_d ++ // xd, xj, si5 ++ // V4DI, V4DI, QI ++ v4i64_r = __lasx_xvslei_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslei.d( ++ ++ // __lasx_xvsle_bu ++ // xd, xj, xk ++ // V32QI, UV32QI, UV32QI ++ v32i8_r = __lasx_xvsle_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsle.bu( ++ ++ // __lasx_xvsle_hu ++ // xd, xj, xk ++ // V16HI, UV16HI, UV16HI ++ v16i16_r = __lasx_xvsle_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsle.hu( ++ ++ // __lasx_xvsle_wu ++ // xd, xj, xk ++ // V8SI, UV8SI, UV8SI ++ v8i32_r = __lasx_xvsle_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsle.wu( ++ ++ // __lasx_xvsle_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvsle_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsle.du( ++ ++ // __lasx_xvslei_bu ++ // xd, xj, ui5 ++ // V32QI, UV32QI, UQI ++ v32i8_r = __lasx_xvslei_bu(v32u8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslei.bu( ++ ++ // __lasx_xvslei_hu ++ // xd, xj, ui5 ++ // V16HI, UV16HI, UQI ++ v16i16_r = __lasx_xvslei_hu(v16u16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslei.hu( ++ ++ // __lasx_xvslei_wu ++ // xd, xj, ui5 ++ // V8SI, UV8SI, UQI ++ v8i32_r = __lasx_xvslei_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslei.wu( ++ ++ // __lasx_xvslei_du ++ // xd, xj, ui5 ++ // V4DI, UV4DI, UQI ++ v4i64_r = __lasx_xvslei_du(v4u64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslei.du( ++ ++ // __lasx_xvsat_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvsat_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsat.b( ++ ++ // __lasx_xvsat_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsat_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsat.h( ++ ++ // __lasx_xvsat_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsat_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsat.w( ++ ++ // __lasx_xvsat_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsat_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsat.d( ++ ++ // 
__lasx_xvsat_bu ++ // xd, xj, ui3 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvsat_bu(v32u8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsat.bu( ++ ++ // __lasx_xvsat_hu ++ // xd, xj, ui4 ++ // UV16HI, UV16HI, UQI ++ v16u16_r = __lasx_xvsat_hu(v16u16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsat.hu( ++ ++ // __lasx_xvsat_wu ++ // xd, xj, ui5 ++ // UV8SI, UV8SI, UQI ++ v8u32_r = __lasx_xvsat_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsat.wu( ++ ++ // __lasx_xvsat_du ++ // xd, xj, ui6 ++ // UV4DI, UV4DI, UQI ++ v4u64_r = __lasx_xvsat_du(v4u64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsat.du( ++ ++ // __lasx_xvadda_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvadda_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvadda.b( ++ ++ // __lasx_xvadda_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvadda_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvadda.h( ++ ++ // __lasx_xvadda_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvadda_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvadda.w( ++ ++ // __lasx_xvadda_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvadda_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvadda.d( ++ ++ // __lasx_xvsadd_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsadd_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsadd.b( ++ ++ // __lasx_xvsadd_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsadd_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsadd.h( ++ ++ // __lasx_xvsadd_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsadd_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsadd.w( ++ ++ // __lasx_xvsadd_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsadd_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsadd.d( ++ ++ // __lasx_xvsadd_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvsadd_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu( ++ ++ // __lasx_xvsadd_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvsadd_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu( ++ ++ // __lasx_xvsadd_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvsadd_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu( ++ ++ // __lasx_xvsadd_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvsadd_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsadd.du( ++ ++ // __lasx_xvavg_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvavg_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvavg.b( ++ ++ // __lasx_xvavg_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvavg_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvavg.h( ++ ++ // __lasx_xvavg_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvavg_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvavg.w( ++ ++ // __lasx_xvavg_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvavg_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvavg.d( ++ ++ // __lasx_xvavg_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvavg_bu(v32u8_a, v32u8_b); // 
CHECK: call <32 x i8> @llvm.loongarch.lasx.xvavg.bu( ++ ++ // __lasx_xvavg_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvavg_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvavg.hu( ++ ++ // __lasx_xvavg_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvavg_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvavg.wu( ++ ++ // __lasx_xvavg_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvavg_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvavg.du( ++ ++ // __lasx_xvavgr_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvavgr_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvavgr.b( ++ ++ // __lasx_xvavgr_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvavgr_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvavgr.h( ++ ++ // __lasx_xvavgr_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvavgr_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvavgr.w( ++ ++ // __lasx_xvavgr_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvavgr_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvavgr.d( ++ ++ // __lasx_xvavgr_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvavgr_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu( ++ ++ // __lasx_xvavgr_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvavgr_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu( ++ ++ // __lasx_xvavgr_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvavgr_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu( ++ ++ // __lasx_xvavgr_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvavgr_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvavgr.du( ++ ++ // __lasx_xvssub_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvssub_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssub.b( ++ ++ // __lasx_xvssub_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvssub_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssub.h( ++ ++ // __lasx_xvssub_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvssub_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssub.w( ++ ++ // __lasx_xvssub_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvssub_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssub.d( ++ ++ // __lasx_xvssub_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvssub_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssub.bu( ++ ++ // __lasx_xvssub_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvssub_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssub.hu( ++ ++ // __lasx_xvssub_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvssub_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssub.wu( ++ ++ // __lasx_xvssub_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvssub_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssub.du( ++ ++ // __lasx_xvabsd_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvabsd_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvabsd.b( ++ ++ // 
__lasx_xvabsd_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvabsd_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvabsd.h( ++ ++ // __lasx_xvabsd_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvabsd_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvabsd.w( ++ ++ // __lasx_xvabsd_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvabsd_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvabsd.d( ++ ++ // __lasx_xvabsd_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvabsd_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu( ++ ++ // __lasx_xvabsd_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvabsd_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu( ++ ++ // __lasx_xvabsd_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvabsd_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu( ++ ++ // __lasx_xvabsd_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvabsd_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvabsd.du( ++ ++ // __lasx_xvmul_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmul_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmul.b( ++ ++ // __lasx_xvmul_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmul_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmul.h( ++ ++ // __lasx_xvmul_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmul_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmul.w( ++ ++ // __lasx_xvmul_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmul_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmul.d( ++ ++ // __lasx_xvmadd_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmadd_b(v32i8_a, v32i8_b, v32i8_c); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmadd.b( ++ ++ // __lasx_xvmadd_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmadd_h(v16i16_a, v16i16_b, v16i16_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmadd.h( ++ ++ // __lasx_xvmadd_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmadd_w(v8i32_a, v8i32_b, v8i32_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmadd.w( ++ ++ // __lasx_xvmadd_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmadd_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmadd.d( ++ ++ // __lasx_xvmsub_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmsub_b(v32i8_a, v32i8_b, v32i8_c); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmsub.b( ++ ++ // __lasx_xvmsub_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmsub_h(v16i16_a, v16i16_b, v16i16_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmsub.h( ++ ++ // __lasx_xvmsub_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmsub_w(v8i32_a, v8i32_b, v8i32_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmsub.w( ++ ++ // __lasx_xvmsub_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmsub_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmsub.d( ++ ++ // __lasx_xvdiv_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvdiv_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> 
@llvm.loongarch.lasx.xvdiv.b( ++ ++ // __lasx_xvdiv_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvdiv_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvdiv.h( ++ ++ // __lasx_xvdiv_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvdiv_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvdiv.w( ++ ++ // __lasx_xvdiv_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvdiv_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvdiv.d( ++ ++ // __lasx_xvdiv_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvdiv_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu( ++ ++ // __lasx_xvdiv_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvdiv_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu( ++ ++ // __lasx_xvdiv_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvdiv_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu( ++ ++ // __lasx_xvdiv_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvdiv_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvdiv.du( ++ ++ // __lasx_xvhaddw_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvhaddw_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b( ++ ++ // __lasx_xvhaddw_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvhaddw_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h( ++ ++ // __lasx_xvhaddw_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvhaddw_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w( ++ ++ // __lasx_xvhaddw_hu_bu ++ // xd, xj, xk ++ // UV16HI, UV32QI, UV32QI ++ v16u16_r = __lasx_xvhaddw_hu_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu( ++ ++ // __lasx_xvhaddw_wu_hu ++ // xd, xj, xk ++ // UV8SI, UV16HI, UV16HI ++ v8u32_r = __lasx_xvhaddw_wu_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu( ++ ++ // __lasx_xvhaddw_du_wu ++ // xd, xj, xk ++ // UV4DI, UV8SI, UV8SI ++ v4u64_r = __lasx_xvhaddw_du_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu( ++ ++ // __lasx_xvhsubw_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvhsubw_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b( ++ ++ // __lasx_xvhsubw_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvhsubw_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h( ++ ++ // __lasx_xvhsubw_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvhsubw_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w( ++ ++ // __lasx_xvhsubw_hu_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvhsubw_hu_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu( ++ ++ // __lasx_xvhsubw_wu_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvhsubw_wu_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu( ++ ++ // __lasx_xvhsubw_du_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvhsubw_du_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu( ++ ++ // __lasx_xvmod_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = 
__lasx_xvmod_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmod.b( ++ ++ // __lasx_xvmod_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmod_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmod.h( ++ ++ // __lasx_xvmod_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmod_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmod.w( ++ ++ // __lasx_xvmod_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmod_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmod.d( ++ ++ // __lasx_xvmod_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvmod_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmod.bu( ++ ++ // __lasx_xvmod_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvmod_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmod.hu( ++ ++ // __lasx_xvmod_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvmod_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmod.wu( ++ ++ // __lasx_xvmod_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmod_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmod.du( ++ ++ // __lasx_xvrepl128vei_b ++ // xd, xj, ui4 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvrepl128vei_b(v32i8_a, ui4_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b( ++ ++ // __lasx_xvrepl128vei_h ++ // xd, xj, ui3 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvrepl128vei_h(v16i16_a, ui3_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h( ++ ++ // __lasx_xvrepl128vei_w ++ // xd, xj, ui2 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvrepl128vei_w(v8i32_a, ui2_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w( ++ ++ // __lasx_xvrepl128vei_d ++ // xd, xj, ui1 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvrepl128vei_d(v4i64_a, ui1_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d( ++ ++ // __lasx_xvpickev_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvpickev_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpickev.b( ++ ++ // __lasx_xvpickev_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvpickev_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpickev.h( ++ ++ // __lasx_xvpickev_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvpickev_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpickev.w( ++ ++ // __lasx_xvpickev_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvpickev_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpickev.d( ++ ++ // __lasx_xvpickod_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvpickod_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpickod.b( ++ ++ // __lasx_xvpickod_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvpickod_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpickod.h( ++ ++ // __lasx_xvpickod_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvpickod_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpickod.w( ++ ++ // __lasx_xvpickod_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvpickod_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpickod.d( ++ ++ // __lasx_xvilvh_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = 
__lasx_xvilvh_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvilvh.b( ++ ++ // __lasx_xvilvh_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvilvh_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvilvh.h( ++ ++ // __lasx_xvilvh_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvilvh_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvilvh.w( ++ ++ // __lasx_xvilvh_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvilvh_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvilvh.d( ++ ++ // __lasx_xvilvl_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvilvl_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvilvl.b( ++ ++ // __lasx_xvilvl_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvilvl_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvilvl.h( ++ ++ // __lasx_xvilvl_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvilvl_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvilvl.w( ++ ++ // __lasx_xvilvl_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvilvl_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvilvl.d( ++ ++ // __lasx_xvpackev_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvpackev_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpackev.b( ++ ++ // __lasx_xvpackev_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvpackev_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpackev.h( ++ ++ // __lasx_xvpackev_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvpackev_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpackev.w( ++ ++ // __lasx_xvpackev_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvpackev_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpackev.d( ++ ++ // __lasx_xvpackod_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvpackod_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpackod.b( ++ ++ // __lasx_xvpackod_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvpackod_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpackod.h( ++ ++ // __lasx_xvpackod_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvpackod_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpackod.w( ++ ++ // __lasx_xvpackod_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvpackod_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpackod.d( ++ ++ // __lasx_xvshuf_b ++ // xd, xj, xk, xa ++ // V32QI, V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvshuf_b(v32i8_a, v32i8_b, v32i8_c); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvshuf.b( ++ ++ // __lasx_xvshuf_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvshuf_h(v16i16_a, v16i16_b, v16i16_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvshuf.h( ++ ++ // __lasx_xvshuf_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvshuf_w(v8i32_a, v8i32_b, v8i32_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvshuf.w( ++ ++ // __lasx_xvshuf_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvshuf_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvshuf.d( ++ ++ // __lasx_xvand_v ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = 
__lasx_xvand_v(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvand.v( ++ ++ // __lasx_xvandi_b ++ // xd, xj, ui8 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvandi_b(v32u8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvandi.b( ++ ++ // __lasx_xvor_v ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvor_v(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvor.v( ++ ++ // __lasx_xvori_b ++ // xd, xj, ui8 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvori_b(v32u8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvori.b( ++ ++ // __lasx_xvnor_v ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvnor_v(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvnor.v( ++ ++ // __lasx_xvnori_b ++ // xd, xj, ui8 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvnori_b(v32u8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvnori.b( ++ ++ // __lasx_xvxor_v ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvxor_v(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvxor.v( ++ ++ // __lasx_xvxori_b ++ // xd, xj, ui8 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvxori_b(v32u8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvxori.b( ++ ++ // __lasx_xvbitsel_v ++ // xd, xj, xk, xa ++ // UV32QI, UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvbitsel_v(v32u8_a, v32u8_b, v32u8_c); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v( ++ ++ // __lasx_xvbitseli_b ++ // xd, xj, ui8 ++ // UV32QI, UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvbitseli_b(v32u8_a, v32u8_b, ui8); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b( ++ ++ // __lasx_xvshuf4i_b ++ // xd, xj, ui8 ++ // V32QI, V32QI, USI ++ v32i8_r = __lasx_xvshuf4i_b(v32i8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b( ++ ++ // __lasx_xvshuf4i_h ++ // xd, xj, ui8 ++ // V16HI, V16HI, USI ++ v16i16_r = __lasx_xvshuf4i_h(v16i16_a, ui8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h( ++ ++ // __lasx_xvshuf4i_w ++ // xd, xj, ui8 ++ // V8SI, V8SI, USI ++ v8i32_r = __lasx_xvshuf4i_w(v8i32_a, ui8_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w( ++ ++ // __lasx_xvreplgr2vr_b ++ // xd, rj ++ // V32QI, SI ++ v32i8_r = __lasx_xvreplgr2vr_b(i32_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b( ++ ++ // __lasx_xvreplgr2vr_h ++ // xd, rj ++ // V16HI, SI ++ v16i16_r = __lasx_xvreplgr2vr_h(i32_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h( ++ ++ // __lasx_xvreplgr2vr_w ++ // xd, rj ++ // V8SI, SI ++ v8i32_r = __lasx_xvreplgr2vr_w(i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w( ++ ++ // __lasx_xvreplgr2vr_d ++ // xd, rj ++ // V4DI, DI ++ v4i64_r = __lasx_xvreplgr2vr_d(i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d( ++ ++ // __lasx_xvpcnt_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvpcnt_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b( ++ ++ // __lasx_xvpcnt_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvpcnt_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h( ++ ++ // __lasx_xvpcnt_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvpcnt_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w( ++ ++ // __lasx_xvpcnt_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvpcnt_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d( ++ ++ // __lasx_xvclo_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvclo_b(v32i8_a); 
// CHECK: call <32 x i8> @llvm.loongarch.lasx.xvclo.b( ++ ++ // __lasx_xvclo_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvclo_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvclo.h( ++ ++ // __lasx_xvclo_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvclo_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvclo.w( ++ ++ // __lasx_xvclo_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvclo_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvclo.d( ++ ++ // __lasx_xvclz_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvclz_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvclz.b( ++ ++ // __lasx_xvclz_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvclz_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvclz.h( ++ ++ // __lasx_xvclz_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvclz_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvclz.w( ++ ++ // __lasx_xvclz_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvclz_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvclz.d( ++ ++ // __lasx_xvfcmp_caf_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_caf_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s( ++ ++ // __lasx_xvfcmp_caf_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_caf_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d( ++ ++ // __lasx_xvfcmp_cor_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cor_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s( ++ ++ // __lasx_xvfcmp_cor_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cor_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d( ++ ++ // __lasx_xvfcmp_cun_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cun_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s( ++ ++ // __lasx_xvfcmp_cun_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cun_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d( ++ ++ // __lasx_xvfcmp_cune_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cune_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s( ++ ++ // __lasx_xvfcmp_cune_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cune_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d( ++ ++ // __lasx_xvfcmp_cueq_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cueq_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s( ++ ++ // __lasx_xvfcmp_cueq_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cueq_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d( ++ ++ // __lasx_xvfcmp_ceq_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_ceq_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s( ++ ++ // __lasx_xvfcmp_ceq_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_ceq_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d( ++ ++ // __lasx_xvfcmp_cne_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cne_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s( ++ ++ // __lasx_xvfcmp_cne_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = 
__lasx_xvfcmp_cne_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d( ++ ++ // __lasx_xvfcmp_clt_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_clt_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s( ++ ++ // __lasx_xvfcmp_clt_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_clt_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d( ++ ++ // __lasx_xvfcmp_cult_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cult_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s( ++ ++ // __lasx_xvfcmp_cult_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cult_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d( ++ ++ // __lasx_xvfcmp_cle_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cle_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s( ++ ++ // __lasx_xvfcmp_cle_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cle_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d( ++ ++ // __lasx_xvfcmp_cule_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cule_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s( ++ ++ // __lasx_xvfcmp_cule_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cule_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d( ++ ++ // __lasx_xvfcmp_saf_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_saf_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s( ++ ++ // __lasx_xvfcmp_saf_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_saf_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d( ++ ++ // __lasx_xvfcmp_sor_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sor_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s( ++ ++ // __lasx_xvfcmp_sor_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sor_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d( ++ ++ // __lasx_xvfcmp_sun_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sun_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s( ++ ++ // __lasx_xvfcmp_sun_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sun_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d( ++ ++ // __lasx_xvfcmp_sune_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sune_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s( ++ ++ // __lasx_xvfcmp_sune_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sune_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d( ++ ++ // __lasx_xvfcmp_sueq_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sueq_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s( ++ ++ // __lasx_xvfcmp_sueq_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sueq_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d( ++ ++ // __lasx_xvfcmp_seq_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_seq_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> 
@llvm.loongarch.lasx.xvfcmp.seq.s( ++ ++ // __lasx_xvfcmp_seq_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_seq_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d( ++ ++ // __lasx_xvfcmp_sne_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sne_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s( ++ ++ // __lasx_xvfcmp_sne_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sne_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d( ++ ++ // __lasx_xvfcmp_slt_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_slt_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s( ++ ++ // __lasx_xvfcmp_slt_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_slt_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d( ++ ++ // __lasx_xvfcmp_sult_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sult_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s( ++ ++ // __lasx_xvfcmp_sult_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sult_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d( ++ ++ // __lasx_xvfcmp_sle_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sle_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s( ++ ++ // __lasx_xvfcmp_sle_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sle_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d( ++ ++ // __lasx_xvfcmp_sule_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sule_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s( ++ ++ // __lasx_xvfcmp_sule_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sule_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d( ++ ++ // __lasx_xvfadd_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfadd_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfadd.s( ++ ++ // __lasx_xvfadd_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfadd_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfadd.d( ++ ++ // __lasx_xvfsub_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfsub_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfsub.s( ++ ++ // __lasx_xvfsub_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfsub_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfsub.d( ++ ++ // __lasx_xvfmul_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmul_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmul.s( ++ ++ // __lasx_xvfmul_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmul_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmul.d( ++ ++ // __lasx_xvfdiv_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfdiv_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfdiv.s( ++ ++ // __lasx_xvfdiv_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfdiv_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfdiv.d( ++ ++ // __lasx_xvfcvt_h_s ++ // xd, xj, xk ++ // V16HI, V8SF, V8SF ++ v16i16_r = __lasx_xvfcvt_h_s(v8f32_a, v8f32_b); // CHECK: call <16 x i16> 
@llvm.loongarch.lasx.xvfcvt.h.s( ++ ++ // __lasx_xvfcvt_s_d ++ // xd, xj, xk ++ // V8SF, V4DF, V4DF ++ v8f32_r = __lasx_xvfcvt_s_d(v4f64_a, v4f64_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d( ++ ++ // __lasx_xvfmin_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmin_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmin.s( ++ ++ // __lasx_xvfmin_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmin_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmin.d( ++ ++ // __lasx_xvfmina_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmina_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmina.s( ++ ++ // __lasx_xvfmina_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmina_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmina.d( ++ ++ // __lasx_xvfmax_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmax_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmax.s( ++ ++ // __lasx_xvfmax_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmax_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmax.d( ++ ++ // __lasx_xvfmaxa_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmaxa_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s( ++ ++ // __lasx_xvfmaxa_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmaxa_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d( ++ ++ // __lasx_xvfclass_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvfclass_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfclass.s( ++ ++ // __lasx_xvfclass_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvfclass_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfclass.d( ++ ++ // __lasx_xvfsqrt_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfsqrt_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s( ++ ++ // __lasx_xvfsqrt_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfsqrt_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d( ++ ++ // __lasx_xvfrecip_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfrecip_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrecip.s( ++ ++ // __lasx_xvfrecip_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrecip_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrecip.d( ++ ++ // __lasx_xvfrint_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfrint_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrint.s( ++ ++ // __lasx_xvfrint_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrint_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrint.d( ++ ++ // __lasx_xvfrsqrt_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfrsqrt_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s( ++ ++ // __lasx_xvfrsqrt_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrsqrt_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d( ++ ++ // __lasx_xvflogb_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvflogb_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvflogb.s( ++ ++ // __lasx_xvflogb_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvflogb_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvflogb.d( ++ ++ // __lasx_xvfcvth_s_h ++ // xd, xj ++ // V8SF, V16HI ++ v8f32_r = 
__lasx_xvfcvth_s_h(v16i16_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h( ++ ++ // __lasx_xvfcvth_d_s ++ // xd, xj ++ // V4DF, V8SF ++ v4f64_r = __lasx_xvfcvth_d_s(v8f32_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s( ++ ++ // __lasx_xvfcvtl_s_h ++ // xd, xj ++ // V8SF, V16HI ++ v8f32_r = __lasx_xvfcvtl_s_h(v16i16_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h( ++ ++ // __lasx_xvfcvtl_d_s ++ // xd, xj ++ // V4DF, V8SF ++ v4f64_r = __lasx_xvfcvtl_d_s(v8f32_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s( ++ ++ // __lasx_xvftint_w_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvftint_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s( ++ ++ // __lasx_xvftint_l_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvftint_l_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d( ++ ++ // __lasx_xvftint_wu_s ++ // xd, xj ++ // UV8SI, V8SF ++ v8u32_r = __lasx_xvftint_wu_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s( ++ ++ // __lasx_xvftint_lu_d ++ // xd, xj ++ // UV4DI, V4DF ++ v4u64_r = __lasx_xvftint_lu_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d( ++ ++ // __lasx_xvftintrz_w_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvftintrz_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s( ++ ++ // __lasx_xvftintrz_l_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvftintrz_l_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d( ++ ++ // __lasx_xvftintrz_wu_s ++ // xd, xj ++ // UV8SI, V8SF ++ v8u32_r = __lasx_xvftintrz_wu_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s( ++ ++ // __lasx_xvftintrz_lu_d ++ // xd, xj ++ // UV4DI, V4DF ++ v4u64_r = __lasx_xvftintrz_lu_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d( ++ ++ // __lasx_xvffint_s_w ++ // xd, xj ++ // V8SF, V8SI ++ v8f32_r = __lasx_xvffint_s_w(v8i32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvffint.s.w( ++ ++ // __lasx_xvffint_d_l ++ // xd, xj ++ // V4DF, V4DI ++ v4f64_r = __lasx_xvffint_d_l(v4i64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvffint.d.l( ++ ++ // __lasx_xvffint_s_wu ++ // xd, xj ++ // V8SF, UV8SI ++ v8f32_r = __lasx_xvffint_s_wu(v8u32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu( ++ ++ // __lasx_xvffint_d_lu ++ // xd, xj ++ // V4DF, UV4DI ++ v4f64_r = __lasx_xvffint_d_lu(v4u64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu( ++ ++ // __lasx_xvreplve_b ++ // xd, xj, rk ++ // V32QI, V32QI, SI ++ v32i8_r = __lasx_xvreplve_b(v32i8_a, i32_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvreplve.b( ++ ++ // __lasx_xvreplve_h ++ // xd, xj, rk ++ // V16HI, V16HI, SI ++ v16i16_r = __lasx_xvreplve_h(v16i16_a, i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvreplve.h( ++ ++ // __lasx_xvreplve_w ++ // xd, xj, rk ++ // V8SI, V8SI, SI ++ v8i32_r = __lasx_xvreplve_w(v8i32_a, i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvreplve.w( ++ ++ // __lasx_xvreplve_d ++ // xd, xj, rk ++ // V4DI, V4DI, SI ++ v4i64_r = __lasx_xvreplve_d(v4i64_a, i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvreplve.d( ++ ++ // __lasx_xvpermi_w ++ // xd, xj, ui8 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvpermi_w(v8i32_a, v8i32_b, ui8); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpermi.w( ++ ++ // __lasx_xvandn_v ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvandn_v(v32u8_a, v32u8_b); // CHECK: call 
<32 x i8> @llvm.loongarch.lasx.xvandn.v( ++ ++ // __lasx_xvneg_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvneg_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvneg.b( ++ ++ // __lasx_xvneg_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvneg_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvneg.h( ++ ++ // __lasx_xvneg_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvneg_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvneg.w( ++ ++ // __lasx_xvneg_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvneg_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvneg.d( ++ ++ // __lasx_xvmuh_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmuh_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmuh.b( ++ ++ // __lasx_xvmuh_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmuh_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmuh.h( ++ ++ // __lasx_xvmuh_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmuh_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmuh.w( ++ ++ // __lasx_xvmuh_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmuh_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmuh.d( ++ ++ // __lasx_xvmuh_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvmuh_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu( ++ ++ // __lasx_xvmuh_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvmuh_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu( ++ ++ // __lasx_xvmuh_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvmuh_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu( ++ ++ // __lasx_xvmuh_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmuh_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmuh.du( ++ ++ // __lasx_xvsllwil_h_b ++ // xd, xj, ui3 ++ // V16HI, V32QI, UQI ++ v16i16_r = __lasx_xvsllwil_h_b(v32i8_a, ui3_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b( ++ ++ // __lasx_xvsllwil_w_h ++ // xd, xj, ui4 ++ // V8SI, V16HI, UQI ++ v8i32_r = __lasx_xvsllwil_w_h(v16i16_a, ui4_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h( ++ ++ // __lasx_xvsllwil_d_w ++ // xd, xj, ui5 ++ // V4DI, V8SI, UQI ++ v4i64_r = __lasx_xvsllwil_d_w(v8i32_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w( ++ ++ // __lasx_xvsllwil_hu_bu ++ // xd, xj, ui3 ++ // UV16HI, UV32QI, UQI ++ v16u16_r = __lasx_xvsllwil_hu_bu(v32u8_a, ui3_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu( ++ ++ // __lasx_xvsllwil_wu_hu ++ // xd, xj, ui4 ++ // UV8SI, UV16HI, UQI ++ v8u32_r = __lasx_xvsllwil_wu_hu(v16u16_a, ui4_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu( ++ ++ // __lasx_xvsllwil_du_wu ++ // xd, xj, ui5 ++ // UV4DI, UV8SI, UQI ++ v4u64_r = __lasx_xvsllwil_du_wu(v8u32_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu( ++ ++ // __lasx_xvsran_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvsran_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h( ++ ++ // __lasx_xvsran_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvsran_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w( ++ ++ // __lasx_xvsran_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r 
= __lasx_xvsran_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d( ++ ++ // __lasx_xvssran_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvssran_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h( ++ ++ // __lasx_xvssran_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvssran_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w( ++ ++ // __lasx_xvssran_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvssran_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d( ++ ++ // __lasx_xvssran_bu_h ++ // xd, xj, xk ++ // UV32QI, UV16HI, UV16HI ++ v32u8_r = __lasx_xvssran_bu_h(v16u16_a, v16u16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h( ++ ++ // __lasx_xvssran_hu_w ++ // xd, xj, xk ++ // UV16HI, UV8SI, UV8SI ++ v16u16_r = __lasx_xvssran_hu_w(v8u32_a, v8u32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w( ++ ++ // __lasx_xvssran_wu_d ++ // xd, xj, xk ++ // UV8SI, UV4DI, UV4DI ++ v8u32_r = __lasx_xvssran_wu_d(v4u64_a, v4u64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d( ++ ++ // __lasx_xvsrarn_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvsrarn_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h( ++ ++ // __lasx_xvsrarn_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvsrarn_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w( ++ ++ // __lasx_xvsrarn_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvsrarn_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d( ++ ++ // __lasx_xvssrarn_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvssrarn_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h( ++ ++ // __lasx_xvssrarn_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvssrarn_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w( ++ ++ // __lasx_xvssrarn_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvssrarn_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d( ++ ++ // __lasx_xvssrarn_bu_h ++ // xd, xj, xk ++ // UV32QI, UV16HI, UV16HI ++ v32u8_r = __lasx_xvssrarn_bu_h(v16u16_a, v16u16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h( ++ ++ // __lasx_xvssrarn_hu_w ++ // xd, xj, xk ++ // UV16HI, UV8SI, UV8SI ++ v16u16_r = __lasx_xvssrarn_hu_w(v8u32_a, v8u32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w( ++ ++ // __lasx_xvssrarn_wu_d ++ // xd, xj, xk ++ // UV8SI, UV4DI, UV4DI ++ v8u32_r = __lasx_xvssrarn_wu_d(v4u64_a, v4u64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d( ++ ++ // __lasx_xvsrln_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvsrln_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h( ++ ++ // __lasx_xvsrln_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvsrln_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w( ++ ++ // __lasx_xvsrln_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvsrln_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d( ++ ++ // __lasx_xvssrln_bu_h ++ // xd, xj, xk ++ // UV32QI, UV16HI, UV16HI ++ v32u8_r = __lasx_xvssrln_bu_h(v16u16_a, v16u16_b); // CHECK: call <32 x i8> 
@llvm.loongarch.lasx.xvssrln.bu.h( ++ ++ // __lasx_xvssrln_hu_w ++ // xd, xj, xk ++ // UV16HI, UV8SI, UV8SI ++ v16u16_r = __lasx_xvssrln_hu_w(v8u32_a, v8u32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w( ++ ++ // __lasx_xvssrln_wu_d ++ // xd, xj, xk ++ // UV8SI, UV4DI, UV4DI ++ v8u32_r = __lasx_xvssrln_wu_d(v4u64_a, v4u64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d( ++ ++ // __lasx_xvsrlrn_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvsrlrn_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h( ++ ++ // __lasx_xvsrlrn_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvsrlrn_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w( ++ ++ // __lasx_xvsrlrn_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvsrlrn_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d( ++ ++ // __lasx_xvssrlrn_bu_h ++ // xd, xj, xk ++ // UV32QI, UV16HI, UV16HI ++ v32u8_r = __lasx_xvssrlrn_bu_h(v16u16_a, v16u16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h( ++ ++ // __lasx_xvssrlrn_hu_w ++ // xd, xj, xk ++ // UV16HI, UV8SI, UV8SI ++ v16u16_r = __lasx_xvssrlrn_hu_w(v8u32_a, v8u32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w( ++ ++ // __lasx_xvssrlrn_wu_d ++ // xd, xj, xk ++ // UV8SI, UV4DI, UV4DI ++ v8u32_r = __lasx_xvssrlrn_wu_d(v4u64_a, v4u64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d( ++ ++ // __lasx_xvfrstpi_b ++ // xd, xj, ui5 ++ // V32QI, V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvfrstpi_b(v32i8_a, v32i8_b, ui5); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b( ++ ++ // __lasx_xvfrstpi_h ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvfrstpi_h(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h( ++ ++ // __lasx_xvfrstp_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvfrstp_b(v32i8_a, v32i8_b, v32i8_c); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b( ++ ++ // __lasx_xvfrstp_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvfrstp_h(v16i16_a, v16i16_b, v16i16_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h( ++ ++ // __lasx_xvshuf4i_d ++ // xd, xj, ui8 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvshuf4i_d(v4i64_a, v4i64_b, ui8); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d( ++ ++ // __lasx_xvbsrl_v ++ // xd, xj, ui5 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvbsrl_v(v32i8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v( ++ ++ // __lasx_xvbsll_v ++ // xd, xj, ui5 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvbsll_v(v32i8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbsll.v( ++ ++ // __lasx_xvextrins_b ++ // xd, xj, ui8 ++ // V32QI, V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvextrins_b(v32i8_a, v32i8_b, ui8); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvextrins.b( ++ ++ // __lasx_xvextrins_h ++ // xd, xj, ui8 ++ // V16HI, V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvextrins_h(v16i16_a, v16i16_b, ui8); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvextrins.h( ++ ++ // __lasx_xvextrins_w ++ // xd, xj, ui8 ++ // V8SI, V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvextrins_w(v8i32_a, v8i32_b, ui8); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvextrins.w( ++ ++ // __lasx_xvextrins_d ++ // xd, xj, ui8 ++ // V4DI, V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvextrins_d(v4i64_a, v4i64_b, ui8); // CHECK: call <4 x i64> 
@llvm.loongarch.lasx.xvextrins.d( ++ ++ // __lasx_xvmskltz_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvmskltz_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b( ++ ++ // __lasx_xvmskltz_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvmskltz_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h( ++ ++ // __lasx_xvmskltz_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvmskltz_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w( ++ ++ // __lasx_xvmskltz_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvmskltz_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d( ++ ++ // __lasx_xvsigncov_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsigncov_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b( ++ ++ // __lasx_xvsigncov_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsigncov_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h( ++ ++ // __lasx_xvsigncov_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsigncov_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w( ++ ++ // __lasx_xvsigncov_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsigncov_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d( ++ ++ // __lasx_xvfmadd_s ++ // xd, xj, xk, xa ++ // V8SF, V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmadd_s(v8f32_a, v8f32_b, v8f32_c); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmadd.s( ++ ++ // __lasx_xvfmadd_d ++ // xd, xj, xk, xa ++ // V4DF, V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmadd_d(v4f64_a, v4f64_b, v4f64_c); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmadd.d( ++ ++ // __lasx_xvfmsub_s ++ // xd, xj, xk, xa ++ // V8SF, V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmsub_s(v8f32_a, v8f32_b, v8f32_c); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmsub.s( ++ ++ // __lasx_xvfmsub_d ++ // xd, xj, xk, xa ++ // V4DF, V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmsub_d(v4f64_a, v4f64_b, v4f64_c); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmsub.d( ++ ++ // __lasx_xvfnmadd_s ++ // xd, xj, xk, xa ++ // V8SF, V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfnmadd_s(v8f32_a, v8f32_b, v8f32_c); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s( ++ ++ // __lasx_xvfnmadd_d ++ // xd, xj, xk, xa ++ // V4DF, V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfnmadd_d(v4f64_a, v4f64_b, v4f64_c); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d( ++ ++ // __lasx_xvfnmsub_s ++ // xd, xj, xk, xa ++ // V8SF, V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfnmsub_s(v8f32_a, v8f32_b, v8f32_c); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s( ++ ++ // __lasx_xvfnmsub_d ++ // xd, xj, xk, xa ++ // V4DF, V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfnmsub_d(v4f64_a, v4f64_b, v4f64_c); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d( ++ ++ // __lasx_xvftintrne_w_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvftintrne_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s( ++ ++ // __lasx_xvftintrne_l_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvftintrne_l_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d( ++ ++ // __lasx_xvftintrp_w_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvftintrp_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s( ++ ++ // __lasx_xvftintrp_l_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvftintrp_l_d(v4f64_a); // 
CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d( ++ ++ // __lasx_xvftintrm_w_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvftintrm_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s( ++ ++ // __lasx_xvftintrm_l_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvftintrm_l_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d( ++ ++ // __lasx_xvftint_w_d ++ // xd, xj, xk ++ // V8SI, V4DF, V4DF ++ v8i32_r = __lasx_xvftint_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d( ++ ++ // __lasx_xvffint_s_l ++ // xd, xj, xk ++ // V8SF, V4DI, V4DI ++ v8f32_r = __lasx_xvffint_s_l(v4i64_a, v4i64_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvffint.s.l( ++ ++ // __lasx_xvftintrz_w_d ++ // xd, xj, xk ++ // V8SI, V4DF, V4DF ++ v8i32_r = __lasx_xvftintrz_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d( ++ ++ // __lasx_xvftintrp_w_d ++ // xd, xj, xk ++ // V8SI, V4DF, V4DF ++ v8i32_r = __lasx_xvftintrp_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d( ++ ++ // __lasx_xvftintrm_w_d ++ // xd, xj, xk ++ // V8SI, V4DF, V4DF ++ v8i32_r = __lasx_xvftintrm_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d( ++ ++ // __lasx_xvftintrne_w_d ++ // xd, xj, xk ++ // V8SI, V4DF, V4DF ++ v8i32_r = __lasx_xvftintrne_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d( ++ ++ // __lasx_xvftinth_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftinth_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s( ++ ++ // __lasx_xvftintl_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintl_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s( ++ ++ // __lasx_xvffinth_d_w ++ // xd, xj ++ // V4DF, V8SI ++ v4f64_r = __lasx_xvffinth_d_w(v8i32_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w( ++ ++ // __lasx_xvffintl_d_w ++ // xd, xj ++ // V4DF, V8SI ++ v4f64_r = __lasx_xvffintl_d_w(v8i32_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w( ++ ++ // __lasx_xvftintrzh_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrzh_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s( ++ ++ // __lasx_xvftintrzl_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrzl_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s( ++ ++ // __lasx_xvftintrph_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrph_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s( ++ ++ // __lasx_xvftintrpl_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrpl_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s( ++ ++ // __lasx_xvftintrmh_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrmh_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s( ++ ++ // __lasx_xvftintrml_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrml_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s( ++ ++ // __lasx_xvftintrneh_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrneh_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s( ++ ++ // __lasx_xvftintrnel_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrnel_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s( ++ ++ // __lasx_xvfrintrne_s ++ // xd, xj ++ // V8SF, V8SF ++ 
v8f32_r = __lasx_xvfrintrne_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s( ++ ++ // __lasx_xvfrintrne_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrintrne_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d( ++ ++ // __lasx_xvfrintrz_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfrintrz_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s( ++ ++ // __lasx_xvfrintrz_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrintrz_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d( ++ ++ // __lasx_xvfrintrp_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfrintrp_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s( ++ ++ // __lasx_xvfrintrp_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrintrp_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d( ++ ++ // __lasx_xvfrintrm_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfrintrm_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s( ++ ++ // __lasx_xvfrintrm_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrintrm_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d( ++ ++ // __lasx_xvld ++ // xd, rj, si12 ++ // V32QI, CVPOINTER, SI ++ v32i8_r = __lasx_xvld(&v32i8_a, si12); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvld( ++ ++ // __lasx_xvst ++ // xd, rj, si12 ++ // VOID, V32QI, CVPOINTER, SI ++ __lasx_xvst(v32i8_a, &v32i8_b, si12); // CHECK: call void @llvm.loongarch.lasx.xvst( ++ ++ // __lasx_xvstelm_b ++ // xd, rj, si8, idx ++ // VOID, V32QI, CVPOINTER, SI, UQI ++ __lasx_xvstelm_b(v32i8_a, &v32i8_b, 0, idx4); // CHECK: call void @llvm.loongarch.lasx.xvstelm.b( ++ ++ // __lasx_xvstelm_h ++ // xd, rj, si8, idx ++ // VOID, V16HI, CVPOINTER, SI, UQI ++ __lasx_xvstelm_h(v16i16_a, &v16i16_b, 0, idx3); // CHECK: call void @llvm.loongarch.lasx.xvstelm.h( ++ ++ // __lasx_xvstelm_w ++ // xd, rj, si8, idx ++ // VOID, V8SI, CVPOINTER, SI, UQI ++ __lasx_xvstelm_w(v8i32_a, &v8i32_b, 0, idx2); // CHECK: call void @llvm.loongarch.lasx.xvstelm.w( ++ ++ // __lasx_xvstelm_d ++ // xd, rj, si8, idx ++ // VOID, V4DI, CVPOINTER, SI, UQI ++ __lasx_xvstelm_d(v4i64_a, &v4i64_b, 0, idx1); // CHECK: call void @llvm.loongarch.lasx.xvstelm.d( ++ ++ // __lasx_xvinsve0_w ++ // xd, xj, ui3 ++ // V8SI, V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvinsve0_w(v8i32_a, v8i32_b, 2); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w( ++ ++ // __lasx_xvinsve0_d ++ // xd, xj, ui2 ++ // V4DI, V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvinsve0_d(v4i64_a, v4i64_b, ui2); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d( ++ ++ // __lasx_xvpickve_w ++ // xd, xj, ui3 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvpickve_w(v8i32_b, 2); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpickve.w( ++ ++ // __lasx_xvpickve_d ++ // xd, xj, ui2 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvpickve_d(v4i64_b, ui2); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpickve.d( ++ ++ // __lasx_xvssrlrn_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvssrlrn_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h( ++ ++ // __lasx_xvssrlrn_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvssrlrn_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w( ++ ++ // __lasx_xvssrlrn_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvssrlrn_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> 
@llvm.loongarch.lasx.xvssrlrn.w.d( ++ ++ // __lasx_xvssrln_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvssrln_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h( ++ ++ // __lasx_xvssrln_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvssrln_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w( ++ ++ // __lasx_xvssrln_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvssrln_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d( ++ ++ // __lasx_xvorn_v ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvorn_v(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvorn.v( ++ ++ // __lasx_xvldi ++ // xd, i13 ++ // V4DI, HI ++ v4i64_r = __lasx_xvldi(i13); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvldi( ++ ++ // __lasx_xvldx ++ // xd, rj, rk ++ // V32QI, CVPOINTER, DI ++ v32i8_r = __lasx_xvldx(&v32i8_a, i64_d); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvldx( ++ ++ // __lasx_xvstx ++ // xd, rj, rk ++ // VOID, V32QI, CVPOINTER, DI ++ __lasx_xvstx(v32i8_a, &v32i8_b, i64_d); // CHECK: call void @llvm.loongarch.lasx.xvstx( ++ ++ // __lasx_xvinsgr2vr_w ++ // xd, rj, ui3 ++ // V8SI, V8SI, SI, UQI ++ v8i32_r = __lasx_xvinsgr2vr_w(v8i32_a, i32_b, ui3); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w( ++ ++ // __lasx_xvinsgr2vr_d ++ // xd, rj, ui2 ++ // V4DI, V4DI, DI, UQI ++ v4i64_r = __lasx_xvinsgr2vr_d(v4i64_a, i64_b, ui2); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d( ++ ++ // __lasx_xvreplve0_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvreplve0_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b( ++ ++ // __lasx_xvreplve0_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvreplve0_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h( ++ ++ // __lasx_xvreplve0_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvreplve0_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w( ++ ++ // __lasx_xvreplve0_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvreplve0_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d( ++ ++ // __lasx_xvreplve0_q ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvreplve0_q(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q( ++ ++ // __lasx_vext2xv_h_b ++ // xd, xj ++ // V16HI, V32QI ++ v16i16_r = __lasx_vext2xv_h_b(v32i8_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b( ++ ++ // __lasx_vext2xv_w_h ++ // xd, xj ++ // V8SI, V16HI ++ v8i32_r = __lasx_vext2xv_w_h(v16i16_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h( ++ ++ // __lasx_vext2xv_d_w ++ // xd, xj ++ // V4DI, V8SI ++ v4i64_r = __lasx_vext2xv_d_w(v8i32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w( ++ ++ // __lasx_vext2xv_w_b ++ // xd, xj ++ // V8SI, V32QI ++ v8i32_r = __lasx_vext2xv_w_b(v32i8_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b( ++ ++ //gcc build fail ++ // __lasx_vext2xv_d_h ++ // xd, xj ++ // V4DI, V16HI ++ v4i64_r = __lasx_vext2xv_d_h(v16i16_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h( ++ ++ // __lasx_vext2xv_d_b ++ // xd, xj ++ // V4DI, V32QI ++ v4i64_r = __lasx_vext2xv_d_b(v32i8_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b( ++ ++ // __lasx_vext2xv_hu_bu ++ // xd, xj ++ // V16HI, V32QI ++ v16i16_r = __lasx_vext2xv_hu_bu(v32i8_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu( ++ ++ // 
__lasx_vext2xv_wu_hu ++ // xd, xj ++ // V8SI, V16HI ++ v8i32_r = __lasx_vext2xv_wu_hu(v16i16_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu( ++ ++ // __lasx_vext2xv_du_wu ++ // xd, xj ++ // V4DI, V8SI ++ v4i64_r = __lasx_vext2xv_du_wu(v8i32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu( ++ ++ // __lasx_vext2xv_wu_bu ++ // xd, xj ++ // V8SI, V32QI ++ v8i32_r = __lasx_vext2xv_wu_bu(v32i8_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu( ++ ++ //gcc build fail ++ // __lasx_vext2xv_du_hu ++ // xd, xj ++ // V4DI, V16HI ++ v4i64_r = __lasx_vext2xv_du_hu(v16i16_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu( ++ ++ // __lasx_vext2xv_du_bu ++ // xd, xj ++ // V4DI, V32QI ++ v4i64_r = __lasx_vext2xv_du_bu(v32i8_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu( ++ ++ // __lasx_xvpermi_q ++ // xd, xj, ui8 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvpermi_q(v32i8_a, v32i8_b, ui8); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpermi.q( ++ ++ // __lasx_xvpermi_d ++ // xd, xj, ui8 ++ // V4DI, V4DI, USI ++ v4i64_r = __lasx_xvpermi_d(v4i64_a, ui8); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpermi.d( ++ ++ // __lasx_xvperm_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvperm_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvperm.w( ++ ++ // __lasx_xvldrepl_b ++ // xd, rj, si12 ++ // V32QI, CVPOINTER, SI ++ v32i8_r = __lasx_xvldrepl_b(&v32i8_a, si12); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b( ++ ++ // __lasx_xvldrepl_h ++ // xd, rj, si11 ++ // V16HI, CVPOINTER, SI ++ v16i16_r = __lasx_xvldrepl_h(&v16i16_a, si11); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h( ++ ++ // __lasx_xvldrepl_w ++ // xd, rj, si10 ++ // V8SI, CVPOINTER, SI ++ v8i32_r = __lasx_xvldrepl_w(&v8i32_a, si10); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w( ++ ++ // __lasx_xvldrepl_d ++ // xd, rj, si9 ++ // V4DI, CVPOINTER, SI ++ v4i64_r = __lasx_xvldrepl_d(&v4i64_a, si9); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d( ++ ++ // __lasx_xvpickve2gr_w ++ // rd, xj, ui3 ++ // SI, V8SI, UQI ++ i32_r = __lasx_xvpickve2gr_w(v8i32_a, ui3_b); // CHECK: call i32 @llvm.loongarch.lasx.xvpickve2gr.w( ++ ++ // __lasx_xvpickve2gr_wu ++ // rd, xj, ui3 ++ // USI, V8SI, UQI ++ u32_r = __lasx_xvpickve2gr_wu(v8i32_a, ui3_b); // CHECK: call i32 @llvm.loongarch.lasx.xvpickve2gr.wu( ++ ++ // __lasx_xvpickve2gr_d ++ // rd, xj, ui2 ++ // DI, V4DI, UQI ++ i64_r = __lasx_xvpickve2gr_d(v4i64_a, ui2_b); // CHECK: call i64 @llvm.loongarch.lasx.xvpickve2gr.d( ++ ++ // __lasx_xvpickve2gr_du ++ // rd, xj, ui2 ++ // UDI, V4DI, UQI ++ u64_r = __lasx_xvpickve2gr_du(v4i64_a, ui2_b); // CHECK: call i64 @llvm.loongarch.lasx.xvpickve2gr.du( ++ ++ // __lasx_xvaddwev_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvaddwev_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d( ++ ++ // __lasx_xvaddwev_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvaddwev_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w( ++ ++ // __lasx_xvaddwev_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvaddwev_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h( ++ ++ // __lasx_xvaddwev_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvaddwev_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b( ++ ++ // __lasx_xvaddwev_q_du ++ // xd, xj, 
xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvaddwev_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du( ++ ++ // __lasx_xvaddwev_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvaddwev_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu( ++ ++ // __lasx_xvaddwev_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvaddwev_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu( ++ ++ // __lasx_xvaddwev_h_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvaddwev_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu( ++ ++ // __lasx_xvsubwev_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsubwev_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d( ++ ++ // __lasx_xvsubwev_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvsubwev_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w( ++ ++ // __lasx_xvsubwev_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvsubwev_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h( ++ ++ // __lasx_xvsubwev_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvsubwev_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b( ++ ++ // __lasx_xvsubwev_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvsubwev_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du( ++ ++ // __lasx_xvsubwev_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvsubwev_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu( ++ ++ // __lasx_xvsubwev_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvsubwev_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu( ++ ++ // __lasx_xvsubwev_h_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvsubwev_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu( ++ ++ // __lasx_xvmulwev_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmulwev_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d( ++ ++ // __lasx_xvmulwev_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvmulwev_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w( ++ ++ // __lasx_xvmulwev_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvmulwev_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h( ++ ++ // __lasx_xvmulwev_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvmulwev_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b( ++ ++ // __lasx_xvmulwev_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvmulwev_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du( ++ ++ // __lasx_xvmulwev_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvmulwev_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu( ++ ++ // __lasx_xvmulwev_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvmulwev_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu( ++ ++ // __lasx_xvmulwev_h_bu ++ // xd, xj, xk ++ // V16HI, 
UV32QI, UV32QI ++ v16i16_r = __lasx_xvmulwev_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu( ++ ++ // __lasx_xvaddwod_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvaddwod_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d( ++ ++ // __lasx_xvaddwod_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvaddwod_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w( ++ ++ // __lasx_xvaddwod_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvaddwod_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h( ++ ++ // __lasx_xvaddwod_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvaddwod_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b( ++ ++ // __lasx_xvaddwod_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvaddwod_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du( ++ ++ // __lasx_xvaddwod_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvaddwod_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu( ++ ++ // __lasx_xvaddwod_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvaddwod_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu( ++ ++ // __lasx_xvaddwod_h_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvaddwod_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu( ++ ++ // __lasx_xvsubwod_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsubwod_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d( ++ ++ // __lasx_xvsubwod_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvsubwod_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w( ++ ++ // __lasx_xvsubwod_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvsubwod_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h( ++ ++ // __lasx_xvsubwod_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvsubwod_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b( ++ ++ // __lasx_xvsubwod_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvsubwod_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du( ++ ++ // __lasx_xvsubwod_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvsubwod_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu( ++ ++ // __lasx_xvsubwod_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvsubwod_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu( ++ ++ // __lasx_xvsubwod_h_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvsubwod_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu( ++ ++ // __lasx_xvmulwod_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmulwod_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d( ++ ++ // __lasx_xvmulwod_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvmulwod_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w( ++ ++ // __lasx_xvmulwod_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = 
__lasx_xvmulwod_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h( ++ ++ // __lasx_xvmulwod_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvmulwod_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b( ++ ++ // __lasx_xvmulwod_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvmulwod_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du( ++ ++ // __lasx_xvmulwod_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvmulwod_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu( ++ ++ // __lasx_xvmulwod_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvmulwod_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu( ++ ++ // __lasx_xvmulwod_h_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvmulwod_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu( ++ ++ // __lasx_xvaddwev_d_wu_w ++ // xd, xj, xk ++ // V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvaddwev_d_wu_w(v8u32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w( ++ ++ // __lasx_xvaddwev_w_hu_h ++ // xd, xj, xk ++ // V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvaddwev_w_hu_h(v16u16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h( ++ ++ // __lasx_xvaddwev_h_bu_b ++ // xd, xj, xk ++ // V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvaddwev_h_bu_b(v32u8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b( ++ ++ // __lasx_xvmulwev_d_wu_w ++ // xd, xj, xk ++ // V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvmulwev_d_wu_w(v8u32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w( ++ ++ // __lasx_xvmulwev_w_hu_h ++ // xd, xj, xk ++ // V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvmulwev_w_hu_h(v16u16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h( ++ ++ // __lasx_xvmulwev_h_bu_b ++ // xd, xj, xk ++ // V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvmulwev_h_bu_b(v32u8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b( ++ ++ // __lasx_xvaddwod_d_wu_w ++ // xd, xj, xk ++ // V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvaddwod_d_wu_w(v8u32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w( ++ ++ // __lasx_xvaddwod_w_hu_h ++ // xd, xj, xk ++ // V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvaddwod_w_hu_h(v16u16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h( ++ ++ // __lasx_xvaddwod_h_bu_b ++ // xd, xj, xk ++ // V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvaddwod_h_bu_b(v32u8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b( ++ ++ // __lasx_xvmulwod_d_wu_w ++ // xd, xj, xk ++ // V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvmulwod_d_wu_w(v8u32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w( ++ ++ // __lasx_xvmulwod_w_hu_h ++ // xd, xj, xk ++ // V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvmulwod_w_hu_h(v16u16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h( ++ ++ // __lasx_xvmulwod_h_bu_b ++ // xd, xj, xk ++ // V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvmulwod_h_bu_b(v32u8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b( ++ ++ // __lasx_xvhaddw_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvhaddw_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d( ++ ++ // 
__lasx_xvhaddw_qu_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvhaddw_qu_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du( ++ ++ // __lasx_xvhsubw_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvhsubw_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d( ++ ++ // __lasx_xvhsubw_qu_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvhsubw_qu_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du( ++ ++ // __lasx_xvmaddwev_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmaddwev_q_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d( ++ ++ // __lasx_xvmaddwev_d_w ++ // xd, xj, xk ++ // V4DI, V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvmaddwev_d_w(v4i64_a, v8i32_b, v8i32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w( ++ ++ // __lasx_xvmaddwev_w_h ++ // xd, xj, xk ++ // V8SI, V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvmaddwev_w_h(v8i32_a, v16i16_b, v16i16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h( ++ ++ // __lasx_xvmaddwev_h_b ++ // xd, xj, xk ++ // V16HI, V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvmaddwev_h_b(v16i16_a, v32i8_b, v32i8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b( ++ ++ // __lasx_xvmaddwev_q_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmaddwev_q_du(v4u64_a, v4u64_b, v4u64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du( ++ ++ // __lasx_xvmaddwev_d_wu ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV8SI, UV8SI ++ v4u64_r = __lasx_xvmaddwev_d_wu(v4u64_a, v8u32_b, v8u32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu( ++ ++ // __lasx_xvmaddwev_w_hu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV16HI, UV16HI ++ v8u32_r = __lasx_xvmaddwev_w_hu(v8u32_a, v16u16_b, v16u16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu( ++ ++ // __lasx_xvmaddwev_h_bu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV32QI, UV32QI ++ v16u16_r = __lasx_xvmaddwev_h_bu(v16u16_a, v32u8_b, v32u8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu( ++ ++ // __lasx_xvmaddwod_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmaddwod_q_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d( ++ ++ // __lasx_xvmaddwod_d_w ++ // xd, xj, xk ++ // V4DI, V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvmaddwod_d_w(v4i64_a, v8i32_b, v8i32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w( ++ ++ // __lasx_xvmaddwod_w_h ++ // xd, xj, xk ++ // V8SI, V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvmaddwod_w_h(v8i32_a, v16i16_b, v16i16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h( ++ ++ // __lasx_xvmaddwod_h_b ++ // xd, xj, xk ++ // V16HI, V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvmaddwod_h_b(v16i16_a, v32i8_b, v32i8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b( ++ ++ // __lasx_xvmaddwod_q_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmaddwod_q_du(v4u64_a, v4u64_b, v4u64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du( ++ ++ // __lasx_xvmaddwod_d_wu ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV8SI, UV8SI ++ v4u64_r = __lasx_xvmaddwod_d_wu(v4u64_a, v8u32_b, v8u32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu( ++ ++ // __lasx_xvmaddwod_w_hu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV16HI, UV16HI ++ v8u32_r = __lasx_xvmaddwod_w_hu(v8u32_a, v16u16_b, v16u16_c); // 
CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu( ++ ++ // __lasx_xvmaddwod_h_bu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV32QI, UV32QI ++ v16u16_r = __lasx_xvmaddwod_h_bu(v16u16_a, v32u8_b, v32u8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu( ++ ++ // __lasx_xvmaddwev_q_du_d ++ // xd, xj, xk ++ // V4DI, V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvmaddwev_q_du_d(v4i64_a, v4u64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d( ++ ++ // __lasx_xvmaddwev_d_wu_w ++ // xd, xj, xk ++ // V4DI, V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvmaddwev_d_wu_w(v4i64_a, v8u32_b, v8i32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w( ++ ++ // __lasx_xvmaddwev_w_hu_h ++ // xd, xj, xk ++ // V8SI, V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvmaddwev_w_hu_h(v8i32_a, v16u16_b, v16i16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h( ++ ++ // __lasx_xvmaddwev_h_bu_b ++ // xd, xj, xk ++ // V16HI, V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvmaddwev_h_bu_b(v16i16_a, v32u8_b, v32i8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b( ++ ++ // __lasx_xvmaddwod_q_du_d ++ // xd, xj, xk ++ // V4DI, V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvmaddwod_q_du_d(v4i64_a, v4u64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d( ++ ++ // __lasx_xvmaddwod_d_wu_w ++ // xd, xj, xk ++ // V4DI, V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvmaddwod_d_wu_w(v4i64_a, v8u32_b, v8i32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w( ++ ++ // __lasx_xvmaddwod_w_hu_h ++ // xd, xj, xk ++ // V8SI, V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvmaddwod_w_hu_h(v8i32_a, v16u16_b, v16i16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h( ++ ++ // __lasx_xvmaddwod_h_bu_b ++ // xd, xj, xk ++ // V16HI, V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvmaddwod_h_bu_b(v16i16_a, v32u8_b, v32i8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b( ++ ++ // __lasx_xvrotr_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvrotr_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvrotr.b( ++ ++ // __lasx_xvrotr_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvrotr_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvrotr.h( ++ ++ // __lasx_xvrotr_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvrotr_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvrotr.w( ++ ++ // __lasx_xvrotr_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvrotr_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvrotr.d( ++ ++ // __lasx_xvadd_q ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvadd_q(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvadd.q( ++ ++ // __lasx_xvsub_q ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsub_q(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsub.q( ++ ++ // __lasx_xvaddwev_q_du_d ++ // xd, xj, xk ++ // V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvaddwev_q_du_d(v4u64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d( ++ ++ // __lasx_xvaddwod_q_du_d ++ // xd, xj, xk ++ // V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvaddwod_q_du_d(v4u64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d( ++ ++ // __lasx_xvmulwev_q_du_d ++ // xd, xj, xk ++ // V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvmulwev_q_du_d(v4u64_a, v4i64_b); // CHECK: call <4 x i64> 
@llvm.loongarch.lasx.xvmulwev.q.du.d( ++ ++ // __lasx_xvmulwod_q_du_d ++ // xd, xj, xk ++ // V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvmulwod_q_du_d(v4u64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d( ++ ++ // __lasx_xvmskgez_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvmskgez_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b( ++ ++ // __lasx_xvmsknz_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvmsknz_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b( ++ ++ // __lasx_xvexth_h_b ++ // xd, xj ++ // V16HI, V32QI ++ v16i16_r = __lasx_xvexth_h_b(v32i8_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b( ++ ++ // __lasx_xvexth_w_h ++ // xd, xj ++ // V8SI, V16HI ++ v8i32_r = __lasx_xvexth_w_h(v16i16_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h( ++ ++ // __lasx_xvexth_d_w ++ // xd, xj ++ // V4DI, V8SI ++ v4i64_r = __lasx_xvexth_d_w(v8i32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w( ++ ++ // __lasx_xvexth_q_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvexth_q_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d( ++ ++ // __lasx_xvexth_hu_bu ++ // xd, xj ++ // UV16HI, UV32QI ++ v16u16_r = __lasx_xvexth_hu_bu(v32u8_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu( ++ ++ // __lasx_xvexth_wu_hu ++ // xd, xj ++ // UV8SI, UV16HI ++ v8u32_r = __lasx_xvexth_wu_hu(v16u16_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu( ++ ++ // __lasx_xvexth_du_wu ++ // xd, xj ++ // UV4DI, UV8SI ++ v4u64_r = __lasx_xvexth_du_wu(v8u32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu( ++ ++ // __lasx_xvexth_qu_du ++ // xd, xj ++ // UV4DI, UV4DI ++ v4u64_r = __lasx_xvexth_qu_du(v4u64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du( ++ ++ // __lasx_xvextl_q_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvextl_q_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d( ++ ++ // __lasx_xvextl_qu_du ++ // xd, xj ++ // UV4DI, UV4DI ++ v4u64_r = __lasx_xvextl_qu_du(v4u64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du( ++ ++ // __lasx_xvrotri_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvrotri_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvrotri.b( ++ ++ // __lasx_xvrotri_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvrotri_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvrotri.h( ++ ++ // __lasx_xvrotri_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvrotri_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvrotri.w( ++ ++ // __lasx_xvrotri_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvrotri_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvrotri.d( ++ ++ // __lasx_xvsrlni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvsrlni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h( ++ ++ // __lasx_xvsrlni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvsrlni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w( ++ ++ // __lasx_xvsrlni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvsrlni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d( ++ ++ // __lasx_xvsrlni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvsrlni_d_q(v4i64_a, v4i64_b, ui7); // 
CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q( ++ ++ // __lasx_xvsrlrni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvsrlrni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h( ++ ++ // __lasx_xvsrlrni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvsrlrni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w( ++ ++ // __lasx_xvsrlrni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvsrlrni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d( ++ ++ // __lasx_xvsrlrni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvsrlrni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q( ++ ++ // __lasx_xvssrlni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvssrlni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h( ++ ++ // __lasx_xvssrlni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvssrlni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w( ++ ++ // __lasx_xvssrlni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvssrlni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d( ++ ++ // __lasx_xvssrlni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvssrlni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q( ++ ++ // __lasx_xvssrlni_bu_h ++ // xd, xj, ui4 ++ // UV32QI, UV32QI, V32QI, USI ++ v32u8_r = __lasx_xvssrlni_bu_h(v32u8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h( ++ ++ // __lasx_xvssrlni_hu_w ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, V16HI, USI ++ v16u16_r = __lasx_xvssrlni_hu_w(v16u16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w( ++ ++ // __lasx_xvssrlni_wu_d ++ // xd, xj, ui6 ++ // UV8SI, UV8SI, V8SI, USI ++ v8u32_r = __lasx_xvssrlni_wu_d(v8u32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d( ++ ++ // __lasx_xvssrlni_du_q ++ // xd, xj, ui7 ++ // UV4DI, UV4DI, V4DI, USI ++ v4u64_r = __lasx_xvssrlni_du_q(v4u64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q( ++ ++ // __lasx_xvssrlrni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvssrlrni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h( ++ ++ // __lasx_xvssrlrni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvssrlrni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w( ++ ++ // __lasx_xvssrlrni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvssrlrni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d( ++ ++ // __lasx_xvssrlrni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvssrlrni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q( ++ ++ // __lasx_xvssrlrni_bu_h ++ // xd, xj, ui4 ++ // UV32QI, UV32QI, V32QI, USI ++ v32u8_r = __lasx_xvssrlrni_bu_h(v32u8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h( ++ ++ // __lasx_xvssrlrni_hu_w ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, V16HI, USI ++ v16u16_r = __lasx_xvssrlrni_hu_w(v16u16_a, v16i16_b, ui5); // 
CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w( ++ ++ // __lasx_xvssrlrni_wu_d ++ // xd, xj, ui6 ++ // UV8SI, UV8SI, V8SI, USI ++ v8u32_r = __lasx_xvssrlrni_wu_d(v8u32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d( ++ ++ // __lasx_xvssrlrni_du_q ++ // xd, xj, ui7 ++ // UV4DI, UV4DI, V4DI, USI ++ v4u64_r = __lasx_xvssrlrni_du_q(v4u64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q( ++ ++ // __lasx_xvsrani_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvsrani_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h( ++ ++ // __lasx_xvsrani_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvsrani_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w( ++ ++ // __lasx_xvsrani_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvsrani_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d( ++ ++ // __lasx_xvsrani_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvsrani_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q( ++ ++ // __lasx_xvsrarni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvsrarni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h( ++ ++ // __lasx_xvsrarni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvsrarni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w( ++ ++ // __lasx_xvsrarni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvsrarni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d( ++ ++ // __lasx_xvsrarni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvsrarni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q( ++ ++ // __lasx_xvssrani_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvssrani_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h( ++ ++ // __lasx_xvssrani_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvssrani_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w( ++ ++ // __lasx_xvssrani_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvssrani_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d( ++ ++ // __lasx_xvssrani_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvssrani_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q( ++ ++ // __lasx_xvssrani_bu_h ++ // xd, xj, ui4 ++ // UV32QI, UV32QI, V32QI, USI ++ v32u8_r = __lasx_xvssrani_bu_h(v32u8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h( ++ ++ // __lasx_xvssrani_hu_w ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, V16HI, USI ++ v16u16_r = __lasx_xvssrani_hu_w(v16u16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w( ++ ++ // __lasx_xvssrani_wu_d ++ // xd, xj, ui6 ++ // UV8SI, UV8SI, V8SI, USI ++ v8u32_r = __lasx_xvssrani_wu_d(v8u32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d( ++ ++ // __lasx_xvssrani_du_q ++ // xd, xj, ui7 ++ // UV4DI, UV4DI, V4DI, USI ++ v4u64_r = __lasx_xvssrani_du_q(v4u64_a, v4i64_b, ui7); // CHECK: call <4 x i64> 
@llvm.loongarch.lasx.xvssrani.du.q( ++ ++ // __lasx_xvssrarni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvssrarni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h( ++ ++ // __lasx_xvssrarni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvssrarni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w( ++ ++ // __lasx_xvssrarni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvssrarni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d( ++ ++ // __lasx_xvssrarni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvssrarni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q( ++ ++ // __lasx_xvssrarni_bu_h ++ // xd, xj, ui4 ++ // UV32QI, UV32QI, V32QI, USI ++ v32u8_r = __lasx_xvssrarni_bu_h(v32u8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h( ++ ++ // __lasx_xvssrarni_hu_w ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, V16HI, USI ++ v16u16_r = __lasx_xvssrarni_hu_w(v16u16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w( ++ ++ // __lasx_xvssrarni_wu_d ++ // xd, xj, ui6 ++ // UV8SI, UV8SI, V8SI, USI ++ v8u32_r = __lasx_xvssrarni_wu_d(v8u32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d( ++ ++ // __lasx_xvssrarni_du_q ++ // xd, xj, ui7 ++ // UV4DI, UV4DI, V4DI, USI ++ v4u64_r = __lasx_xvssrarni_du_q(v4u64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q( ++ ++ // __lasx_xbnz_v ++ // rd, xj ++ // SI, UV32QI ++ i32_r = __lasx_xbnz_v(v32u8_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.v( ++ ++ // __lasx_xbz_v ++ // rd, xj ++ // SI, UV32QI ++ i32_r = __lasx_xbz_v(v32u8_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.v( ++ ++ // __lasx_xbnz_b ++ // rd, xj ++ // SI, UV32QI ++ i32_r = __lasx_xbnz_b(v32u8_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.b( ++ ++ // __lasx_xbnz_h ++ // rd, xj ++ // SI, UV16HI ++ i32_r = __lasx_xbnz_h(v16u16_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.h( ++ ++ // __lasx_xbnz_w ++ // rd, xj ++ // SI, UV8SI ++ i32_r = __lasx_xbnz_w(v8u32_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.w( ++ ++ // __lasx_xbnz_d ++ // rd, xj ++ // SI, UV4DI ++ i32_r = __lasx_xbnz_d(v4u64_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.d( ++ ++ // __lasx_xbz_b ++ // rd, xj ++ // SI, UV32QI ++ i32_r = __lasx_xbz_b(v32u8_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.b( ++ ++ // __lasx_xbz_h ++ // rd, xj ++ // SI, UV16HI ++ i32_r = __lasx_xbz_h(v16u16_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.h( ++ ++ // __lasx_xbz_w ++ // rd, xj ++ // SI, UV8SI ++ i32_r = __lasx_xbz_w(v8u32_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.w( ++ ++ // __lasx_xbz_d ++ // rd, xj ++ // SI, UV4DI ++ i32_r = __lasx_xbz_d(v4u64_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.d( ++ ++ v32i8_r = __lasx_xvrepli_b(2); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvrepli.b( ++ ++ v16i16_r = __lasx_xvrepli_h(2); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvrepli.h( ++ ++ v8i32_r = __lasx_xvrepli_w(2); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvrepli.w( ++ ++ v4i64_r = __lasx_xvrepli_d(2); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvrepli.d( ++ ++ v4f64_r = __lasx_xvpickve_d_f(v4f64_a, 2); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f( ++ ++ v8f32_r = __lasx_xvpickve_w_f(v8f32_a, 2); // CHECK: call <8 x float> 
@llvm.loongarch.lasx.xvpickve.w.f( ++} +diff --git a/clang/test/CodeGen/builtins-loongarch-lsx-error.c b/clang/test/CodeGen/builtins-loongarch-lsx-error.c +new file mode 100644 +index 000000000..f566a7362 +--- /dev/null ++++ b/clang/test/CodeGen/builtins-loongarch-lsx-error.c +@@ -0,0 +1,250 @@ ++// REQUIRES: loongarch-registered-target ++// RUN: %clang_cc1 -triple loongarch64-unknown-linux-gnu -fsyntax-only %s \ ++// RUN: -target-feature +lsx \ ++// RUN: -verify -o - 2>&1 ++ ++#include <lsxintrin.h> ++ ++void test() { ++ v16i8 v16i8_a = (v16i8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16i8 v16i8_b = (v16i8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16i8 v16i8_c = (v16i8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16i8 v16i8_r; ++ v8i16 v8i16_a = (v8i16){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8i16 v8i16_b = (v8i16){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8i16 v8i16_c = (v8i16){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8i16 v8i16_r; ++ v4i32 v4i32_a = (v4i32){0, 1, 2, 3}; ++ v4i32 v4i32_b = (v4i32){1, 2, 3, 4}; ++ v4i32 v4i32_c = (v4i32){2, 3, 4, 5}; ++ v4i32 v4i32_r; ++ v2i64 v2i64_a = (v2i64){0, 1}; ++ v2i64 v2i64_b = (v2i64){1, 2}; ++ v2i64 v2i64_c = (v2i64){2, 3}; ++ v2i64 v2i64_r; ++ ++ v16u8 v16u8_a = (v16u8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16u8 v16u8_b = (v16u8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16u8 v16u8_c = (v16u8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16u8 v16u8_r; ++ v8u16 v8u16_a = (v8u16){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8u16 v8u16_b = (v8u16){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8u16 v8u16_c = (v8u16){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8u16 v8u16_r; ++ v4u32 v4u32_a = (v4u32){0, 1, 2, 3}; ++ v4u32 v4u32_b = (v4u32){1, 2, 3, 4}; ++ v4u32 v4u32_c = (v4u32){2, 3, 4, 5}; ++ v4u32 v4u32_r; ++ v2u64 v2u64_a = (v2u64){0, 1}; ++ v2u64 v2u64_b = (v2u64){1, 2}; ++ v2u64 v2u64_c = (v2u64){2, 3}; ++ v2u64 v2u64_r; ++ ++ v4f32 v4f32_a = (v4f32){0.5, 1, 2, 3}; ++ v4f32 v4f32_b = (v4f32){1.5, 2, 3, 4}; ++ v4f32 v4f32_c = (v4f32){2.5, 3, 4, 5}; ++ v4f32 v4f32_r; ++ v2f64 v2f64_a = (v2f64){0.5, 1}; ++ v2f64 v2f64_b = (v2f64){1.5, 2}; ++ v2f64 v2f64_c = (v2f64){2.5, 3}; ++ v2f64 v2f64_r; ++ ++ int i32_r; ++ int i32_a = 1; ++ int i32_b = 2; ++ unsigned int u32_r; ++ unsigned int u32_a = 1; ++ unsigned int u32_b = 2; ++ long long i64_r; ++ long long i64_a = 1; ++ long long i64_b = 2; ++ long long i64_c = 3; ++ unsigned long long u64_r; ++ unsigned long long u64_a = 1; ++ unsigned long long u64_b = 2; ++ unsigned long long u64_c = 3; ++ ++ v16i8_r = __lsx_vslli_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vslli_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vslli_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vslli_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vsrai_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vsrai_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vsrai_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vsrai_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vsrari_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the 
valid range [0, 7]}} ++ v8i16_r = __lsx_vsrari_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vsrari_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vsrari_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vsrli_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vsrli_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vsrli_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vsrli_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vsrlri_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vsrlri_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vsrlri_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vsrlri_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16u8_r = __lsx_vbitclri_b(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8u16_r = __lsx_vbitclri_h(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4u32_r = __lsx_vbitclri_w(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vbitclri_d(v2u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16u8_r = __lsx_vbitseti_b(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8u16_r = __lsx_vbitseti_h(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4u32_r = __lsx_vbitseti_w(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vbitseti_d(v2u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16u8_r = __lsx_vbitrevi_b(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8u16_r = __lsx_vbitrevi_h(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4u32_r = __lsx_vbitrevi_w(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vbitrevi_d(v2u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vaddi_bu(v16i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i16_r = __lsx_vaddi_hu(v8i16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vaddi_wu(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vaddi_du(v2i64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vsubi_bu(v16i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i16_r = __lsx_vsubi_hu(v8i16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vsubi_wu(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = 
__lsx_vsubi_du(v2i64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vmaxi_b(v16i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i16_r = __lsx_vmaxi_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i32_r = __lsx_vmaxi_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v2i64_r = __lsx_vmaxi_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16u8_r = __lsx_vmaxi_bu(v16u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u16_r = __lsx_vmaxi_hu(v8u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vmaxi_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vmaxi_du(v2u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vmini_b(v16i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i16_r = __lsx_vmini_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i32_r = __lsx_vmini_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v2i64_r = __lsx_vmini_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16u8_r = __lsx_vmini_bu(v16u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u16_r = __lsx_vmini_hu(v8u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vmini_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vmini_du(v2u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vseqi_b(v16i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i16_r = __lsx_vseqi_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i32_r = __lsx_vseqi_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v2i64_r = __lsx_vseqi_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i8_r = __lsx_vslti_b(v16i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i16_r = __lsx_vslti_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i32_r = __lsx_vslti_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v2i64_r = __lsx_vslti_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i8_r = __lsx_vslti_bu(v16u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i16_r = __lsx_vslti_hu(v8u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vslti_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vslti_du(v2u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vslei_b(v16i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ 
v8i16_r = __lsx_vslei_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i32_r = __lsx_vslei_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v2i64_r = __lsx_vslei_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i8_r = __lsx_vslei_bu(v16u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i16_r = __lsx_vslei_hu(v8u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vslei_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vslei_du(v2u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vsat_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vsat_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vsat_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vsat_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16u8_r = __lsx_vsat_bu(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8u16_r = __lsx_vsat_hu(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4u32_r = __lsx_vsat_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vsat_du(v2u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vreplvei_b(v16i8_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vreplvei_h(v8i16_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i32_r = __lsx_vreplvei_w(v4i32_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v2i64_r = __lsx_vreplvei_d(v2i64_a, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ v16u8_r = __lsx_vandi_b(v16u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16u8_r = __lsx_vori_b(v16u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16u8_r = __lsx_vnori_b(v16u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16u8_r = __lsx_vxori_b(v16u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16u8_r = __lsx_vbitseli_b(v16u8_a, v16u8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i8_r = __lsx_vshuf4i_b(v16i8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v8i16_r = __lsx_vshuf4i_h(v8i16_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v4i32_r = __lsx_vshuf4i_w(v4i32_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ i32_r = __lsx_vpickve2gr_b(v16i8_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ i32_r = __lsx_vpickve2gr_h(v8i16_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ i32_r = __lsx_vpickve2gr_w(v4i32_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ i64_r = 
__lsx_vpickve2gr_d(v2i64_a, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ u32_r = __lsx_vpickve2gr_bu(v16i8_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ u32_r = __lsx_vpickve2gr_hu(v8i16_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ u32_r = __lsx_vpickve2gr_wu(v4i32_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ u64_r = __lsx_vpickve2gr_du(v2i64_a, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ v16i8_r = __lsx_vinsgr2vr_b(v16i8_a, i32_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vinsgr2vr_h(v8i16_a, i32_b, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i32_r = __lsx_vinsgr2vr_w(v4i32_a, i32_b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v2i64_r = __lsx_vinsgr2vr_d(v2i64_a, i32_b, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ v8i16_r = __lsx_vsllwil_h_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i32_r = __lsx_vsllwil_w_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v2i64_r = __lsx_vsllwil_d_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u16_r = __lsx_vsllwil_hu_bu(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4u32_r = __lsx_vsllwil_wu_hu(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v2u64_r = __lsx_vsllwil_du_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vfrstpi_b(v16i8_a, v16i8_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i16_r = __lsx_vfrstpi_h(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vshuf4i_d(v2i64_a, v2i64_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i8_r = __lsx_vbsrl_v(v16i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vbsll_v(v16i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vextrins_b(v16i8_a, v16i8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v8i16_r = __lsx_vextrins_h(v8i16_a, v8i16_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v4i32_r = __lsx_vextrins_w(v4i32_a, v4i32_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v2i64_r = __lsx_vextrins_d(v2i64_a, v2i64_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ __lsx_vstelm_b(v16i8_a, &v16i8_b, 0, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ __lsx_vstelm_h(v8i16_a, &v8i16_b, 0, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ __lsx_vstelm_w(v4i32_a, &v4i32_b, 0, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ __lsx_vstelm_d(v2i64_a, &v2i64_b, 0, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ v16i8_r = __lsx_vldrepl_b(&v16i8_a, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ v8i16_r = 
__lsx_vldrepl_h(&v8i16_a, -1025); // expected-error {{argument value -1025 is outside the valid range [-1024, 1023]}} ++ v4i32_r = __lsx_vldrepl_w(&v4i32_a, -513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ v2i64_r = __lsx_vldrepl_d(&v2i64_a, -257); // expected-error {{argument value -257 is outside the valid range [-256, 255]}} ++ v16i8_r = __lsx_vrotri_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vrotri_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vrotri_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vrotri_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vsrlni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vsrlni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vsrlni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vsrlni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vssrlni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vssrlni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vssrlni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vssrlni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16u8_r = __lsx_vssrlni_bu_h(v16u8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u16_r = __lsx_vssrlni_hu_w(v8u16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vssrlni_wu_d(v4u32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2u64_r = __lsx_vssrlni_du_q(v2u64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vssrlrni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vssrlrni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vssrlrni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vssrlrni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16u8_r = __lsx_vssrlrni_bu_h(v16u8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u16_r = __lsx_vssrlrni_hu_w(v8u16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vssrlrni_wu_d(v4u32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2u64_r = __lsx_vssrlrni_du_q(v2u64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vsrani_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the 
valid range [0, 15]}} ++ v8i16_r = __lsx_vsrani_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vsrani_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vsrani_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vsrarni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vsrarni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vsrarni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vsrarni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vssrani_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vssrani_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vssrani_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vssrani_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16u8_r = __lsx_vssrani_bu_h(v16u8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u16_r = __lsx_vssrani_hu_w(v8u16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vssrani_wu_d(v4u32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2u64_r = __lsx_vssrani_du_q(v2u64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vssrarni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vssrarni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vssrarni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vssrarni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16u8_r = __lsx_vssrarni_bu_h(v16u8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u16_r = __lsx_vssrarni_hu_w(v8u16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vssrarni_wu_d(v4u32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2u64_r = __lsx_vssrarni_du_q(v2u64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v4i32_r = __lsx_vpermi_w(v4i32_a, v4i32_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i8_r = __lsx_vld(&v16i8_a, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ __lsx_vst(v16i8_a, &v16i8_b, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ v2i64_r = __lsx_vldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}} ++} +diff --git 
a/clang/test/CodeGen/builtins-loongarch-lsx.c b/clang/test/CodeGen/builtins-loongarch-lsx.c +new file mode 100644 +index 000000000..2b86c0b2e +--- /dev/null ++++ b/clang/test/CodeGen/builtins-loongarch-lsx.c +@@ -0,0 +1,3645 @@ ++// REQUIRES: loongarch-registered-target ++// RUN: %clang_cc1 -triple loongarch64-unknown-linux-gnu -emit-llvm %s \ ++// RUN: -target-feature +lsx \ ++// RUN: -o - | FileCheck %s ++ ++#include <lsxintrin.h> ++ ++#define ui1 0 ++#define ui2 1 ++#define ui3 4 ++#define ui4 7 ++#define ui5 25 ++#define ui6 44 ++#define ui7 100 ++#define ui8 127 //200 ++#define si5 -4 ++#define si8 -100 ++#define si9 0 ++#define si10 0 ++#define si11 0 ++#define si12 0 ++#define i10 500 ++#define i13 4000 ++#define mode 11 ++#define idx1 1 ++#define idx2 2 ++#define idx3 4 ++#define idx4 8 ++ ++void test(void) { ++ v16i8 v16i8_a = (v16i8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16i8 v16i8_b = (v16i8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16i8 v16i8_c = (v16i8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16i8 v16i8_r; ++ v8i16 v8i16_a = (v8i16){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8i16 v8i16_b = (v8i16){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8i16 v8i16_c = (v8i16){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8i16 v8i16_r; ++ v4i32 v4i32_a = (v4i32){0, 1, 2, 3}; ++ v4i32 v4i32_b = (v4i32){1, 2, 3, 4}; ++ v4i32 v4i32_c = (v4i32){2, 3, 4, 5}; ++ v4i32 v4i32_r; ++ v2i64 v2i64_a = (v2i64){0, 1}; ++ v2i64 v2i64_b = (v2i64){1, 2}; ++ v2i64 v2i64_c = (v2i64){2, 3}; ++ v2i64 v2i64_r; ++ ++ v16u8 v16u8_a = (v16u8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16u8 v16u8_b = (v16u8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16u8 v16u8_c = (v16u8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16u8 v16u8_r; ++ v8u16 v8u16_a = (v8u16){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8u16 v8u16_b = (v8u16){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8u16 v8u16_c = (v8u16){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8u16 v8u16_r; ++ v4u32 v4u32_a = (v4u32){0, 1, 2, 3}; ++ v4u32 v4u32_b = (v4u32){1, 2, 3, 4}; ++ v4u32 v4u32_c = (v4u32){2, 3, 4, 5}; ++ v4u32 v4u32_r; ++ v2u64 v2u64_a = (v2u64){0, 1}; ++ v2u64 v2u64_b = (v2u64){1, 2}; ++ v2u64 v2u64_c = (v2u64){2, 3}; ++ v2u64 v2u64_r; ++ ++ v4f32 v4f32_a = (v4f32){0.5, 1, 2, 3}; ++ v4f32 v4f32_b = (v4f32){1.5, 2, 3, 4}; ++ v4f32 v4f32_c = (v4f32){2.5, 3, 4, 5}; ++ v4f32 v4f32_r; ++ v2f64 v2f64_a = (v2f64){0.5, 1}; ++ v2f64 v2f64_b = (v2f64){1.5, 2}; ++ v2f64 v2f64_c = (v2f64){2.5, 3}; ++ v2f64 v2f64_r; ++ ++ int i32_r; ++ int i32_a = 1; ++ int i32_b = 2; ++ unsigned int u32_r; ++ unsigned int u32_a = 1; ++ unsigned int u32_b = 2; ++ long long i64_r; ++ long long i64_a = 1; ++ long long i64_b = 2; ++ long long i64_c = 3; ++ long int i64_d = 0; ++ unsigned long long u64_r; ++ unsigned long long u64_a = 1; ++ unsigned long long u64_b = 2; ++ unsigned long long u64_c = 3; ++ ++ // __lsx_vsll_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsll_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsll.b( ++ ++ // __lsx_vsll_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsll_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsll.h( ++ ++ // __lsx_vsll_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsll_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsll.w( ++ ++ // __lsx_vsll_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsll_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsll.d( ++ ++ // __lsx_vslli_b ++ // vd, vj, ui3 ++ // 
V16QI, V16QI, UQI ++ v16i8_r = __lsx_vslli_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslli.b( ++ ++ // __lsx_vslli_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vslli_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslli.h( ++ ++ // __lsx_vslli_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vslli_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslli.w( ++ ++ // __lsx_vslli_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vslli_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslli.d( ++ ++ // __lsx_vsra_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsra_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsra.b( ++ ++ // __lsx_vsra_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsra_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsra.h( ++ ++ // __lsx_vsra_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsra_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsra.w( ++ ++ // __lsx_vsra_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsra_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsra.d( ++ ++ // __lsx_vsrai_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsrai_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrai.b( ++ ++ // __lsx_vsrai_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsrai_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrai.h( ++ ++ // __lsx_vsrai_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsrai_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrai.w( ++ ++ // __lsx_vsrai_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsrai_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrai.d( ++ ++ // __lsx_vsrar_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsrar_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrar.b( ++ ++ // __lsx_vsrar_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsrar_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrar.h( ++ ++ // __lsx_vsrar_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsrar_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrar.w( ++ ++ // __lsx_vsrar_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsrar_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrar.d( ++ ++ // __lsx_vsrari_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsrari_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrari.b( ++ ++ // __lsx_vsrari_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsrari_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrari.h( ++ ++ // __lsx_vsrari_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsrari_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrari.w( ++ ++ // __lsx_vsrari_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsrari_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrari.d( ++ ++ // __lsx_vsrl_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsrl_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrl.b( ++ ++ // __lsx_vsrl_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsrl_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrl.h( ++ ++ // __lsx_vsrl_w ++ // vd, vj, vk ++ // V4SI, 
V4SI, V4SI ++ v4i32_r = __lsx_vsrl_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrl.w( ++ ++ // __lsx_vsrl_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsrl_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrl.d( ++ ++ // __lsx_vsrli_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsrli_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrli.b( ++ ++ // __lsx_vsrli_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsrli_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrli.h( ++ ++ // __lsx_vsrli_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsrli_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrli.w( ++ ++ // __lsx_vsrli_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsrli_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrli.d( ++ ++ // __lsx_vsrlr_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsrlr_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrlr.b( ++ ++ // __lsx_vsrlr_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsrlr_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrlr.h( ++ ++ // __lsx_vsrlr_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsrlr_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrlr.w( ++ ++ // __lsx_vsrlr_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsrlr_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrlr.d( ++ ++ // __lsx_vsrlri_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsrlri_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrlri.b( ++ ++ // __lsx_vsrlri_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsrlri_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrlri.h( ++ ++ // __lsx_vsrlri_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsrlri_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrlri.w( ++ ++ // __lsx_vsrlri_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsrlri_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrlri.d( ++ ++ // __lsx_vbitclr_b ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vbitclr_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitclr.b( ++ ++ // __lsx_vbitclr_h ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vbitclr_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitclr.h( ++ ++ // __lsx_vbitclr_w ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vbitclr_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitclr.w( ++ ++ // __lsx_vbitclr_d ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vbitclr_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitclr.d( ++ ++ // __lsx_vbitclri_b ++ // vd, vj, ui3 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vbitclri_b(v16u8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitclri.b( ++ ++ // __lsx_vbitclri_h ++ // vd, vj, ui4 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vbitclri_h(v8u16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitclri.h( ++ ++ // __lsx_vbitclri_w ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vbitclri_w(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitclri.w( ++ ++ // __lsx_vbitclri_d ++ // vd, vj, ui6 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vbitclri_d(v2u64_a, ui6); // CHECK: call <2 x i64> 
@llvm.loongarch.lsx.vbitclri.d( ++ ++ // __lsx_vbitset_b ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vbitset_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitset.b( ++ ++ // __lsx_vbitset_h ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vbitset_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitset.h( ++ ++ // __lsx_vbitset_w ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vbitset_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitset.w( ++ ++ // __lsx_vbitset_d ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vbitset_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitset.d( ++ ++ // __lsx_vbitseti_b ++ // vd, vj, ui3 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vbitseti_b(v16u8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitseti.b( ++ ++ // __lsx_vbitseti_h ++ // vd, vj, ui4 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vbitseti_h(v8u16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitseti.h( ++ ++ // __lsx_vbitseti_w ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vbitseti_w(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitseti.w( ++ ++ // __lsx_vbitseti_d ++ // vd, vj, ui6 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vbitseti_d(v2u64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitseti.d( ++ ++ // __lsx_vbitrev_b ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vbitrev_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitrev.b( ++ ++ // __lsx_vbitrev_h ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vbitrev_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitrev.h( ++ ++ // __lsx_vbitrev_w ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vbitrev_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitrev.w( ++ ++ // __lsx_vbitrev_d ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vbitrev_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitrev.d( ++ ++ // __lsx_vbitrevi_b ++ // vd, vj, ui3 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vbitrevi_b(v16u8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b( ++ ++ // __lsx_vbitrevi_h ++ // vd, vj, ui4 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vbitrevi_h(v8u16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h( ++ ++ // __lsx_vbitrevi_w ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vbitrevi_w(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w( ++ ++ // __lsx_vbitrevi_d ++ // vd, vj, ui6 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vbitrevi_d(v2u64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d( ++ ++ // __lsx_vadd_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vadd_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vadd.b( ++ ++ // __lsx_vadd_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vadd_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vadd.h( ++ ++ // __lsx_vadd_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vadd_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vadd.w( ++ ++ // __lsx_vadd_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vadd_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vadd.d( ++ ++ // __lsx_vaddi_bu ++ // vd, vj, ui5 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vaddi_bu(v16i8_a, ui5); // CHECK: call <16 x i8> 
@llvm.loongarch.lsx.vaddi.bu( ++ ++ // __lsx_vaddi_hu ++ // vd, vj, ui5 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vaddi_hu(v8i16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddi.hu( ++ ++ // __lsx_vaddi_wu ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vaddi_wu(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddi.wu( ++ ++ // __lsx_vaddi_du ++ // vd, vj, ui5 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vaddi_du(v2i64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddi.du( ++ ++ // __lsx_vsub_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsub_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsub.b( ++ ++ // __lsx_vsub_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsub_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsub.h( ++ ++ // __lsx_vsub_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsub_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsub.w( ++ ++ // __lsx_vsub_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsub_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsub.d( ++ ++ // __lsx_vsubi_bu ++ // vd, vj, ui5 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsubi_bu(v16i8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsubi.bu( ++ ++ // __lsx_vsubi_hu ++ // vd, vj, ui5 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsubi_hu(v8i16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubi.hu( ++ ++ // __lsx_vsubi_wu ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsubi_wu(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubi.wu( ++ ++ // __lsx_vsubi_du ++ // vd, vj, ui5 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsubi_du(v2i64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubi.du( ++ ++ // __lsx_vmax_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmax_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmax.b( ++ ++ // __lsx_vmax_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmax_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmax.h( ++ ++ // __lsx_vmax_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmax_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmax.w( ++ ++ // __lsx_vmax_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmax_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmax.d( ++ ++ // __lsx_vmaxi_b ++ // vd, vj, si5 ++ // V16QI, V16QI, QI ++ v16i8_r = __lsx_vmaxi_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmaxi.b( ++ ++ // __lsx_vmaxi_h ++ // vd, vj, si5 ++ // V8HI, V8HI, QI ++ v8i16_r = __lsx_vmaxi_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaxi.h( ++ ++ // __lsx_vmaxi_w ++ // vd, vj, si5 ++ // V4SI, V4SI, QI ++ v4i32_r = __lsx_vmaxi_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaxi.w( ++ ++ // __lsx_vmaxi_d ++ // vd, vj, si5 ++ // V2DI, V2DI, QI ++ v2i64_r = __lsx_vmaxi_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaxi.d( ++ ++ // __lsx_vmax_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vmax_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmax.bu( ++ ++ // __lsx_vmax_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vmax_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmax.hu( ++ ++ // __lsx_vmax_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vmax_wu(v4u32_a, v4u32_b); // CHECK: call <4 x 
i32> @llvm.loongarch.lsx.vmax.wu( ++ ++ // __lsx_vmax_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmax_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmax.du( ++ ++ // __lsx_vmaxi_bu ++ // vd, vj, ui5 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vmaxi_bu(v16u8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu( ++ ++ // __lsx_vmaxi_hu ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vmaxi_hu(v8u16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu( ++ ++ // __lsx_vmaxi_wu ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vmaxi_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu( ++ ++ // __lsx_vmaxi_du ++ // vd, vj, ui5 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vmaxi_du(v2u64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaxi.du( ++ ++ // __lsx_vmin_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmin_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmin.b( ++ ++ // __lsx_vmin_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmin_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmin.h( ++ ++ // __lsx_vmin_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmin_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmin.w( ++ ++ // __lsx_vmin_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmin_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmin.d( ++ ++ // __lsx_vmini_b ++ // vd, vj, si5 ++ // V16QI, V16QI, QI ++ v16i8_r = __lsx_vmini_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmini.b( ++ ++ // __lsx_vmini_h ++ // vd, vj, si5 ++ // V8HI, V8HI, QI ++ v8i16_r = __lsx_vmini_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmini.h( ++ ++ // __lsx_vmini_w ++ // vd, vj, si5 ++ // V4SI, V4SI, QI ++ v4i32_r = __lsx_vmini_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmini.w( ++ ++ // __lsx_vmini_d ++ // vd, vj, si5 ++ // V2DI, V2DI, QI ++ v2i64_r = __lsx_vmini_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmini.d( ++ ++ // __lsx_vmin_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vmin_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmin.bu( ++ ++ // __lsx_vmin_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vmin_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmin.hu( ++ ++ // __lsx_vmin_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vmin_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmin.wu( ++ ++ // __lsx_vmin_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmin_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmin.du( ++ ++ // __lsx_vmini_bu ++ // vd, vj, ui5 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vmini_bu(v16u8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmini.bu( ++ ++ // __lsx_vmini_hu ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vmini_hu(v8u16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmini.hu( ++ ++ // __lsx_vmini_wu ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vmini_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmini.wu( ++ ++ // __lsx_vmini_du ++ // vd, vj, ui5 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vmini_du(v2u64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmini.du( ++ ++ // __lsx_vseq_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = 
__lsx_vseq_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vseq.b( ++ ++ // __lsx_vseq_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vseq_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vseq.h( ++ ++ // __lsx_vseq_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vseq_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vseq.w( ++ ++ // __lsx_vseq_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vseq_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vseq.d( ++ ++ // __lsx_vseqi_b ++ // vd, vj, si5 ++ // V16QI, V16QI, QI ++ v16i8_r = __lsx_vseqi_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vseqi.b( ++ ++ // __lsx_vseqi_h ++ // vd, vj, si5 ++ // V8HI, V8HI, QI ++ v8i16_r = __lsx_vseqi_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vseqi.h( ++ ++ // __lsx_vseqi_w ++ // vd, vj, si5 ++ // V4SI, V4SI, QI ++ v4i32_r = __lsx_vseqi_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vseqi.w( ++ ++ // __lsx_vseqi_d ++ // vd, vj, si5 ++ // V2DI, V2DI, QI ++ v2i64_r = __lsx_vseqi_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vseqi.d( ++ ++ // __lsx_vslti_b ++ // vd, vj, si5 ++ // V16QI, V16QI, QI ++ v16i8_r = __lsx_vslti_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslti.b( ++ ++ // __lsx_vslt_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vslt_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslt.b( ++ ++ // __lsx_vslt_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vslt_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslt.h( ++ ++ // __lsx_vslt_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vslt_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslt.w( ++ ++ // __lsx_vslt_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vslt_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslt.d( ++ ++ // __lsx_vslti_h ++ // vd, vj, si5 ++ // V8HI, V8HI, QI ++ v8i16_r = __lsx_vslti_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslti.h( ++ ++ // __lsx_vslti_w ++ // vd, vj, si5 ++ // V4SI, V4SI, QI ++ v4i32_r = __lsx_vslti_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslti.w( ++ ++ // __lsx_vslti_d ++ // vd, vj, si5 ++ // V2DI, V2DI, QI ++ v2i64_r = __lsx_vslti_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslti.d( ++ ++ // __lsx_vslt_bu ++ // vd, vj, vk ++ // V16QI, UV16QI, UV16QI ++ v16i8_r = __lsx_vslt_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslt.bu( ++ ++ // __lsx_vslt_hu ++ // vd, vj, vk ++ // V8HI, UV8HI, UV8HI ++ v8i16_r = __lsx_vslt_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslt.hu( ++ ++ // __lsx_vslt_wu ++ // vd, vj, vk ++ // V4SI, UV4SI, UV4SI ++ v4i32_r = __lsx_vslt_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslt.wu( ++ ++ // __lsx_vslt_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vslt_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslt.du( ++ ++ // __lsx_vslti_bu ++ // vd, vj, ui5 ++ // V16QI, UV16QI, UQI ++ v16i8_r = __lsx_vslti_bu(v16u8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslti.bu( ++ ++ // __lsx_vslti_hu ++ // vd, vj, ui5 ++ // V8HI, UV8HI, UQI ++ v8i16_r = __lsx_vslti_hu(v8u16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslti.hu( ++ ++ // __lsx_vslti_wu ++ // vd, vj, ui5 ++ // V4SI, UV4SI, UQI ++ v4i32_r = 
__lsx_vslti_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslti.wu( ++ ++ // __lsx_vslti_du ++ // vd, vj, ui5 ++ // V2DI, UV2DI, UQI ++ v2i64_r = __lsx_vslti_du(v2u64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslti.du( ++ ++ // __lsx_vsle_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsle_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsle.b( ++ ++ // __lsx_vsle_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsle_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsle.h( ++ ++ // __lsx_vsle_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsle_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsle.w( ++ ++ // __lsx_vsle_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsle_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsle.d( ++ ++ // __lsx_vslei_b ++ // vd, vj, si5 ++ // V16QI, V16QI, QI ++ v16i8_r = __lsx_vslei_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslei.b( ++ ++ // __lsx_vslei_h ++ // vd, vj, si5 ++ // V8HI, V8HI, QI ++ v8i16_r = __lsx_vslei_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslei.h( ++ ++ // __lsx_vslei_w ++ // vd, vj, si5 ++ // V4SI, V4SI, QI ++ v4i32_r = __lsx_vslei_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslei.w( ++ ++ // __lsx_vslei_d ++ // vd, vj, si5 ++ // V2DI, V2DI, QI ++ v2i64_r = __lsx_vslei_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslei.d( ++ ++ // __lsx_vsle_bu ++ // vd, vj, vk ++ // V16QI, UV16QI, UV16QI ++ v16i8_r = __lsx_vsle_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsle.bu( ++ ++ // __lsx_vsle_hu ++ // vd, vj, vk ++ // V8HI, UV8HI, UV8HI ++ v8i16_r = __lsx_vsle_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsle.hu( ++ ++ // __lsx_vsle_wu ++ // vd, vj, vk ++ // V4SI, UV4SI, UV4SI ++ v4i32_r = __lsx_vsle_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsle.wu( ++ ++ // __lsx_vsle_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vsle_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsle.du( ++ ++ // __lsx_vslei_bu ++ // vd, vj, ui5 ++ // V16QI, UV16QI, UQI ++ v16i8_r = __lsx_vslei_bu(v16u8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslei.bu( ++ ++ // __lsx_vslei_hu ++ // vd, vj, ui5 ++ // V8HI, UV8HI, UQI ++ v8i16_r = __lsx_vslei_hu(v8u16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslei.hu( ++ ++ // __lsx_vslei_wu ++ // vd, vj, ui5 ++ // V4SI, UV4SI, UQI ++ v4i32_r = __lsx_vslei_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslei.wu( ++ ++ // __lsx_vslei_du ++ // vd, vj, ui5 ++ // V2DI, UV2DI, UQI ++ v2i64_r = __lsx_vslei_du(v2u64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslei.du( ++ ++ // __lsx_vsat_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsat_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsat.b( ++ ++ // __lsx_vsat_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsat_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsat.h( ++ ++ // __lsx_vsat_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsat_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsat.w( ++ ++ // __lsx_vsat_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsat_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsat.d( ++ ++ // __lsx_vsat_bu ++ // vd, vj, ui3 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = 
__lsx_vsat_bu(v16u8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsat.bu( ++ ++ // __lsx_vsat_hu ++ // vd, vj, ui4 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vsat_hu(v8u16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsat.hu( ++ ++ // __lsx_vsat_wu ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vsat_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsat.wu( ++ ++ // __lsx_vsat_du ++ // vd, vj, ui6 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vsat_du(v2u64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsat.du( ++ ++ // __lsx_vadda_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vadda_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vadda.b( ++ ++ // __lsx_vadda_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vadda_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vadda.h( ++ ++ // __lsx_vadda_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vadda_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vadda.w( ++ ++ // __lsx_vadda_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vadda_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vadda.d( ++ ++ // __lsx_vsadd_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsadd_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsadd.b( ++ ++ // __lsx_vsadd_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsadd_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsadd.h( ++ ++ // __lsx_vsadd_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsadd_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsadd.w( ++ ++ // __lsx_vsadd_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsadd_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsadd.d( ++ ++ // __lsx_vsadd_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vsadd_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsadd.bu( ++ ++ // __lsx_vsadd_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vsadd_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsadd.hu( ++ ++ // __lsx_vsadd_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vsadd_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsadd.wu( ++ ++ // __lsx_vsadd_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vsadd_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsadd.du( ++ ++ // __lsx_vavg_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vavg_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vavg.b( ++ ++ // __lsx_vavg_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vavg_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vavg.h( ++ ++ // __lsx_vavg_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vavg_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vavg.w( ++ ++ // __lsx_vavg_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vavg_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vavg.d( ++ ++ // __lsx_vavg_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vavg_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vavg.bu( ++ ++ // __lsx_vavg_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vavg_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vavg.hu( ++ ++ // 
__lsx_vavg_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vavg_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vavg.wu( ++ ++ // __lsx_vavg_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vavg_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vavg.du( ++ ++ // __lsx_vavgr_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vavgr_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vavgr.b( ++ ++ // __lsx_vavgr_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vavgr_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vavgr.h( ++ ++ // __lsx_vavgr_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vavgr_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vavgr.w( ++ ++ // __lsx_vavgr_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vavgr_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vavgr.d( ++ ++ // __lsx_vavgr_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vavgr_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vavgr.bu( ++ ++ // __lsx_vavgr_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vavgr_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vavgr.hu( ++ ++ // __lsx_vavgr_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vavgr_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vavgr.wu( ++ ++ // __lsx_vavgr_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vavgr_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vavgr.du( ++ ++ // __lsx_vssub_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vssub_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssub.b( ++ ++ // __lsx_vssub_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vssub_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssub.h( ++ ++ // __lsx_vssub_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vssub_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssub.w( ++ ++ // __lsx_vssub_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vssub_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssub.d( ++ ++ // __lsx_vssub_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vssub_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssub.bu( ++ ++ // __lsx_vssub_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vssub_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssub.hu( ++ ++ // __lsx_vssub_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vssub_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssub.wu( ++ ++ // __lsx_vssub_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vssub_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssub.du( ++ ++ // __lsx_vabsd_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vabsd_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vabsd.b( ++ ++ // __lsx_vabsd_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vabsd_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vabsd.h( ++ ++ // __lsx_vabsd_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vabsd_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vabsd.w( ++ ++ // __lsx_vabsd_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ 
v2i64_r = __lsx_vabsd_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vabsd.d( ++ ++ // __lsx_vabsd_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vabsd_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vabsd.bu( ++ ++ // __lsx_vabsd_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vabsd_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vabsd.hu( ++ ++ // __lsx_vabsd_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vabsd_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vabsd.wu( ++ ++ // __lsx_vabsd_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vabsd_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vabsd.du( ++ ++ // __lsx_vmul_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmul_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmul.b( ++ ++ // __lsx_vmul_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmul_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmul.h( ++ ++ // __lsx_vmul_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmul_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmul.w( ++ ++ // __lsx_vmul_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmul_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmul.d( ++ ++ // __lsx_vmadd_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmadd_b(v16i8_a, v16i8_b, v16i8_c); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmadd.b( ++ ++ // __lsx_vmadd_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmadd_h(v8i16_a, v8i16_b, v8i16_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmadd.h( ++ ++ // __lsx_vmadd_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmadd_w(v4i32_a, v4i32_b, v4i32_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmadd.w( ++ ++ // __lsx_vmadd_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmadd_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmadd.d( ++ ++ // __lsx_vmsub_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmsub_b(v16i8_a, v16i8_b, v16i8_c); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmsub.b( ++ ++ // __lsx_vmsub_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmsub_h(v8i16_a, v8i16_b, v8i16_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmsub.h( ++ ++ // __lsx_vmsub_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmsub_w(v4i32_a, v4i32_b, v4i32_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmsub.w( ++ ++ // __lsx_vmsub_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmsub_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmsub.d( ++ ++ // __lsx_vdiv_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vdiv_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vdiv.b( ++ ++ // __lsx_vdiv_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vdiv_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vdiv.h( ++ ++ // __lsx_vdiv_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vdiv_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vdiv.w( ++ ++ // __lsx_vdiv_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vdiv_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vdiv.d( ++ ++ // __lsx_vdiv_bu ++ // vd, vj, vk ++ // UV16QI, 
UV16QI, UV16QI ++ v16u8_r = __lsx_vdiv_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vdiv.bu( ++ ++ // __lsx_vdiv_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vdiv_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vdiv.hu( ++ ++ // __lsx_vdiv_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vdiv_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vdiv.wu( ++ ++ // __lsx_vdiv_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vdiv_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vdiv.du( ++ ++ // __lsx_vhaddw_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vhaddw_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b( ++ ++ // __lsx_vhaddw_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vhaddw_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h( ++ ++ // __lsx_vhaddw_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vhaddw_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w( ++ ++ // __lsx_vhaddw_hu_bu ++ // vd, vj, vk ++ // UV8HI, UV16QI, UV16QI ++ v8u16_r = __lsx_vhaddw_hu_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu( ++ ++ // __lsx_vhaddw_wu_hu ++ // vd, vj, vk ++ // UV4SI, UV8HI, UV8HI ++ v4u32_r = __lsx_vhaddw_wu_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu( ++ ++ // __lsx_vhaddw_du_wu ++ // vd, vj, vk ++ // UV2DI, UV4SI, UV4SI ++ v2u64_r = __lsx_vhaddw_du_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu( ++ ++ // __lsx_vhsubw_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vhsubw_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b( ++ ++ // __lsx_vhsubw_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vhsubw_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h( ++ ++ // __lsx_vhsubw_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vhsubw_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w( ++ ++ // __lsx_vhsubw_hu_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vhsubw_hu_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu( ++ ++ // __lsx_vhsubw_wu_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vhsubw_wu_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu( ++ ++ // __lsx_vhsubw_du_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vhsubw_du_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu( ++ ++ // __lsx_vmod_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmod_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmod.b( ++ ++ // __lsx_vmod_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmod_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmod.h( ++ ++ // __lsx_vmod_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmod_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmod.w( ++ ++ // __lsx_vmod_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmod_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmod.d( ++ ++ // __lsx_vmod_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vmod_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmod.bu( ++ ++ 
// __lsx_vmod_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vmod_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmod.hu( ++ ++ // __lsx_vmod_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vmod_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmod.wu( ++ ++ // __lsx_vmod_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmod_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmod.du( ++ ++ // __lsx_vreplve_b ++ // vd, vj, rk ++ // V16QI, V16QI, SI ++ v16i8_r = __lsx_vreplve_b(v16i8_a, i32_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vreplve.b( ++ ++ // __lsx_vreplve_h ++ // vd, vj, rk ++ // V8HI, V8HI, SI ++ v8i16_r = __lsx_vreplve_h(v8i16_a, i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vreplve.h( ++ ++ // __lsx_vreplve_w ++ // vd, vj, rk ++ // V4SI, V4SI, SI ++ v4i32_r = __lsx_vreplve_w(v4i32_a, i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vreplve.w( ++ ++ // __lsx_vreplve_d ++ // vd, vj, rk ++ // V2DI, V2DI, SI ++ v2i64_r = __lsx_vreplve_d(v2i64_a, i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vreplve.d( ++ ++ // __lsx_vreplvei_b ++ // vd, vj, ui4 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vreplvei_b(v16i8_a, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vreplvei.b( ++ ++ // __lsx_vreplvei_h ++ // vd, vj, ui3 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vreplvei_h(v8i16_a, ui3); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vreplvei.h( ++ ++ // __lsx_vreplvei_w ++ // vd, vj, ui2 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vreplvei_w(v4i32_a, ui2); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vreplvei.w( ++ ++ // __lsx_vreplvei_d ++ // vd, vj, ui1 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vreplvei_d(v2i64_a, ui1); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vreplvei.d( ++ ++ // __lsx_vpickev_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vpickev_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpickev.b( ++ ++ // __lsx_vpickev_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vpickev_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpickev.h( ++ ++ // __lsx_vpickev_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vpickev_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpickev.w( ++ ++ // __lsx_vpickev_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vpickev_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpickev.d( ++ ++ // __lsx_vpickod_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vpickod_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpickod.b( ++ ++ // __lsx_vpickod_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vpickod_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpickod.h( ++ ++ // __lsx_vpickod_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vpickod_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpickod.w( ++ ++ // __lsx_vpickod_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vpickod_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpickod.d( ++ ++ // __lsx_vilvh_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vilvh_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vilvh.b( ++ ++ // __lsx_vilvh_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vilvh_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vilvh.h( ++ ++ // __lsx_vilvh_w ++ // vd, vj, vk 
++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vilvh_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vilvh.w( ++ ++ // __lsx_vilvh_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vilvh_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vilvh.d( ++ ++ // __lsx_vilvl_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vilvl_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vilvl.b( ++ ++ // __lsx_vilvl_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vilvl_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vilvl.h( ++ ++ // __lsx_vilvl_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vilvl_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vilvl.w( ++ ++ // __lsx_vilvl_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vilvl_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vilvl.d( ++ ++ // __lsx_vpackev_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vpackev_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpackev.b( ++ ++ // __lsx_vpackev_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vpackev_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpackev.h( ++ ++ // __lsx_vpackev_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vpackev_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpackev.w( ++ ++ // __lsx_vpackev_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vpackev_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpackev.d( ++ ++ // __lsx_vpackod_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vpackod_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpackod.b( ++ ++ // __lsx_vpackod_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vpackod_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpackod.h( ++ ++ // __lsx_vpackod_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vpackod_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpackod.w( ++ ++ // __lsx_vpackod_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vpackod_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpackod.d( ++ ++ // __lsx_vshuf_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vshuf_h(v8i16_a, v8i16_b, v8i16_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vshuf.h( ++ ++ // __lsx_vshuf_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vshuf_w(v4i32_a, v4i32_b, v4i32_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vshuf.w( ++ ++ // __lsx_vshuf_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vshuf_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vshuf.d( ++ ++ // __lsx_vand_v ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vand_v(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vand.v( ++ ++ // __lsx_vandi_b ++ // vd, vj, ui8 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vandi_b(v16u8_a, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vandi.b( ++ ++ // __lsx_vor_v ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vor_v(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vor.v( ++ ++ // __lsx_vori_b ++ // vd, vj, ui8 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vori_b(v16u8_a, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vori.b( ++ ++ // __lsx_vnor_v ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI 
++ v16u8_r = __lsx_vnor_v(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vnor.v( ++ ++ // __lsx_vnori_b ++ // vd, vj, ui8 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vnori_b(v16u8_a, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vnori.b( ++ ++ // __lsx_vxor_v ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vxor_v(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vxor.v( ++ ++ // __lsx_vxori_b ++ // vd, vj, ui8 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vxori_b(v16u8_a, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vxori.b( ++ ++ // __lsx_vbitsel_v ++ // vd, vj, vk, va ++ // UV16QI, UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vbitsel_v(v16u8_a, v16u8_b, v16u8_c); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitsel.v( ++ ++ // __lsx_vbitseli_b ++ // vd, vj, ui8 ++ // UV16QI, UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vbitseli_b(v16u8_a, v16u8_b, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitseli.b( ++ ++ // __lsx_vshuf4i_b ++ // vd, vj, ui8 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vshuf4i_b(v16i8_a, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b( ++ ++ // __lsx_vshuf4i_h ++ // vd, vj, ui8 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vshuf4i_h(v8i16_a, ui8); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h( ++ ++ // __lsx_vshuf4i_w ++ // vd, vj, ui8 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vshuf4i_w(v4i32_a, ui8); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w( ++ ++ // __lsx_vreplgr2vr_b ++ // vd, rj ++ // V16QI, SI ++ v16i8_r = __lsx_vreplgr2vr_b(i32_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b( ++ ++ // __lsx_vreplgr2vr_h ++ // vd, rj ++ // V8HI, SI ++ v8i16_r = __lsx_vreplgr2vr_h(i32_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h( ++ ++ // __lsx_vreplgr2vr_w ++ // vd, rj ++ // V4SI, SI ++ v4i32_r = __lsx_vreplgr2vr_w(i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w( ++ ++ // __lsx_vreplgr2vr_d ++ // vd, rj ++ // V2DI, DI ++ v2i64_r = __lsx_vreplgr2vr_d(i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d( ++ ++ // __lsx_vpcnt_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vpcnt_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpcnt.b( ++ ++ // __lsx_vpcnt_h ++ // vd, vj ++ // V8HI, V8HI ++ v8i16_r = __lsx_vpcnt_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpcnt.h( ++ ++ // __lsx_vpcnt_w ++ // vd, vj ++ // V4SI, V4SI ++ v4i32_r = __lsx_vpcnt_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpcnt.w( ++ ++ // __lsx_vpcnt_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vpcnt_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpcnt.d( ++ ++ // __lsx_vclo_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vclo_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vclo.b( ++ ++ // __lsx_vclo_h ++ // vd, vj ++ // V8HI, V8HI ++ v8i16_r = __lsx_vclo_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vclo.h( ++ ++ // __lsx_vclo_w ++ // vd, vj ++ // V4SI, V4SI ++ v4i32_r = __lsx_vclo_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vclo.w( ++ ++ // __lsx_vclo_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vclo_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vclo.d( ++ ++ // __lsx_vclz_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vclz_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vclz.b( ++ ++ // __lsx_vclz_h ++ // vd, vj ++ // V8HI, V8HI ++ v8i16_r = __lsx_vclz_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vclz.h( ++ ++ // __lsx_vclz_w ++ // vd, 
vj ++ // V4SI, V4SI ++ v4i32_r = __lsx_vclz_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vclz.w( ++ ++ // __lsx_vclz_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vclz_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vclz.d( ++ ++ // __lsx_vpickve2gr_b ++ // rd, vj, ui4 ++ // SI, V16QI, UQI ++ i32_r = __lsx_vpickve2gr_b(v16i8_a, ui4); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.b( ++ ++ // __lsx_vpickve2gr_h ++ // rd, vj, ui3 ++ // SI, V8HI, UQI ++ i32_r = __lsx_vpickve2gr_h(v8i16_a, ui3); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.h( ++ ++ // __lsx_vpickve2gr_w ++ // rd, vj, ui2 ++ // SI, V4SI, UQI ++ i32_r = __lsx_vpickve2gr_w(v4i32_a, ui2); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.w( ++ ++ // __lsx_vpickve2gr_d ++ // rd, vj, ui1 ++ // DI, V2DI, UQI ++ i64_r = __lsx_vpickve2gr_d(v2i64_a, ui1); // CHECK: call i64 @llvm.loongarch.lsx.vpickve2gr.d( ++ ++ // __lsx_vpickve2gr_bu ++ // rd, vj, ui4 ++ // USI, V16QI, UQI ++ u32_r = __lsx_vpickve2gr_bu(v16i8_a, ui4); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.bu( ++ ++ // __lsx_vpickve2gr_hu ++ // rd, vj, ui3 ++ // USI, V8HI, UQI ++ u32_r = __lsx_vpickve2gr_hu(v8i16_a, ui3); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.hu( ++ ++ // __lsx_vpickve2gr_wu ++ // rd, vj, ui2 ++ // USI, V4SI, UQI ++ u32_r = __lsx_vpickve2gr_wu(v4i32_a, ui2); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.wu( ++ ++ // __lsx_vpickve2gr_du ++ // rd, vj, ui1 ++ // UDI, V2DI, UQI ++ u64_r = __lsx_vpickve2gr_du(v2i64_a, ui1); // CHECK: call i64 @llvm.loongarch.lsx.vpickve2gr.du( ++ ++ // __lsx_vinsgr2vr_b ++ // vd, rj, ui4 ++ // V16QI, V16QI, SI, UQI ++ v16i8_r = __lsx_vinsgr2vr_b(v16i8_a, i32_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b( ++ ++ // __lsx_vinsgr2vr_h ++ // vd, rj, ui3 ++ // V8HI, V8HI, SI, UQI ++ v8i16_r = __lsx_vinsgr2vr_h(v8i16_a, i32_b, ui3); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h( ++ ++ // __lsx_vinsgr2vr_w ++ // vd, rj, ui2 ++ // V4SI, V4SI, SI, UQI ++ v4i32_r = __lsx_vinsgr2vr_w(v4i32_a, i32_b, ui2); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w( ++ ++ // __lsx_vinsgr2vr_d ++ // vd, rj, ui1 ++ // V2DI, V2DI, SI, UQI ++ v2i64_r = __lsx_vinsgr2vr_d(v2i64_a, i32_b, ui1); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d( ++ ++ // __lsx_vfcmp_caf_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_caf_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s( ++ ++ // __lsx_vfcmp_caf_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_caf_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d( ++ ++ // __lsx_vfcmp_cor_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cor_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s( ++ ++ // __lsx_vfcmp_cor_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cor_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d( ++ ++ // __lsx_vfcmp_cun_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cun_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s( ++ ++ // __lsx_vfcmp_cun_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cun_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d( ++ ++ // __lsx_vfcmp_cune_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cune_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s( ++ ++ // 
__lsx_vfcmp_cune_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cune_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d( ++ ++ // __lsx_vfcmp_cueq_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cueq_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s( ++ ++ // __lsx_vfcmp_cueq_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cueq_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d( ++ ++ // __lsx_vfcmp_ceq_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_ceq_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s( ++ ++ // __lsx_vfcmp_ceq_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_ceq_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d( ++ ++ // __lsx_vfcmp_cne_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cne_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s( ++ ++ // __lsx_vfcmp_cne_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cne_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d( ++ ++ // __lsx_vfcmp_clt_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_clt_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s( ++ ++ // __lsx_vfcmp_clt_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_clt_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d( ++ ++ // __lsx_vfcmp_cult_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cult_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s( ++ ++ // __lsx_vfcmp_cult_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cult_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d( ++ ++ // __lsx_vfcmp_cle_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cle_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s( ++ ++ // __lsx_vfcmp_cle_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cle_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d( ++ ++ // __lsx_vfcmp_cule_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cule_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s( ++ ++ // __lsx_vfcmp_cule_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cule_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d( ++ ++ // __lsx_vfcmp_saf_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_saf_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s( ++ ++ // __lsx_vfcmp_saf_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_saf_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d( ++ ++ // __lsx_vfcmp_sor_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sor_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s( ++ ++ // __lsx_vfcmp_sor_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sor_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d( ++ ++ // __lsx_vfcmp_sun_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sun_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s( ++ ++ // __lsx_vfcmp_sun_d ++ // vd, vj, 
vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sun_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d( ++ ++ // __lsx_vfcmp_sune_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sune_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s( ++ ++ // __lsx_vfcmp_sune_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sune_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d( ++ ++ // __lsx_vfcmp_sueq_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sueq_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s( ++ ++ // __lsx_vfcmp_sueq_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sueq_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d( ++ ++ // __lsx_vfcmp_seq_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_seq_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s( ++ ++ // __lsx_vfcmp_seq_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_seq_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d( ++ ++ // __lsx_vfcmp_sne_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sne_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s( ++ ++ // __lsx_vfcmp_sne_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sne_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d( ++ ++ // __lsx_vfcmp_slt_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_slt_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s( ++ ++ // __lsx_vfcmp_slt_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_slt_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d( ++ ++ // __lsx_vfcmp_sult_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sult_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s( ++ ++ // __lsx_vfcmp_sult_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sult_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d( ++ ++ // __lsx_vfcmp_sle_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sle_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s( ++ ++ // __lsx_vfcmp_sle_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sle_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d( ++ ++ // __lsx_vfcmp_sule_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sule_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s( ++ ++ // __lsx_vfcmp_sule_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sule_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d( ++ ++ // __lsx_vfadd_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfadd_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfadd.s( ++ // __lsx_vfadd_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfadd_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfadd.d( ++ ++ // __lsx_vfsub_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfsub_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfsub.s( ++ ++ // __lsx_vfsub_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfsub_d(v2f64_a, 
v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfsub.d( ++ ++ // __lsx_vfmul_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmul_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmul.s( ++ ++ // __lsx_vfmul_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmul_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmul.d( ++ ++ // __lsx_vfdiv_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfdiv_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfdiv.s( ++ ++ // __lsx_vfdiv_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfdiv_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfdiv.d( ++ ++ // __lsx_vfcvt_h_s ++ // vd, vj, vk ++ // V8HI, V4SF, V4SF ++ v8i16_r = __lsx_vfcvt_h_s(v4f32_a, v4f32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s( ++ ++ // __lsx_vfcvt_s_d ++ // vd, vj, vk ++ // V4SF, V2DF, V2DF ++ v4f32_r = __lsx_vfcvt_s_d(v2f64_a, v2f64_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d( ++ ++ // __lsx_vfmin_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmin_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmin.s( ++ ++ // __lsx_vfmin_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmin_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmin.d( ++ ++ // __lsx_vfmina_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmina_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmina.s( ++ ++ // __lsx_vfmina_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmina_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmina.d( ++ ++ // __lsx_vfmax_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmax_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmax.s( ++ ++ // __lsx_vfmax_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmax_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmax.d( ++ ++ // __lsx_vfmaxa_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmaxa_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmaxa.s( ++ ++ // __lsx_vfmaxa_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmaxa_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmaxa.d( ++ ++ // __lsx_vfclass_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vfclass_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfclass.s( ++ ++ // __lsx_vfclass_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vfclass_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfclass.d( ++ ++ // __lsx_vfsqrt_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfsqrt_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfsqrt.s( ++ ++ // __lsx_vfsqrt_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfsqrt_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfsqrt.d( ++ ++ // __lsx_vfrecip_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfrecip_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrecip.s( ++ ++ // __lsx_vfrecip_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrecip_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrecip.d( ++ ++ // __lsx_vfrint_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfrint_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrint.s( ++ ++ // __lsx_vfrint_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrint_d(v2f64_a); // 
CHECK: call <2 x double> @llvm.loongarch.lsx.vfrint.d( ++ ++ // __lsx_vfrsqrt_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfrsqrt_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s( ++ ++ // __lsx_vfrsqrt_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrsqrt_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d( ++ ++ // __lsx_vflogb_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vflogb_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vflogb.s( ++ ++ // __lsx_vflogb_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vflogb_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vflogb.d( ++ ++ // __lsx_vfcvth_s_h ++ // vd, vj ++ // V4SF, V8HI ++ v4f32_r = __lsx_vfcvth_s_h(v8i16_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h( ++ ++ // __lsx_vfcvth_d_s ++ // vd, vj ++ // V2DF, V4SF ++ v2f64_r = __lsx_vfcvth_d_s(v4f32_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s( ++ ++ //gcc build fail ++ ++ // __lsx_vfcvtl_s_h ++ // vd, vj ++ // V4SF, V8HI ++ v4f32_r = __lsx_vfcvtl_s_h(v8i16_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h( ++ ++ // __lsx_vfcvtl_d_s ++ // vd, vj ++ // V2DF, V4SF ++ v2f64_r = __lsx_vfcvtl_d_s(v4f32_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s( ++ ++ // __lsx_vftint_w_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vftint_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftint.w.s( ++ ++ // __lsx_vftint_l_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vftint_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftint.l.d( ++ ++ // __lsx_vftint_wu_s ++ // vd, vj ++ // UV4SI, V4SF ++ v4u32_r = __lsx_vftint_wu_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s( ++ ++ // __lsx_vftint_lu_d ++ // vd, vj ++ // UV2DI, V2DF ++ v2u64_r = __lsx_vftint_lu_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d( ++ ++ // __lsx_vftintrz_w_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vftintrz_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s( ++ ++ // __lsx_vftintrz_l_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vftintrz_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d( ++ ++ // __lsx_vftintrz_wu_s ++ // vd, vj ++ // UV4SI, V4SF ++ v4u32_r = __lsx_vftintrz_wu_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s( ++ ++ // __lsx_vftintrz_lu_d ++ // vd, vj ++ // UV2DI, V2DF ++ v2u64_r = __lsx_vftintrz_lu_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d( ++ ++ // __lsx_vffint_s_w ++ // vd, vj ++ // V4SF, V4SI ++ v4f32_r = __lsx_vffint_s_w(v4i32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vffint.s.w( ++ ++ // __lsx_vffint_d_l ++ // vd, vj ++ // V2DF, V2DI ++ v2f64_r = __lsx_vffint_d_l(v2i64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vffint.d.l( ++ ++ // __lsx_vffint_s_wu ++ // vd, vj ++ // V4SF, UV4SI ++ v4f32_r = __lsx_vffint_s_wu(v4u32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vffint.s.wu( ++ ++ // __lsx_vffint_d_lu ++ // vd, vj ++ // V2DF, UV2DI ++ v2f64_r = __lsx_vffint_d_lu(v2u64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vffint.d.lu( ++ ++ // __lsx_vandn_v ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vandn_v(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vandn.v( ++ ++ // __lsx_vneg_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vneg_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vneg.b( ++ ++ // __lsx_vneg_h ++ // vd, vj ++ // 
V8HI, V8HI ++ v8i16_r = __lsx_vneg_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vneg.h( ++ ++ // __lsx_vneg_w ++ // vd, vj ++ // V4SI, V4SI ++ v4i32_r = __lsx_vneg_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vneg.w( ++ ++ // __lsx_vneg_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vneg_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vneg.d( ++ ++ // __lsx_vmuh_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmuh_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmuh.b( ++ ++ // __lsx_vmuh_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmuh_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmuh.h( ++ ++ // __lsx_vmuh_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmuh_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmuh.w( ++ ++ // __lsx_vmuh_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmuh_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmuh.d( ++ ++ // __lsx_vmuh_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vmuh_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmuh.bu( ++ ++ // __lsx_vmuh_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vmuh_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmuh.hu( ++ ++ // __lsx_vmuh_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vmuh_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmuh.wu( ++ ++ // __lsx_vmuh_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmuh_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmuh.du( ++ ++ // __lsx_vsllwil_h_b ++ // vd, vj, ui3 ++ // V8HI, V16QI, UQI ++ v8i16_r = __lsx_vsllwil_h_b(v16i8_a, ui3); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b( ++ ++ // __lsx_vsllwil_w_h ++ // vd, vj, ui4 ++ // V4SI, V8HI, UQI ++ v4i32_r = __lsx_vsllwil_w_h(v8i16_a, ui4); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h( ++ ++ // __lsx_vsllwil_d_w ++ // vd, vj, ui5 ++ // V2DI, V4SI, UQI ++ v2i64_r = __lsx_vsllwil_d_w(v4i32_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w( ++ ++ // __lsx_vsllwil_hu_bu ++ // vd, vj, ui3 ++ // UV8HI, UV16QI, UQI ++ v8u16_r = __lsx_vsllwil_hu_bu(v16u8_a, ui3); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu( ++ ++ // __lsx_vsllwil_wu_hu ++ // vd, vj, ui4 ++ // UV4SI, UV8HI, UQI ++ v4u32_r = __lsx_vsllwil_wu_hu(v8u16_a, ui4); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu( ++ ++ // __lsx_vsllwil_du_wu ++ // vd, vj, ui5 ++ // UV2DI, UV4SI, UQI ++ v2u64_r = __lsx_vsllwil_du_wu(v4u32_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu( ++ ++ // __lsx_vsran_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vsran_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsran.b.h( ++ ++ // __lsx_vsran_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vsran_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsran.h.w( ++ ++ // __lsx_vsran_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vsran_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsran.w.d( ++ ++ // __lsx_vssran_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vssran_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssran.b.h( ++ ++ // __lsx_vssran_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vssran_h_w(v4i32_a, v4i32_b); // CHECK: call 
<8 x i16> @llvm.loongarch.lsx.vssran.h.w( ++ ++ // __lsx_vssran_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vssran_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssran.w.d( ++ ++ // __lsx_vssran_bu_h ++ // vd, vj, vk ++ // UV16QI, UV8HI, UV8HI ++ v16u8_r = __lsx_vssran_bu_h(v8u16_a, v8u16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h( ++ ++ // __lsx_vssran_hu_w ++ // vd, vj, vk ++ // UV8HI, UV4SI, UV4SI ++ v8u16_r = __lsx_vssran_hu_w(v4u32_a, v4u32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w( ++ ++ // __lsx_vssran_wu_d ++ // vd, vj, vk ++ // UV4SI, UV2DI, UV2DI ++ v4u32_r = __lsx_vssran_wu_d(v2u64_a, v2u64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d( ++ ++ // __lsx_vsrarn_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vsrarn_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h( ++ ++ // __lsx_vsrarn_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vsrarn_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w( ++ ++ // __lsx_vsrarn_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vsrarn_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d( ++ ++ // __lsx_vssrarn_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vssrarn_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h( ++ ++ // __lsx_vssrarn_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vssrarn_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w( ++ ++ // __lsx_vssrarn_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vssrarn_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d( ++ ++ // __lsx_vssrarn_bu_h ++ // vd, vj, vk ++ // UV16QI, UV8HI, UV8HI ++ v16u8_r = __lsx_vssrarn_bu_h(v8u16_a, v8u16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h( ++ ++ // __lsx_vssrarn_hu_w ++ // vd, vj, vk ++ // UV8HI, UV4SI, UV4SI ++ v8u16_r = __lsx_vssrarn_hu_w(v4u32_a, v4u32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w( ++ ++ // __lsx_vssrarn_wu_d ++ // vd, vj, vk ++ // UV4SI, UV2DI, UV2DI ++ v4u32_r = __lsx_vssrarn_wu_d(v2u64_a, v2u64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d( ++ ++ // __lsx_vsrln_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vsrln_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h( ++ ++ // __lsx_vsrln_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vsrln_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w( ++ ++ // __lsx_vsrln_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vsrln_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d( ++ ++ // __lsx_vssrln_bu_h ++ // vd, vj, vk ++ // UV16QI, UV8HI, UV8HI ++ v16u8_r = __lsx_vssrln_bu_h(v8u16_a, v8u16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h( ++ ++ // __lsx_vssrln_hu_w ++ // vd, vj, vk ++ // UV8HI, UV4SI, UV4SI ++ v8u16_r = __lsx_vssrln_hu_w(v4u32_a, v4u32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w( ++ ++ // __lsx_vssrln_wu_d ++ // vd, vj, vk ++ // UV4SI, UV2DI, UV2DI ++ v4u32_r = __lsx_vssrln_wu_d(v2u64_a, v2u64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d( ++ ++ // __lsx_vsrlrn_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vsrlrn_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h( ++ 
++ // __lsx_vsrlrn_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vsrlrn_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w( ++ ++ // __lsx_vsrlrn_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vsrlrn_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d( ++ ++ // __lsx_vssrlrn_bu_h ++ // vd, vj, vk ++ // UV16QI, UV8HI, UV8HI ++ v16u8_r = __lsx_vssrlrn_bu_h(v8u16_a, v8u16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h( ++ ++ // __lsx_vssrlrn_hu_w ++ // vd, vj, vk ++ // UV8HI, UV4SI, UV4SI ++ v8u16_r = __lsx_vssrlrn_hu_w(v4u32_a, v4u32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w( ++ ++ // __lsx_vssrlrn_wu_d ++ // vd, vj, vk ++ // UV4SI, UV2DI, UV2DI ++ v4u32_r = __lsx_vssrlrn_wu_d(v2u64_a, v2u64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d( ++ ++ // __lsx_vfrstpi_b ++ // vd, vj, ui5 ++ // V16QI, V16QI, V16QI, UQI ++ v16i8_r = __lsx_vfrstpi_b(v16i8_a, v16i8_b, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b( ++ ++ // __lsx_vfrstpi_h ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, UQI ++ v8i16_r = __lsx_vfrstpi_h(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h( ++ ++ // __lsx_vfrstp_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vfrstp_b(v16i8_a, v16i8_b, v16i8_c); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vfrstp.b( ++ ++ // __lsx_vfrstp_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vfrstp_h(v8i16_a, v8i16_b, v8i16_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vfrstp.h( ++ ++ // __lsx_vshuf4i_d ++ // vd, vj, ui8 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vshuf4i_d(v2i64_a, v2i64_b, ui8); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d( ++ ++ // __lsx_vbsrl_v ++ // vd, vj, ui5 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vbsrl_v(v16i8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbsrl.v( ++ ++ // __lsx_vbsll_v ++ // vd, vj, ui5 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vbsll_v(v16i8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbsll.v( ++ ++ // __lsx_vextrins_b ++ // vd, vj, ui8 ++ // V16QI, V16QI, V16QI, UQI ++ v16i8_r = __lsx_vextrins_b(v16i8_a, v16i8_b, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vextrins.b( ++ ++ // __lsx_vextrins_h ++ // vd, vj, ui8 ++ // V8HI, V8HI, V8HI, UQI ++ v8i16_r = __lsx_vextrins_h(v8i16_a, v8i16_b, ui8); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vextrins.h( ++ ++ // __lsx_vextrins_w ++ // vd, vj, ui8 ++ // V4SI, V4SI, V4SI, UQI ++ v4i32_r = __lsx_vextrins_w(v4i32_a, v4i32_b, ui8); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vextrins.w( ++ ++ // __lsx_vextrins_d ++ // vd, vj, ui8 ++ // V2DI, V2DI, V2DI, UQI ++ v2i64_r = __lsx_vextrins_d(v2i64_a, v2i64_b, ui8); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vextrins.d( ++ ++ // __lsx_vmskltz_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vmskltz_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmskltz.b( ++ ++ // __lsx_vmskltz_h ++ // vd, vj ++ // V8HI, V8HI ++ v8i16_r = __lsx_vmskltz_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmskltz.h( ++ ++ // __lsx_vmskltz_w ++ // vd, vj ++ // V4SI, V4SI ++ v4i32_r = __lsx_vmskltz_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmskltz.w( ++ ++ // __lsx_vmskltz_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vmskltz_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmskltz.d( ++ ++ // __lsx_vsigncov_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = 
__lsx_vsigncov_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsigncov.b( ++ ++ // __lsx_vsigncov_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsigncov_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsigncov.h( ++ ++ // __lsx_vsigncov_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsigncov_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsigncov.w( ++ ++ // __lsx_vsigncov_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsigncov_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsigncov.d( ++ ++ // __lsx_vfmadd_s ++ // vd, vj, vk, va ++ // V4SF, V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmadd_s(v4f32_a, v4f32_b, v4f32_c); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmadd.s( ++ ++ // __lsx_vfmadd_d ++ // vd, vj, vk, va ++ // V2DF, V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmadd_d(v2f64_a, v2f64_b, v2f64_c); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmadd.d( ++ ++ // __lsx_vfmsub_s ++ // vd, vj, vk, va ++ // V4SF, V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmsub_s(v4f32_a, v4f32_b, v4f32_c); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmsub.s( ++ ++ // __lsx_vfmsub_d ++ // vd, vj, vk, va ++ // V2DF, V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmsub_d(v2f64_a, v2f64_b, v2f64_c); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmsub.d( ++ ++ // __lsx_vfnmadd_s ++ // vd, vj, vk, va ++ // V4SF, V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfnmadd_s(v4f32_a, v4f32_b, v4f32_c); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfnmadd.s( ++ ++ // __lsx_vfnmadd_d ++ // vd, vj, vk, va ++ // V2DF, V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfnmadd_d(v2f64_a, v2f64_b, v2f64_c); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfnmadd.d( ++ ++ // __lsx_vfnmsub_s ++ // vd, vj, vk, va ++ // V4SF, V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfnmsub_s(v4f32_a, v4f32_b, v4f32_c); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfnmsub.s( ++ ++ // __lsx_vfnmsub_d ++ // vd, vj, vk, va ++ // V2DF, V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfnmsub_d(v2f64_a, v2f64_b, v2f64_c); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfnmsub.d( ++ ++ // __lsx_vftintrne_w_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vftintrne_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s( ++ ++ // __lsx_vftintrne_l_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vftintrne_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d( ++ ++ // __lsx_vftintrp_w_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vftintrp_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s( ++ ++ // __lsx_vftintrp_l_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vftintrp_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d( ++ ++ // __lsx_vftintrm_w_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vftintrm_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s( ++ ++ // __lsx_vftintrm_l_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vftintrm_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d( ++ ++ // __lsx_vftint_w_d ++ // vd, vj, vk ++ // V4SI, V2DF, V2DF ++ v4i32_r = __lsx_vftint_w_d(v2f64_a, v2f64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftint.w.d( ++ ++ // __lsx_vffint_s_l ++ // vd, vj, vk ++ // V4SF, V2DI, V2DI ++ v4f32_r = __lsx_vffint_s_l(v2i64_a, v2i64_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vffint.s.l( ++ ++ // __lsx_vftintrz_w_d ++ // vd, vj, vk ++ // V4SI, V2DF, V2DF ++ v4i32_r = __lsx_vftintrz_w_d(v2f64_a, v2f64_b); // 
CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d( ++ ++ // __lsx_vftintrp_w_d ++ // vd, vj, vk ++ // V4SI, V2DF, V2DF ++ v4i32_r = __lsx_vftintrp_w_d(v2f64_a, v2f64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d( ++ ++ // __lsx_vftintrm_w_d ++ // vd, vj, vk ++ // V4SI, V2DF, V2DF ++ v4i32_r = __lsx_vftintrm_w_d(v2f64_a, v2f64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d( ++ ++ // __lsx_vftintrne_w_d ++ // vd, vj, vk ++ // V4SI, V2DF, V2DF ++ v4i32_r = __lsx_vftintrne_w_d(v2f64_a, v2f64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d( ++ ++ // __lsx_vftintl_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintl_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s( ++ ++ // __lsx_vftinth_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftinth_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s( ++ ++ // __lsx_vffinth_d_w ++ // vd, vj ++ // V2DF, V4SI ++ v2f64_r = __lsx_vffinth_d_w(v4i32_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vffinth.d.w( ++ ++ // __lsx_vffintl_d_w ++ // vd, vj ++ // V2DF, V4SI ++ v2f64_r = __lsx_vffintl_d_w(v4i32_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vffintl.d.w( ++ ++ // __lsx_vftintrzl_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrzl_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s( ++ ++ // __lsx_vftintrzh_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrzh_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s( ++ ++ // __lsx_vftintrpl_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrpl_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s( ++ ++ // __lsx_vftintrph_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrph_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s( ++ ++ // __lsx_vftintrml_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrml_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s( ++ ++ // __lsx_vftintrmh_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrmh_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s( ++ ++ // __lsx_vftintrnel_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrnel_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s( ++ ++ // __lsx_vftintrneh_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrneh_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s( ++ ++ // __lsx_vfrintrne_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfrintrne_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrintrne.s( ++ ++ // __lsx_vfrintrne_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrintrne_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrintrne.d( ++ ++ // __lsx_vfrintrz_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfrintrz_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrintrz.s( ++ ++ // __lsx_vfrintrz_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrintrz_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrintrz.d( ++ ++ // __lsx_vfrintrp_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfrintrp_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrintrp.s( ++ ++ // __lsx_vfrintrp_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrintrp_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrintrp.d( ++ ++ // __lsx_vfrintrm_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = 
__lsx_vfrintrm_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrintrm.s( ++ ++ // __lsx_vfrintrm_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrintrm_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrintrm.d( ++ ++ // __lsx_vstelm_b ++ // vd, rj, si8, idx ++ // VOID, V16QI, CVPOINTER, SI, UQI ++ __lsx_vstelm_b(v16i8_a, &v16i8_b, 0, idx4); // CHECK: call void @llvm.loongarch.lsx.vstelm.b( ++ // __lsx_vstelm_h ++ // vd, rj, si8, idx ++ // VOID, V8HI, CVPOINTER, SI, UQI ++ __lsx_vstelm_h(v8i16_a, &v8i16_b, 0, idx3); // CHECK: call void @llvm.loongarch.lsx.vstelm.h( ++ ++ // __lsx_vstelm_w ++ // vd, rj, si8, idx ++ // VOID, V4SI, CVPOINTER, SI, UQI ++ __lsx_vstelm_w(v4i32_a, &v4i32_b, 0, idx2); // CHECK: call void @llvm.loongarch.lsx.vstelm.w( ++ ++ // __lsx_vstelm_d ++ // vd, rj, si8, idx ++ // VOID, V2DI, CVPOINTER, SI, UQI ++ __lsx_vstelm_d(v2i64_a, &v2i64_b, 0, idx1); // CHECK: call void @llvm.loongarch.lsx.vstelm.d( ++ ++ // __lsx_vaddwev_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vaddwev_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w( ++ ++ // __lsx_vaddwev_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vaddwev_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h( ++ ++ // __lsx_vaddwev_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vaddwev_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b( ++ ++ // __lsx_vaddwod_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vaddwod_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w( ++ ++ // __lsx_vaddwod_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vaddwod_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h( ++ ++ // __lsx_vaddwod_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vaddwod_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b( ++ ++ // __lsx_vaddwev_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vaddwev_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu( ++ ++ // __lsx_vaddwev_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vaddwev_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu( ++ ++ // __lsx_vaddwev_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vaddwev_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu( ++ ++ // __lsx_vaddwod_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vaddwod_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu( ++ ++ // __lsx_vaddwod_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vaddwod_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu( ++ ++ // __lsx_vaddwod_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vaddwod_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu( ++ ++ // __lsx_vaddwev_d_wu_w ++ // vd, vj, vk ++ // V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vaddwev_d_wu_w(v4u32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w( ++ ++ // __lsx_vaddwev_w_hu_h ++ // vd, vj, vk ++ // V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vaddwev_w_hu_h(v8u16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h( ++ ++ // __lsx_vaddwev_h_bu_b ++ // vd, vj, vk ++ // V8HI, UV16QI, V16QI ++ 
v8i16_r = __lsx_vaddwev_h_bu_b(v16u8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b( ++ ++ // __lsx_vaddwod_d_wu_w ++ // vd, vj, vk ++ // V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vaddwod_d_wu_w(v4u32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w( ++ ++ // __lsx_vaddwod_w_hu_h ++ // vd, vj, vk ++ // V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vaddwod_w_hu_h(v8u16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h( ++ ++ // __lsx_vaddwod_h_bu_b ++ // vd, vj, vk ++ // V8HI, UV16QI, V16QI ++ v8i16_r = __lsx_vaddwod_h_bu_b(v16u8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b( ++ ++ // __lsx_vsubwev_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vsubwev_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w( ++ ++ // __lsx_vsubwev_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vsubwev_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h( ++ ++ // __lsx_vsubwev_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vsubwev_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b( ++ ++ // __lsx_vsubwod_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vsubwod_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w( ++ ++ // __lsx_vsubwod_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vsubwod_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h( ++ ++ // __lsx_vsubwod_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vsubwod_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b( ++ ++ // __lsx_vsubwev_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vsubwev_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu( ++ ++ // __lsx_vsubwev_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vsubwev_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu( ++ ++ // __lsx_vsubwev_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vsubwev_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu( ++ ++ // __lsx_vsubwod_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vsubwod_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu( ++ ++ // __lsx_vsubwod_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vsubwod_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu( ++ ++ // __lsx_vsubwod_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vsubwod_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu( ++ ++ // __lsx_vaddwev_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vaddwev_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d( ++ ++ // __lsx_vaddwod_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vaddwod_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d( ++ ++ // __lsx_vaddwev_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vaddwev_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du( ++ ++ // __lsx_vaddwod_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vaddwod_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du( ++ ++ // __lsx_vsubwev_q_d ++ // vd, 
vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsubwev_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d( ++ ++ // __lsx_vsubwod_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsubwod_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d( ++ ++ // __lsx_vsubwev_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vsubwev_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du( ++ ++ // __lsx_vsubwod_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vsubwod_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du( ++ ++ // __lsx_vaddwev_q_du_d ++ // vd, vj, vk ++ // V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vaddwev_q_du_d(v2u64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d( ++ ++ // __lsx_vaddwod_q_du_d ++ // vd, vj, vk ++ // V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vaddwod_q_du_d(v2u64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d( ++ ++ // __lsx_vmulwev_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vmulwev_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w( ++ ++ // __lsx_vmulwev_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vmulwev_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h( ++ ++ // __lsx_vmulwev_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vmulwev_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b( ++ ++ // __lsx_vmulwod_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vmulwod_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w( ++ ++ // __lsx_vmulwod_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vmulwod_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h( ++ ++ // __lsx_vmulwod_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vmulwod_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b( ++ ++ // __lsx_vmulwev_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vmulwev_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu( ++ ++ // __lsx_vmulwev_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vmulwev_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu( ++ ++ // __lsx_vmulwev_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vmulwev_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu( ++ ++ // __lsx_vmulwod_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vmulwod_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu( ++ ++ // __lsx_vmulwod_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vmulwod_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu( ++ ++ // __lsx_vmulwod_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vmulwod_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu( ++ ++ // __lsx_vmulwev_d_wu_w ++ // vd, vj, vk ++ // V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vmulwev_d_wu_w(v4u32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w( ++ ++ // __lsx_vmulwev_w_hu_h ++ // vd, vj, vk ++ // V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vmulwev_w_hu_h(v8u16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h( ++ 
++ // __lsx_vmulwev_h_bu_b ++ // vd, vj, vk ++ // V8HI, UV16QI, V16QI ++ v8i16_r = __lsx_vmulwev_h_bu_b(v16u8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b( ++ ++ // __lsx_vmulwod_d_wu_w ++ // vd, vj, vk ++ // V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vmulwod_d_wu_w(v4u32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w( ++ ++ // __lsx_vmulwod_w_hu_h ++ // vd, vj, vk ++ // V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vmulwod_w_hu_h(v8u16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h( ++ ++ // __lsx_vmulwod_h_bu_b ++ // vd, vj, vk ++ // V8HI, UV16QI, V16QI ++ v8i16_r = __lsx_vmulwod_h_bu_b(v16u8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b( ++ ++ // __lsx_vmulwev_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmulwev_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d( ++ ++ // __lsx_vmulwod_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmulwod_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d( ++ ++ // __lsx_vmulwev_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vmulwev_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du( ++ ++ // __lsx_vmulwod_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vmulwod_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du( ++ ++ // __lsx_vmulwev_q_du_d ++ // vd, vj, vk ++ // V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vmulwev_q_du_d(v2u64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d( ++ ++ // __lsx_vmulwod_q_du_d ++ // vd, vj, vk ++ // V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vmulwod_q_du_d(v2u64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d( ++ ++ // __lsx_vhaddw_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vhaddw_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d( ++ ++ // __lsx_vhaddw_qu_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vhaddw_qu_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du( ++ ++ // __lsx_vhsubw_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vhsubw_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d( ++ ++ // __lsx_vhsubw_qu_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vhsubw_qu_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du( ++ ++ // __lsx_vmaddwev_d_w ++ // vd, vj, vk ++ // V2DI, V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vmaddwev_d_w(v2i64_a, v4i32_b, v4i32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w( ++ ++ // __lsx_vmaddwev_w_h ++ // vd, vj, vk ++ // V4SI, V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vmaddwev_w_h(v4i32_a, v8i16_b, v8i16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h( ++ ++ // __lsx_vmaddwev_h_b ++ // vd, vj, vk ++ // V8HI, V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vmaddwev_h_b(v8i16_a, v16i8_b, v16i8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b( ++ ++ // __lsx_vmaddwev_d_wu ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV4SI, UV4SI ++ v2u64_r = __lsx_vmaddwev_d_wu(v2u64_a, v4u32_b, v4u32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu( ++ ++ // __lsx_vmaddwev_w_hu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV8HI, UV8HI ++ v4u32_r = __lsx_vmaddwev_w_hu(v4u32_a, v8u16_b, v8u16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu( ++ ++ // __lsx_vmaddwev_h_bu ++ // vd, vj, vk 
++ // UV8HI, UV8HI, UV16QI, UV16QI ++ v8u16_r = __lsx_vmaddwev_h_bu(v8u16_a, v16u8_b, v16u8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu( ++ ++ // __lsx_vmaddwod_d_w ++ // vd, vj, vk ++ // V2DI, V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vmaddwod_d_w(v2i64_a, v4i32_b, v4i32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w( ++ ++ // __lsx_vmaddwod_w_h ++ // vd, vj, vk ++ // V4SI, V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vmaddwod_w_h(v4i32_a, v8i16_b, v8i16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h( ++ ++ // __lsx_vmaddwod_h_b ++ // vd, vj, vk ++ // V8HI, V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vmaddwod_h_b(v8i16_a, v16i8_b, v16i8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b( ++ ++ // __lsx_vmaddwod_d_wu ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV4SI, UV4SI ++ v2u64_r = __lsx_vmaddwod_d_wu(v2u64_a, v4u32_b, v4u32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu( ++ ++ // __lsx_vmaddwod_w_hu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV8HI, UV8HI ++ v4u32_r = __lsx_vmaddwod_w_hu(v4u32_a, v8u16_b, v8u16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu( ++ ++ // __lsx_vmaddwod_h_bu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV16QI, UV16QI ++ v8u16_r = __lsx_vmaddwod_h_bu(v8u16_a, v16u8_b, v16u8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu( ++ ++ // __lsx_vmaddwev_d_wu_w ++ // vd, vj, vk ++ // V2DI, V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vmaddwev_d_wu_w(v2i64_a, v4u32_b, v4i32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w( ++ ++ // __lsx_vmaddwev_w_hu_h ++ // vd, vj, vk ++ // V4SI, V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vmaddwev_w_hu_h(v4i32_a, v8u16_b, v8i16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h( ++ ++ // __lsx_vmaddwev_h_bu_b ++ // vd, vj, vk ++ // V8HI, V8HI, UV16QI, V16QI ++ v8i16_r = __lsx_vmaddwev_h_bu_b(v8i16_a, v16u8_b, v16i8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b( ++ ++ // __lsx_vmaddwod_d_wu_w ++ // vd, vj, vk ++ // V2DI, V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vmaddwod_d_wu_w(v2i64_a, v4u32_b, v4i32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w( ++ ++ // __lsx_vmaddwod_w_hu_h ++ // vd, vj, vk ++ // V4SI, V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vmaddwod_w_hu_h(v4i32_a, v8u16_b, v8i16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h( ++ ++ // __lsx_vmaddwod_h_bu_b ++ // vd, vj, vk ++ // V8HI, V8HI, UV16QI, V16QI ++ v8i16_r = __lsx_vmaddwod_h_bu_b(v8i16_a, v16u8_b, v16i8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b( ++ ++ // __lsx_vmaddwev_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmaddwev_q_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d( ++ ++ // __lsx_vmaddwod_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmaddwod_q_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d( ++ ++ // __lsx_vmaddwev_q_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmaddwev_q_du(v2u64_a, v2u64_b, v2u64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du( ++ ++ // __lsx_vmaddwod_q_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmaddwod_q_du(v2u64_a, v2u64_b, v2u64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du( ++ ++ // __lsx_vmaddwev_q_du_d ++ // vd, vj, vk ++ // V2DI, V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vmaddwev_q_du_d(v2i64_a, v2u64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d( ++ 
++ // __lsx_vmaddwod_q_du_d ++ // vd, vj, vk ++ // V2DI, V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vmaddwod_q_du_d(v2i64_a, v2u64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d( ++ ++ // __lsx_vrotr_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vrotr_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vrotr.b( ++ ++ // __lsx_vrotr_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vrotr_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vrotr.h( ++ ++ // __lsx_vrotr_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vrotr_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vrotr.w( ++ ++ // __lsx_vrotr_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vrotr_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vrotr.d( ++ ++ // __lsx_vadd_q ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vadd_q(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vadd.q( ++ ++ // __lsx_vsub_q ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsub_q(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsub.q( ++ ++ // __lsx_vldrepl_b ++ // vd, rj, si12 ++ // V16QI, CVPOINTER, SI ++ v16i8_r = __lsx_vldrepl_b(&v16i8_a, si12); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vldrepl.b( ++ ++ // __lsx_vldrepl_h ++ // vd, rj, si11 ++ // V8HI, CVPOINTER, SI ++ v8i16_r = __lsx_vldrepl_h(&v8i16_a, si11); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vldrepl.h( ++ ++ // __lsx_vldrepl_w ++ // vd, rj, si10 ++ // V4SI, CVPOINTER, SI ++ v4i32_r = __lsx_vldrepl_w(&v4i32_a, si10); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vldrepl.w( ++ ++ // __lsx_vldrepl_d ++ // vd, rj, si9 ++ // V2DI, CVPOINTER, SI ++ v2i64_r = __lsx_vldrepl_d(&v2i64_a, si9); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vldrepl.d( ++ ++ // __lsx_vmskgez_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vmskgez_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmskgez.b( ++ ++ // __lsx_vmsknz_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vmsknz_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmsknz.b( ++ ++ // __lsx_vexth_h_b ++ // vd, vj ++ // V8HI, V16QI ++ v8i16_r = __lsx_vexth_h_b(v16i8_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vexth.h.b( ++ ++ // __lsx_vexth_w_h ++ // vd, vj ++ // V4SI, V8HI ++ v4i32_r = __lsx_vexth_w_h(v8i16_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vexth.w.h( ++ ++ // __lsx_vexth_d_w ++ // vd, vj ++ // V2DI, V4SI ++ v2i64_r = __lsx_vexth_d_w(v4i32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vexth.d.w( ++ ++ // __lsx_vexth_q_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vexth_q_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vexth.q.d( ++ ++ // __lsx_vexth_hu_bu ++ // vd, vj ++ // UV8HI, UV16QI ++ v8u16_r = __lsx_vexth_hu_bu(v16u8_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu( ++ ++ // __lsx_vexth_wu_hu ++ // vd, vj ++ // UV4SI, UV8HI ++ v4u32_r = __lsx_vexth_wu_hu(v8u16_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu( ++ ++ // __lsx_vexth_du_wu ++ // vd, vj ++ // UV2DI, UV4SI ++ v2u64_r = __lsx_vexth_du_wu(v4u32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu( ++ ++ // __lsx_vexth_qu_du ++ // vd, vj ++ // UV2DI, UV2DI ++ v2u64_r = __lsx_vexth_qu_du(v2u64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du( ++ ++ // __lsx_vrotri_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vrotri_b(v16i8_a, ui3); // CHECK: call <16 x i8> 
@llvm.loongarch.lsx.vrotri.b( ++ ++ // __lsx_vrotri_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vrotri_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vrotri.h( ++ ++ // __lsx_vrotri_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vrotri_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vrotri.w( ++ ++ // __lsx_vrotri_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vrotri_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vrotri.d( ++ ++ // __lsx_vextl_q_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vextl_q_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vextl.q.d( ++ ++ // __lsx_vsrlni_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vsrlni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h( ++ ++ // __lsx_vsrlni_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vsrlni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w( ++ ++ // __lsx_vsrlni_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vsrlni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d( ++ ++ // __lsx_vsrlni_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vsrlni_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q( ++ ++ // __lsx_vssrlni_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vssrlni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h( ++ ++ // __lsx_vssrlni_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vssrlni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w( ++ ++ // __lsx_vssrlni_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vssrlni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d( ++ ++ // __lsx_vssrlni_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vssrlni_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q( ++ ++ // __lsx_vssrlni_bu_h ++ // vd, vj, ui4 ++ // UV16QI, UV16QI, V16QI, USI ++ v16u8_r = __lsx_vssrlni_bu_h(v16u8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h( ++ ++ // __lsx_vssrlni_hu_w ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, V8HI, USI ++ v8u16_r = __lsx_vssrlni_hu_w(v8u16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w( ++ ++ // __lsx_vssrlni_wu_d ++ // vd, vj, ui6 ++ // UV4SI, UV4SI, V4SI, USI ++ v4u32_r = __lsx_vssrlni_wu_d(v4u32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d( ++ ++ // __lsx_vssrlni_du_q ++ // vd, vj, ui7 ++ // UV2DI, UV2DI, V2DI, USI ++ v2u64_r = __lsx_vssrlni_du_q(v2u64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q( ++ ++ // __lsx_vssrlrni_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vssrlrni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h( ++ ++ // __lsx_vssrlrni_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vssrlrni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w( ++ ++ // __lsx_vssrlrni_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vssrlrni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d( ++ ++ // __lsx_vssrlrni_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, 
USI ++ v2i64_r = __lsx_vssrlrni_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q( ++ ++ // __lsx_vssrlrni_bu_h ++ // vd, vj, ui4 ++ // UV16QI, UV16QI, V16QI, USI ++ v16u8_r = __lsx_vssrlrni_bu_h(v16u8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h( ++ ++ // __lsx_vssrlrni_hu_w ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, V8HI, USI ++ v8u16_r = __lsx_vssrlrni_hu_w(v8u16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w( ++ ++ // __lsx_vssrlrni_wu_d ++ // vd, vj, ui6 ++ // UV4SI, UV4SI, V4SI, USI ++ v4u32_r = __lsx_vssrlrni_wu_d(v4u32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d( ++ ++ // __lsx_vssrlrni_du_q ++ // vd, vj, ui7 ++ // UV2DI, UV2DI, V2DI, USI ++ v2u64_r = __lsx_vssrlrni_du_q(v2u64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q( ++ ++ // __lsx_vsrani_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vsrani_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h( ++ ++ // __lsx_vsrani_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vsrani_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w( ++ ++ // __lsx_vsrani_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vsrani_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d( ++ ++ // __lsx_vsrani_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vsrani_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q( ++ ++ // __lsx_vsrarni_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vsrarni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h( ++ ++ // __lsx_vsrarni_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vsrarni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w( ++ ++ // __lsx_vsrarni_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vsrarni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d( ++ ++ // __lsx_vsrarni_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vsrarni_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q( ++ ++ // __lsx_vssrani_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vssrani_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h( ++ ++ // __lsx_vssrani_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vssrani_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w( ++ ++ // __lsx_vssrani_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vssrani_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d( ++ ++ // __lsx_vssrani_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vssrani_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q( ++ ++ // __lsx_vssrani_bu_h ++ // vd, vj, ui4 ++ // UV16QI, UV16QI, V16QI, USI ++ v16u8_r = __lsx_vssrani_bu_h(v16u8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h( ++ ++ // __lsx_vssrani_hu_w ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, V8HI, USI ++ v8u16_r = __lsx_vssrani_hu_w(v8u16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w( ++ ++ // __lsx_vssrani_wu_d ++ // vd, vj, 
ui6 ++ // UV4SI, UV4SI, V4SI, USI ++ v4u32_r = __lsx_vssrani_wu_d(v4u32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d( ++ ++ // __lsx_vssrani_du_q ++ // vd, vj, ui7 ++ // UV2DI, UV2DI, V2DI, USI ++ v2u64_r = __lsx_vssrani_du_q(v2u64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q( ++ ++ // __lsx_vssrarni_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vssrarni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h( ++ ++ // __lsx_vssrarni_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vssrarni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w( ++ ++ // __lsx_vssrarni_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vssrarni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d( ++ ++ // __lsx_vssrarni_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vssrarni_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q( ++ ++ // __lsx_vssrarni_bu_h ++ // vd, vj, ui4 ++ // UV16QI, UV16QI, V16QI, USI ++ v16u8_r = __lsx_vssrarni_bu_h(v16u8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h( ++ ++ // __lsx_vssrarni_hu_w ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, V8HI, USI ++ v8u16_r = __lsx_vssrarni_hu_w(v8u16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w( ++ ++ // __lsx_vssrarni_wu_d ++ // vd, vj, ui6 ++ // UV4SI, UV4SI, V4SI, USI ++ v4u32_r = __lsx_vssrarni_wu_d(v4u32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d( ++ ++ // __lsx_vssrarni_du_q ++ // vd, vj, ui7 ++ // UV2DI, UV2DI, V2DI, USI ++ v2u64_r = __lsx_vssrarni_du_q(v2u64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q( ++ ++ // __lsx_vpermi_w ++ // vd, vj, ui8 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vpermi_w(v4i32_a, v4i32_b, ui8); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpermi.w( ++ ++ // __lsx_vld ++ // vd, rj, si12 ++ // V16QI, CVPOINTER, SI ++ v16i8_r = __lsx_vld(&v16i8_a, si12); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vld( ++ ++ // __lsx_vst ++ // vd, rj, si12 ++ // VOID, V16QI, CVPOINTER, SI ++ __lsx_vst(v16i8_a, &v16i8_b, 0); // CHECK: call void @llvm.loongarch.lsx.vst( ++ ++ // __lsx_vssrlrn_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vssrlrn_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h( ++ ++ // __lsx_vssrlrn_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vssrlrn_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w( ++ ++ // __lsx_vssrlrn_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vssrlrn_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d( ++ ++ // __lsx_vssrln_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vssrln_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h( ++ ++ // __lsx_vssrln_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vssrln_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w( ++ ++ // __lsx_vssrln_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vssrln_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d( ++ ++ // __lsx_vorn_v ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vorn_v(v16i8_a, v16i8_b); // CHECK: call <16 x i8> 
@llvm.loongarch.lsx.vorn.v( ++ ++ // __lsx_vldi ++ // vd, i13 ++ // V2DI, HI ++ v2i64_r = __lsx_vldi(i13); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vldi( ++ ++ // __lsx_vshuf_b ++ // vd, vj, vk, va ++ // V16QI, V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vshuf_b(v16i8_a, v16i8_b, v16i8_c); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vshuf.b( ++ ++ // __lsx_vldx ++ // vd, rj, rk ++ // V16QI, CVPOINTER, DI ++ v16i8_r = __lsx_vldx(&v16i8_a, i64_d); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vldx( ++ ++ // __lsx_vstx ++ // vd, rj, rk ++ // VOID, V16QI, CVPOINTER, DI ++ __lsx_vstx(v16i8_a, &v16i8_b, i64_d); // CHECK: call void @llvm.loongarch.lsx.vstx( ++ ++ // __lsx_vextl_qu_du ++ // vd, vj ++ // UV2DI, UV2DI ++ v2u64_r = __lsx_vextl_qu_du(v2u64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du( ++ ++ // __lsx_bnz_v ++ // rd, vj ++ // SI, UV16QI ++ i32_r = __lsx_bnz_v(v16u8_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.v( ++ ++ // __lsx_bz_v ++ // rd, vj ++ // SI, UV16QI ++ i32_r = __lsx_bz_v(v16u8_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.v( ++ ++ // __lsx_bnz_b ++ // rd, vj ++ // SI, UV16QI ++ i32_r = __lsx_bnz_b(v16u8_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.b( ++ ++ // __lsx_bnz_h ++ // rd, vj ++ // SI, UV8HI ++ i32_r = __lsx_bnz_h(v8u16_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.h( ++ ++ // __lsx_bnz_w ++ // rd, vj ++ // SI, UV4SI ++ i32_r = __lsx_bnz_w(v4u32_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.w( ++ ++ // __lsx_bnz_d ++ // rd, vj ++ // SI, UV2DI ++ i32_r = __lsx_bnz_d(v2u64_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.d( ++ ++ // __lsx_bz_b ++ // rd, vj ++ // SI, UV16QI ++ i32_r = __lsx_bz_b(v16u8_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.b( ++ ++ // __lsx_bz_h ++ // rd, vj ++ // SI, UV8HI ++ i32_r = __lsx_bz_h(v8u16_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.h( ++ ++ // __lsx_bz_w ++ // rd, vj ++ // SI, UV4SI ++ i32_r = __lsx_bz_w(v4u32_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.w( ++ ++ // __lsx_bz_d ++ // rd, vj ++ // SI, UV2DI ++ i32_r = __lsx_bz_d(v2u64_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.d( ++ ++ v16i8_r = __lsx_vsrlrni_b_h(v16i8_a, v16i8_b, 2); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h( ++ ++ v8i16_r = __lsx_vsrlrni_h_w(v8i16_a, v8i16_b, 2); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w( ++ ++ v4i32_r = __lsx_vsrlrni_w_d(v4i32_a, v4i32_b, 2); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d( ++ ++ v2i64_r = __lsx_vsrlrni_d_q(v2i64_a, v2i64_b, 2); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q( ++ ++ v16i8_r = __lsx_vrepli_b(2); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vrepli.b( ++ ++ v8i16_r = __lsx_vrepli_h(2); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vrepli.h( ++ ++ v4i32_r = __lsx_vrepli_w(2); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vrepli.w( ++ ++ v2i64_r = __lsx_vrepli_d(2); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vrepli.d( ++} +diff --git a/clang/test/CodeGen/loongarch-inline-asm-modifiers.c b/clang/test/CodeGen/loongarch-inline-asm-modifiers.c +new file mode 100644 +index 000000000..412eca2bd +--- /dev/null ++++ b/clang/test/CodeGen/loongarch-inline-asm-modifiers.c +@@ -0,0 +1,50 @@ ++// RUN: %clang -target loongarch64-unknown-linux-gnu -S -o - -emit-llvm %s \ ++// RUN: | FileCheck %s ++ ++// This checks that the frontend will accept inline asm operand modifiers ++ ++int printf(const char*, ...); ++ ++typedef long long v2i64 __attribute__ ((vector_size(16), aligned(16))); ++typedef long long v4i64 __attribute__ ((vector_size(32), aligned(32))); ++ ++// 
CHECK: %{{[0-9]+}} = call i32 asm "ld.w $0,$1;\0A", "=r,*m"(ptr elementtype(i32) getelementptr inbounds (i32, ptr @b, i64 4))
++// CHECK: %{{[0-9]+}} = call i32 asm "ld.w $0,${1:D};\0A", "=r,*m"(ptr elementtype(i32) getelementptr inbounds (i32, ptr @b, i64 4))
++// CHECK: %{{[0-9]+}} = call <2 x i64> asm "vldi ${0:w},1", "=f"
++// CHECK: %{{[0-9]+}} = call <4 x i64> asm "xldi ${0:u},1", "=f"
++int b[8] = {0,1,2,3,4,5,6,7};
++int main()
++{
++  int i;
++  v2i64 v2i64_r;
++  v4i64 v4i64_r;
++
++  // The first word. Notice, no 'D'
++  {asm (
++  "ld.w %0,%1;\n"
++  : "=r" (i)
++  : "m" (*(b+4)));}
++
++  printf("%d\n",i);
++
++  // The second word
++  {asm (
++  "ld.w %0,%D1;\n"
++  : "=r" (i)
++  : "m" (*(b+4))
++  );}
++
++  // LSX registers
++  { asm("vldi %w0,1"
++    : "=f"(v2i64_r)); }
++
++  printf("%d\n", i);
++
++  // LASX registers
++  { asm("xldi %u0,1"
++    : "=f"(v4i64_r)); }
++
++  printf("%d\n",i);
++
++  return 1;
++}
+diff --git a/clang/test/CodeGen/loongarch-inline-asm.c b/clang/test/CodeGen/loongarch-inline-asm.c
+new file mode 100644
+index 000000000..1f995ac79
+--- /dev/null
++++ b/clang/test/CodeGen/loongarch-inline-asm.c
+@@ -0,0 +1,31 @@
++// REQUIRES: loongarch-registered-target
++// RUN: %clang_cc1 -triple loongarch64-linux-gnu -emit-llvm -o - %s | FileCheck %s
++
++int data;
++
++void m () {
++  asm("ld.w $r1, %0" :: "m"(data));
++  // CHECK: call void asm sideeffect "ld.w $$r1, $0", "*m"(ptr elementtype(i32) @data)
++}
++
++void ZC () {
++  asm("ll.w $r1, %0" :: "ZC"(data));
++  // CHECK: call void asm sideeffect "ll.w $$r1, $0", "*^ZC"(ptr elementtype(i32) @data)
++}
++
++void ZB () {
++  asm("amadd_db.w $zero, $r1, %0" :: "ZB"(data));
++  // CHECK: call void asm sideeffect "amadd_db.w $$zero, $$r1, $0", "*^ZB"(ptr elementtype(i32) @data)
++}
++
++void R () {
++  asm("ld.w $r1, %0" :: "R"(data));
++  // CHECK: call void asm sideeffect "ld.w $$r1, $0", "*R"(ptr elementtype(i32) @data)
++}
++
++int *p;
++void preld () {
++  asm("preld 0, %0, 2" :: "r"(p));
++  // CHECK: %0 = load ptr, ptr @p, align 8
++  // CHECK: call void asm sideeffect "preld 0, $0, 2", "r"(ptr %0)
++}
+diff --git a/clang/test/CodeGenCXX/LoongArch/abi-lp64d-struct-inherit.cpp b/clang/test/CodeGenCXX/LoongArch/abi-lp64d-struct-inherit.cpp
+new file mode 100644
+index 000000000..dc5ffaf08
+--- /dev/null
++++ b/clang/test/CodeGenCXX/LoongArch/abi-lp64d-struct-inherit.cpp
+@@ -0,0 +1,95 @@
++// RUN: %clang_cc1 -triple loongarch64 -target-abi lp64 \
++// RUN: -emit-llvm %s -o - | FileCheck %s
++
++#include <stdint.h>
++
++/// Ensure that fields inherited from a parent struct are treated in the same
++/// way as fields directly in the child for the purposes of LoongArch ABI rules.
++
++struct parent1_int32_s {
++  int32_t i1;
++};
++
++struct child1_int32_s : parent1_int32_s {
++  int32_t i2;
++};
++
++// CHECK-LABEL: define{{.*}} i64 @_Z30int32_int32_struct_inheritance14child1_int32_s(i64 %a.coerce)
++struct child1_int32_s int32_int32_struct_inheritance(struct child1_int32_s a) {
++  return a;
++}
++
++struct parent2_int32_s {
++  int32_t i1;
++};
++
++struct child2_float_s : parent2_int32_s {
++  float f1;
++};
++
++// CHECK-LABEL: define{{.*}} { i32, float } @_Z30int32_float_struct_inheritance14child2_float_s(i32 %0, float %1)
++struct child2_float_s int32_float_struct_inheritance(struct child2_float_s a) {
++  return a;
++}
++
++struct parent3_float_s {
++  float f1;
++};
++
++struct child3_int64_s : parent3_float_s {
++  int64_t i1;
++};
++
++// CHECK-LABEL: define{{.*}} { float, i64 } @_Z30float_int64_struct_inheritance14child3_int64_s(float %0, i64 %1)
++struct child3_int64_s float_int64_struct_inheritance(struct child3_int64_s a) {
++  return a;
++}
++
++struct parent4_double_s {
++  double d1;
++};
++
++struct child4_double_s : parent4_double_s {
++  double d1;
++};
++
++// CHECK-LABEL: define{{.*}} { double, double } @_Z32double_double_struct_inheritance15child4_double_s(double %0, double %1)
++struct child4_double_s double_double_struct_inheritance(struct child4_double_s a) {
++  return a;
++}
++
++/// When virtual inheritance is used, the resulting struct isn't eligible for
++/// passing in registers.
++
++struct parent5_virtual_s {
++  int32_t i1;
++};
++
++struct child5_virtual_s : virtual parent5_virtual_s {
++  float f1;
++};
++
++// CHECK-LABEL: define{{.*}} void @_ZN16child5_virtual_sC1EOS_(ptr{{.*}} %this, ptr{{.*}} dereferenceable(12) %0)
++struct child5_virtual_s int32_float_virtual_struct_inheritance(struct child5_virtual_s a) {
++  return a;
++}
++
++/// Check for correct lowering in the presence of diamond inheritance.
++ ++struct parent6_float_s { ++ float f1; ++}; ++ ++struct child6a_s : parent6_float_s { ++}; ++ ++struct child6b_s : parent6_float_s { ++}; ++ ++struct grandchild_6_s : child6a_s, child6b_s { ++}; ++ ++// CHECK-LABEL: define{{.*}} { float, float } @_Z38float_float_diamond_struct_inheritance14grandchild_6_s(float %0, float %1) ++struct grandchild_6_s float_float_diamond_struct_inheritance(struct grandchild_6_s a) { ++ return a; ++} +diff --git a/clang/test/Driver/baremetal.cpp b/clang/test/Driver/baremetal.cpp +index 7c11fe671..56eb5b708 100644 +--- a/clang/test/Driver/baremetal.cpp ++++ b/clang/test/Driver/baremetal.cpp +@@ -105,7 +105,7 @@ + // CHECK-SYSROOT-INC-NOT: "-internal-isystem" "include" + + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: -target aarch64-none-elf \ ++// RUN: -target aarch64-none-elf --sysroot= \ + // RUN: | FileCheck --check-prefix=CHECK-AARCH64-NO-HOST-INC %s + // Verify that the bare metal driver does not include any host system paths: + // CHECK-AARCH64-NO-HOST-INC: InstalledDir: [[INSTALLEDDIR:.+]] +diff --git a/clang/test/Driver/hexagon-toolchain-linux.c b/clang/test/Driver/hexagon-toolchain-linux.c +index 05ae17339..986c2dd61 100644 +--- a/clang/test/Driver/hexagon-toolchain-linux.c ++++ b/clang/test/Driver/hexagon-toolchain-linux.c +@@ -100,7 +100,7 @@ + // ----------------------------------------------------------------------------- + // internal-isystem for linux with and without musl + // ----------------------------------------------------------------------------- +-// RUN: %clang -### -target hexagon-unknown-linux-musl \ ++// RUN: %clang -### -target hexagon-unknown-linux-musl --sysroot= \ + // RUN: -ccc-install-dir %S/Inputs/hexagon_tree/Tools/bin \ + // RUN: -resource-dir=%S/Inputs/resource_dir \ + // RUN: %s 2>&1 \ +@@ -110,7 +110,7 @@ + // CHECK008-SAME: {{^}} "-internal-isystem" "[[RESOURCE]]/include" + // CHECK008-SAME: {{^}} "-internal-externc-isystem" "[[INSTALLED_DIR]]/../target/hexagon/include" + +-// RUN: %clang -### -target hexagon-unknown-linux \ ++// RUN: %clang -### -target hexagon-unknown-linux --sysroot= \ + // RUN: -ccc-install-dir %S/Inputs/hexagon_tree/Tools/bin \ + // RUN: -resource-dir=%S/Inputs/resource_dir \ + // RUN: %s 2>&1 \ +diff --git a/clang/test/Driver/loongarch-alignment-feature.c b/clang/test/Driver/loongarch-alignment-feature.c +new file mode 100644 +index 000000000..2270ff536 +--- /dev/null ++++ b/clang/test/Driver/loongarch-alignment-feature.c +@@ -0,0 +1,8 @@ ++// RUN: %clang -target loongarch64-unknown-linux-gnu -mno-strict-align -### %s 2> %t ++// RUN: FileCheck --check-prefix=CHECK-UNALIGNED < %t %s ++ ++// RUN: %clang -target loongarch64-unknown-linux-gnu -mstrict-align -### %s 2> %t ++// RUN: FileCheck --check-prefix=CHECK-ALIGNED < %t %s ++ ++// CHECK-UNALIGNED: "-target-feature" "+unaligned-access" ++// CHECK-ALIGNED: "-target-feature" "-unaligned-access" +diff --git a/clang/test/Driver/loongarch-march.c b/clang/test/Driver/loongarch-march.c +new file mode 100644 +index 000000000..196862229 +--- /dev/null ++++ b/clang/test/Driver/loongarch-march.c +@@ -0,0 +1,15 @@ ++/// This test checks the valid cpu model which is supported by LoongArch. 
++ ++// RUN: %clang --target=loongarch64 -march=la264 -emit-llvm -### %s 2> %t ++// | FileCheck -check-prefix=LA264 %t %s ++// RUN: %clang --target=loongarch64 -march=la364 -emit-llvm -### %s 2> %t ++// | FileCheck -check-prefix=LA364 %t %s ++// RUN: %clang --target=loongarch64 -march=la464 -emit-llvm -### %s 2> %t ++// | FileCheck -check-prefix=LA464 %t %s ++// RUN: %clang --target=loongarch64 -march=xxx -emit-llvm -### %s 2> %t ++// | FileCheck -check-prefix=INVALID %t %s ++ ++// LA264: "-target-cpu la264" "-target-abi lp64" ++// LA364: "-target-cpu la364" "-target-abi lp64" ++// LA464: "-target-cpu la464" "-target-abi lp64" ++// INVALID: error: unknown target CPU 'xxx' +diff --git a/clang/test/Preprocessor/init-loongarch.c b/clang/test/Preprocessor/init-loongarch.c +new file mode 100644 +index 000000000..a32853d17 +--- /dev/null ++++ b/clang/test/Preprocessor/init-loongarch.c +@@ -0,0 +1,10 @@ ++// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - | grep loongarch | FileCheck %s ++ ++// CHECK: #define __loongarch64 1 ++// CHECK-NEXT: #define __loongarch__ 1 ++// CHECK-NEXT: #define __loongarch_double_float 1 ++// CHECK-NEXT: #define __loongarch_fpr 64 ++// CHECK-NEXT: #define __loongarch_frlen 64 ++// CHECK-NEXT: #define __loongarch_grlen 64 ++// CHECK-NEXT: #define __loongarch_hard_float 1 ++// CHECK-NEXT: #define __loongarch_lp64 1 +diff --git a/clang/unittests/Interpreter/ExceptionTests/InterpreterExceptionTest.cpp b/clang/unittests/Interpreter/ExceptionTests/InterpreterExceptionTest.cpp +index 75928d912..3350ee3fe 100644 +--- a/clang/unittests/Interpreter/ExceptionTests/InterpreterExceptionTest.cpp ++++ b/clang/unittests/Interpreter/ExceptionTests/InterpreterExceptionTest.cpp +@@ -104,6 +104,11 @@ extern "C" int throw_exception() { + if (Triple.isPPC()) + return; + ++ // FIXME: LoongArch64 fails due to `Symbols not found: ++ // [DW.ref.__gxx_personality_v0]` ++ if (Triple.isLoongArch64()) ++ return; ++ + // FIXME: ARM fails due to `Not implemented relocation type!` + if (Triple.isARM()) + return; diff --git a/clang.spec b/clang.spec index 639e472..b7d3cbb 100644 --- a/clang.spec +++ b/clang.spec @@ -1,4 +1,4 @@ -%define anolis_release .0.2 +%define anolis_release .0.3 %bcond_with compat_build %bcond_without check @@ -68,7 +68,7 @@ Patch4: 0001-cmake-Allow-shared-libraries-to-customize-the-soname.patch Patch5: 0001-Work-around-gcc-miscompile.patch Patch7: 0010-PATCH-clang-Produce-DWARF4-by-default.patch Patch8: disable-recommonmark.patch - +Patch9: 0001-Support-LoongArch.patch # Patches for clang-tools-extra %if %{without compat_build} @@ -77,6 +77,9 @@ Patch201: 0001-clang-tools-extra-Make-test-dependency-on-LLVMHello-.patch BuildRequires: gcc BuildRequires: gcc-c++ +%ifarch loongarch64 +BuildRequires: clang +%endif BuildRequires: cmake BuildRequires: ninja-build %if %{with compat_build} @@ -165,7 +168,9 @@ libomp-devel to enable -fopenmp. Summary: Runtime library for clang Requires: %{name}-resource-filesystem%{?_isa} = %{version} # RHEL specific: Use libstdc++ from gcc12 by default. rhbz#2064507 +%ifnarch loongarch64 Requires: gcc-toolset-12-gcc-c++ +%endif Recommends: compiler-rt%{?_isa} = %{version} # libomp-devel is required, so clang can find the omp.h header when compiling # with -fopenmp. @@ -323,6 +328,10 @@ CFLAGS="$CFLAGS -Wno-address -Wno-nonnull -Wno-maybe-uninitialized" # We set CLANG_DEFAULT_PIE_ON_LINUX=OFF to match the default used by Fedora's GCC. %cmake .. 
-G Ninja \
 -DCLANG_DEFAULT_PIE_ON_LINUX=OFF \
+%ifarch loongarch64
+ -DCMAKE_C_COMPILER=clang \
+ -DCMAKE_CXX_COMPILER=clang++ \
+%endif
 -DLLVM_PARALLEL_LINK_JOBS=1 \
 -DLLVM_LINK_LLVM_DYLIB:BOOL=ON \
 -DCMAKE_BUILD_TYPE=RelWithDebInfo \
@@ -374,8 +383,12 @@ CFLAGS="$CFLAGS -Wno-address -Wno-nonnull -Wno-maybe-uninitialized"
 %ifarch %{arm}
 -DCLANG_DEFAULT_LINKER=lld \
 %endif
+%ifnarch loongarch64
 -DCLANG_DEFAULT_UNWINDLIB=libgcc \
 -DGCC_INSTALL_PREFIX=/opt/rh/gcc-toolset-12/root/usr
+%else
+ -DCLANG_DEFAULT_UNWINDLIB=libgcc
+%endif
 
 %cmake_build
 
@@ -608,6 +621,11 @@ false
 %endif
 
 %changelog
+* Fri Aug 11 2023 Chen Li - 15.0.7-1.0.3
+- Support LoongArch
+- Workaround: add BuildRequires of clang on loongarch64 because
+  gcc-toolset-12 is currently missing there
+
 * Wed Jul 19 2023 Zhao Hang - 15.0.7-1.0.2
 - Add loongarch64 arch
-- 
Gitee