From 143da2bf11c0fbfbebc89682bfcdf3752d850045 Mon Sep 17 00:00:00 2001 From: chenli Date: Tue, 8 Aug 2023 20:01:42 +0800 Subject: [PATCH] Support LoongArch --- 0001-Support-LoongArch.patch | 24842 +++++++++++++++++++++++++++++++++ clang.spec | 22 +- 2 files changed, 24862 insertions(+), 2 deletions(-) create mode 100644 0001-Support-LoongArch.patch diff --git a/0001-Support-LoongArch.patch b/0001-Support-LoongArch.patch new file mode 100644 index 0000000..7f4d71f --- /dev/null +++ b/0001-Support-LoongArch.patch @@ -0,0 +1,24842 @@ +diff --git a/clang/bindings/python/tests/CMakeLists.txt b/clang/bindings/python/tests/CMakeLists.txt +index 5127512fe..8383e6fae 100644 +--- a/clang/bindings/python/tests/CMakeLists.txt ++++ b/clang/bindings/python/tests/CMakeLists.txt +@@ -40,7 +40,7 @@ endif() + # addressed. + # SystemZ has broken Python/FFI interface: + # https://reviews.llvm.org/D52840#1265716 +-if(${LLVM_NATIVE_ARCH} MATCHES "^(AArch64|Hexagon|Sparc|SystemZ)$") ++if(${LLVM_NATIVE_ARCH} MATCHES "^(AArch64|Hexagon|LoongArch|Sparc|SystemZ)$") + set(RUN_PYTHON_TESTS FALSE) + endif() + +diff --git a/clang/include/clang/Basic/BuiltinsLoongArch.def b/clang/include/clang/Basic/BuiltinsLoongArch.def +new file mode 100644 +index 000000000..75d7e77c1 +--- /dev/null ++++ b/clang/include/clang/Basic/BuiltinsLoongArch.def +@@ -0,0 +1,1990 @@ ++//===-- BuiltinsLoongArch.def - LoongArch Builtin function database --------*- C++ -*-==// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines the LoongArch-specific builtin function database. Users of ++// this file must define the BUILTIN macro to make use of this information. ++// ++//===----------------------------------------------------------------------===// ++ ++// The format of this database matches clang/Basic/Builtins.def. 
++ ++// LoongArch LSX ++ ++BUILTIN(__builtin_lsx_vclo_b, "V16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vclo_h, "V8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vclo_w, "V4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vclo_d, "V2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vflogb_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vflogb_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vpickve2gr_b, "iV16ScIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_h, "iV8SsIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_w, "iV4SiIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_d, "LLiV2SLLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vpickve2gr_bu, "iV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_hu, "iV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_wu, "iV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_du, "LLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vreplvei_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vreplvei_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vreplvei_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vreplvei_d, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vmskltz_b, "V16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmskltz_h, "V8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmskltz_w, "V4iV4i", "nc") ++BUILTIN(__builtin_lsx_vmskltz_d, "V2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vfmadd_s, "V4fV4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmadd_d, "V2dV2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfmsub_s, "V4fV4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmsub_d, "V2dV2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfnmadd_s, "V4fV4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfnmadd_d, "V2dV2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfnmsub_s, "V4fV4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfnmsub_d, "V2dV2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_caf_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_caf_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cor_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cor_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cun_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cun_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cune_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cune_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cueq_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cueq_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_ceq_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_ceq_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cne_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cne_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_clt_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_clt_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cult_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cult_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cle_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cle_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cule_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cule_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_saf_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_saf_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sor_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sor_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sun_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sun_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sune_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sune_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sueq_s, 
"V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sueq_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_seq_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_seq_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sne_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sne_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_slt_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_slt_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sult_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sult_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sle_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sle_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sule_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sule_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vbitsel_v, "V16UcV16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vshuf_b, "V16UcV16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vldrepl_b, "V16cvC*Ii", "nc") ++BUILTIN(__builtin_lsx_vldrepl_h, "V8svC*Ii", "nc") ++BUILTIN(__builtin_lsx_vldrepl_w, "V4ivC*Ii", "nc") ++BUILTIN(__builtin_lsx_vldrepl_d, "V2LLivC*Ii", "nc") ++ ++BUILTIN(__builtin_lsx_vstelm_b, "vV16Scv*IiUi", "nc") ++BUILTIN(__builtin_lsx_vstelm_h, "vV8Ssv*IiUi", "nc") ++BUILTIN(__builtin_lsx_vstelm_w, "vV4Siv*IiUi", "nc") ++BUILTIN(__builtin_lsx_vstelm_d, "vV2SLLiv*IiUi", "nc") ++ ++BUILTIN(__builtin_lsx_vldx, "V16ScvC*LLi", "nc") ++BUILTIN(__builtin_lsx_vstx, "vV16Scv*LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwev_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vaddwev_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vaddwev_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vaddwev_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsubwev_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsubwev_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsubwev_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsubwev_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwod_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vaddwod_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vaddwod_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vaddwod_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsubwod_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsubwod_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsubwod_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsubwod_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwev_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vaddwev_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vaddwev_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vaddwev_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsubwev_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vsubwev_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vsubwev_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vsubwev_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwod_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vaddwod_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vaddwod_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vaddwod_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsubwod_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vsubwod_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vsubwod_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vsubwod_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwev_d_wu_w, "V2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vaddwev_w_hu_h, 
"V4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vaddwev_h_bu_b, "V8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vaddwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwod_d_wu_w, "V2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vaddwod_w_hu_h, "V4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vaddwod_h_bu_b, "V8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vaddwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vhaddw_q_d, "V2LLiV2LLiV2LLi", "nc") ++BUILTIN(__builtin_lsx_vhsubw_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vhaddw_qu_du, "V2ULLiV2ULLiV2ULLi", "nc") ++BUILTIN(__builtin_lsx_vhsubw_qu_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmuh_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmuh_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmuh_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vmuh_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmuh_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmuh_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmuh_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmuh_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwev_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmulwev_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmulwev_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmulwev_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwod_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmulwod_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmulwod_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmulwod_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwev_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmulwev_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmulwev_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmulwev_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwod_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmulwod_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmulwod_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmulwod_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwev_d_wu_w, "V2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmulwev_w_hu_h, "V4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vmulwev_h_bu_b, "V8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vmulwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwod_d_wu_w, "V2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmulwod_w_hu_h, "V4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vmulwod_h_bu_b, "V8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vmulwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwev_d_w, "V2LLiV2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_w_h, "V4SiV4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_h_b, "V8sV8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwod_d_w, "V2LLiV2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_w_h, "V4SiV4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_h_b, "V8sV8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwev_d_wu, "V2ULLiV2ULLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_w_hu, "V4UiV4UiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_h_bu, "V8UsV8UsV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_q_du, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwod_d_wu, "V2ULLiV2ULLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_w_hu, "V4UiV4UiV8UsV8Us", "nc") 
++BUILTIN(__builtin_lsx_vmaddwod_h_bu, "V8UsV8UsV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_q_du, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwev_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_w_hu_h, "V4SiV4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_h_bu_b, "V8sV8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwod_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_w_hu_h, "V4SiV4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_h_bu_b, "V8sV8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrln_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrln_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsrln_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsran_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsran_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsran_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrlrn_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrlrn_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsrlrn_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrarn_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrarn_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsrarn_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrln_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vssrln_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vssrln_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssran_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vssran_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vssran_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlrn_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vssrlrn_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vssrlrn_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrarn_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vssrarn_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vssrarn_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrln_bu_h, "V16UcV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vssrln_hu_w, "V8UsV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vssrln_wu_d, "V4UiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssran_bu_h, "V16UcV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vssran_hu_w, "V8UsV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vssran_wu_d, "V4UiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlrn_bu_h, "V16UcV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vssrlrn_hu_w, "V8UsV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vssrlrn_wu_d, "V4UiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrarn_bu_h, "V16UcV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vssrarn_hu_w, "V8UsV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vssrarn_wu_d, "V4UiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vandn_v, "V16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vorn_v, "V16ScV16ScV16Sc", "nc") ++ ++BUILTIN(__builtin_lsx_vfrstp_b, "V16ScV16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vfrstp_h, "V8SsV8SsV8SsV8Ss", "nc") ++ ++BUILTIN(__builtin_lsx_vadd_q, "V2LLiV2LLiV2LLi", "nc") ++BUILTIN(__builtin_lsx_vsub_q, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsigncov_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vsigncov_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vsigncov_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsigncov_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vfcvt_h_s, "V8sV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcvt_s_d, 
"V4fV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftint_w_d, "V4SiV2dV2d", "nc") ++BUILTIN(__builtin_lsx_vffint_s_l, "V4fV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrz_w_d, "V4SiV2dV2d", "nc") ++BUILTIN(__builtin_lsx_vftintrp_w_d, "V4SiV2dV2d", "nc") ++BUILTIN(__builtin_lsx_vftintrm_w_d, "V4SiV2dV2d", "nc") ++BUILTIN(__builtin_lsx_vftintrne_w_d, "V4SiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vbsrl_v, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vbsll_v, "V16cV16cIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vfrstpi_b, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vfrstpi_h, "V8sV8sV8sIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vneg_b, "V16cV16c", "nc") ++BUILTIN(__builtin_lsx_vneg_h, "V8sV8s", "nc") ++BUILTIN(__builtin_lsx_vneg_w, "V4iV4i", "nc") ++BUILTIN(__builtin_lsx_vneg_d, "V2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmskgez_b, "V16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmsknz_b, "V8sV8s", "nc") ++ ++BUILTIN(__builtin_lsx_vfrintrm_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrintrm_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrintrp_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrintrp_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrintrz_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrintrz_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrintrne_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrintrne_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vffinth_d_w, "V2dV4Si", "nc") ++BUILTIN(__builtin_lsx_vffintl_d_w, "V2dV4Si", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrm_w_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrm_l_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrp_w_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrp_l_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrz_w_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrz_l_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrne_w_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrne_l_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftinth_l_s, "V2LLiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintl_l_s, "V2LLiV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrmh_l_s, "V2LLiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrml_l_s, "V2LLiV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrph_l_s, "V2LLiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrpl_l_s, "V2LLiV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrzh_l_s, "V2LLiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrzl_l_s, "V2LLiV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrneh_l_s, "V2LLiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrnel_l_s, "V2LLiV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vexth_d_w, "V2LLiV4Si", "nc") ++BUILTIN(__builtin_lsx_vexth_w_h, "V4SiV8s", "nc") ++BUILTIN(__builtin_lsx_vexth_h_b, "V8sV16c", "nc") ++BUILTIN(__builtin_lsx_vexth_q_d, "V2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vexth_du_wu, "V2ULLiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vexth_wu_hu, "V4UiV8Us", "nc") ++BUILTIN(__builtin_lsx_vexth_hu_bu, "V8UsV16Uc", "nc") ++BUILTIN(__builtin_lsx_vexth_qu_du, "V2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsllwil_d_w, "V2LLiV4SiIUi", "nc") ++BUILTIN(__builtin_lsx_vsllwil_w_h, "V4SiV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsllwil_h_b, "V8sV16cIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vextl_q_d, "V2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsllwil_du_wu, "V2ULLiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vsllwil_wu_hu, "V4UiV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vsllwil_hu_bu, "V8UsV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vextl_qu_du, "V2LLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitclri_b, "V16UcV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vbitclri_h, "V8UsV8UsIUi", "nc") 
++BUILTIN(__builtin_lsx_vbitclri_w, "V4UiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vbitclri_d, "V2ULLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitseti_b, "V16UcV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vbitseti_h, "V8UsV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vbitseti_w, "V4UiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vbitseti_d, "V2ULLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitrevi_b, "V16UcV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vbitrevi_h, "V8UsV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vbitrevi_w, "V4UiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vbitrevi_d, "V2ULLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlrni_b_h, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_h_w, "V8sV8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_w_d, "V4iV4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_d_q, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrani_b_h, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrani_h_w, "V8sV8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrani_w_d, "V4iV4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrani_d_q, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vextrins_b, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vextrins_h, "V8sV8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vextrins_w, "V4iV4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vextrins_d, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitseli_b, "V16UcV16UcV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vandi_b, "V16UcV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vori_b, "V16UcV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vxori_b, "V16UcV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vnori_b, "V16UcV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vldi, "V2LLiIi", "nc") ++BUILTIN(__builtin_lsx_vrepli_b, "V16cIi", "nc") ++BUILTIN(__builtin_lsx_vrepli_h, "V8sIi", "nc") ++BUILTIN(__builtin_lsx_vrepli_w, "V4iIi", "nc") ++BUILTIN(__builtin_lsx_vrepli_d, "V2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vpermi_w, "V4iV4iV4iIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsadd_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vsadd_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vsadd_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsadd_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssub_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vssub_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vssub_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vssub_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsadd_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vsadd_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vsadd_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vsadd_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssub_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vssub_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vssub_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vssub_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vhaddw_h_b, "V8SsV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vhaddw_w_h, "V4SiV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vhaddw_d_w, "V2SLLiV4SiV4Si", "nc") ++ ++BUILTIN(__builtin_lsx_vhsubw_h_b, "V8SsV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vhsubw_w_h, "V4SiV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vhsubw_d_w, "V2SLLiV4SiV4Si", "nc") ++ ++BUILTIN(__builtin_lsx_vhaddw_hu_bu, "V8UsV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vhaddw_wu_hu, "V4UiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vhaddw_du_wu, "V2ULLiV4UiV4Ui", "nc") ++ ++BUILTIN(__builtin_lsx_vhsubw_hu_bu, "V8UsV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vhsubw_wu_hu, "V4UiV8UsV8Us", "nc") 
++BUILTIN(__builtin_lsx_vhsubw_du_wu, "V2ULLiV4UiV4Ui", "nc") ++ ++BUILTIN(__builtin_lsx_vadda_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vadda_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vadda_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vadda_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vabsd_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vabsd_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vabsd_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vabsd_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vabsd_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vabsd_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vabsd_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vabsd_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vavg_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vavg_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vavg_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vavg_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vavg_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vavg_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vavg_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vavg_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vavgr_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vavgr_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vavgr_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vavgr_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vavgr_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vavgr_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vavgr_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vavgr_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrlr_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsrlr_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrlr_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsrlr_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrar_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsrar_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrar_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsrar_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vfmax_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmax_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfmin_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmin_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfmaxa_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmaxa_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfmina_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmina_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfclass_s, "V4iV4f", "nc") ++BUILTIN(__builtin_lsx_vfclass_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrecip_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrecip_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrsqrt_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrsqrt_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcvtl_s_h, "V4fV8s", "nc") ++BUILTIN(__builtin_lsx_vfcvtl_d_s, "V2dV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vfcvth_s_h, "V4fV8s", "nc") ++BUILTIN(__builtin_lsx_vfcvth_d_s, "V2dV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vftint_w_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vftint_l_d, "V2SLLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftint_wu_s, "V4UiV4f", "nc") ++BUILTIN(__builtin_lsx_vftint_lu_d, "V2ULLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vsrlri_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlri_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlri_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlri_d, "V2LLiV2LLiIUi", "nc") ++ 
++BUILTIN(__builtin_lsx_vsrari_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrari_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrari_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrari_d, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsat_b, "V16ScV16ScIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_h, "V8SsV8SsIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_w, "V4SiV4SiIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_d, "V2SLLiV2SLLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsat_bu, "V16UcV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_hu, "V8UsV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_wu, "V4UiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_du, "V2ULLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrlni_b_h, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlni_h_w, "V8sV8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlni_w_d, "V4iV4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlni_d_q, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrlrni_b_h, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlrni_h_w, "V8sV8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlrni_w_d, "V4iV4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlrni_d_q, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlni_b_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_h_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_w_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_d_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlrni_bu_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_hu_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_wu_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_du_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrarni_b_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vsrarni_h_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vsrarni_w_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vsrarni_d_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrani_b_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_h_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_w_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_d_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrani_bu_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_hu_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_wu_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_du_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrarni_b_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_h_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_w_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_d_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrarni_bu_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_hu_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_wu_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_du_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlni_bu_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_hu_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_wu_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_du_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vseq_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vseq_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vseq_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vseq_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsle_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vsle_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vsle_w, "V4SiV4SiV4Si", "nc") 
++BUILTIN(__builtin_lsx_vsle_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsle_bu, "V16ScV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vsle_hu, "V8SsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vsle_wu, "V4SiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vsle_du, "V2SLLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vslt_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vslt_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vslt_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vslt_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vslt_bu, "V16ScV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vslt_hu, "V8SsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vslt_wu, "V4SiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vslt_du, "V2SLLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vadd_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vadd_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vadd_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vadd_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsub_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsub_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsub_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsub_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmax_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmax_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmax_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmax_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmin_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmin_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmin_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmin_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmax_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmax_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmax_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmax_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmin_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmin_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmin_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmin_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmul_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmul_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmul_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmul_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmadd_b, "V16ScV16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmadd_h, "V8SsV8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmadd_w, "V4SiV4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmadd_d, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmsub_b, "V16ScV16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmsub_h, "V8SsV8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmsub_w, "V4SiV4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmsub_d, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vdiv_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vdiv_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vdiv_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vdiv_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmod_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmod_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmod_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmod_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vdiv_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vdiv_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vdiv_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vdiv_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ 
++BUILTIN(__builtin_lsx_vsll_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsll_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsll_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsll_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrl_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsrl_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrl_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsrl_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitclr_b, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vbitclr_h, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vbitclr_w, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vbitclr_d, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitset_b, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vbitset_h, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vbitset_w, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vbitset_d, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vpackev_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vpackev_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vpackev_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vpackev_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vpackod_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vpackod_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vpackod_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vpackod_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vilvl_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vilvl_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vilvl_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vilvl_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vilvh_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vilvh_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vilvh_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vilvh_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vpickev_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vpickev_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vpickev_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vpickev_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vand_v, "V16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vor_v, "V16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vbitrev_b, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vbitrev_h, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vbitrev_w, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vbitrev_d, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmod_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmod_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmod_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmod_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vpickod_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vpickod_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vpickod_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vpickod_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vreplve_b, "V16cV16cUi", "nc") ++BUILTIN(__builtin_lsx_vreplve_h, "V8sV8sUi", "nc") ++BUILTIN(__builtin_lsx_vreplve_w, "V4iV4iUi", "nc") ++BUILTIN(__builtin_lsx_vreplve_d, "V2LLiV2LLiUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsra_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsra_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsra_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsra_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vxor_v, "V16cV16cV16c", "nc") ++ ++BUILTIN(__builtin_lsx_vnor_v, "V16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vfadd_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfadd_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfsub_s, 
"V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfsub_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfmul_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmul_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vshuf_h, "V8sV8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vshuf_w, "V4iV4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vshuf_d, "V2LLiV2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vseqi_b, "V16ScV16ScISi", "nc") ++BUILTIN(__builtin_lsx_vseqi_h, "V8SsV8SsISi", "nc") ++BUILTIN(__builtin_lsx_vseqi_w, "V4SiV4SiISi", "nc") ++BUILTIN(__builtin_lsx_vseqi_d, "V2SLLiV2SLLiISi", "nc") ++ ++BUILTIN(__builtin_lsx_vslei_b, "V16ScV16ScISi", "nc") ++BUILTIN(__builtin_lsx_vslei_h, "V8SsV8SsISi", "nc") ++BUILTIN(__builtin_lsx_vslei_w, "V4SiV4SiISi", "nc") ++BUILTIN(__builtin_lsx_vslei_d, "V2SLLiV2SLLiISi", "nc") ++ ++BUILTIN(__builtin_lsx_vslei_bu, "V16ScV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vslei_hu, "V8SsV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vslei_wu, "V4SiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vslei_du, "V2SLLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vslti_b, "V16ScV16ScISi", "nc") ++BUILTIN(__builtin_lsx_vslti_h, "V8SsV8SsISi", "nc") ++BUILTIN(__builtin_lsx_vslti_w, "V4SiV4SiISi", "nc") ++BUILTIN(__builtin_lsx_vslti_d, "V2SLLiV2SLLiISi", "nc") ++ ++BUILTIN(__builtin_lsx_vslti_bu, "V16ScV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vslti_hu, "V8SsV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vslti_wu, "V4SiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vslti_du, "V2SLLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddi_bu, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vaddi_hu, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vaddi_wu, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vaddi_du, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsubi_bu, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsubi_hu, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsubi_wu, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsubi_du, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaxi_b, "V16ScV16ScIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_h, "V8SsV8SsIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_w, "V4SiV4SiIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_d, "V2SLLiV2SLLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vmini_b, "V16ScV16ScIi", "nc") ++BUILTIN(__builtin_lsx_vmini_h, "V8SsV8SsIi", "nc") ++BUILTIN(__builtin_lsx_vmini_w, "V4SiV4SiIi", "nc") ++BUILTIN(__builtin_lsx_vmini_d, "V2SLLiV2SLLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaxi_bu, "V16UcV16UcIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_hu, "V8UsV8UsIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_wu, "V4UiV4UiIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_du, "V2ULLiV2ULLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vmini_bu, "V16UcV16UcIi", "nc") ++BUILTIN(__builtin_lsx_vmini_hu, "V8UsV8UsIi", "nc") ++BUILTIN(__builtin_lsx_vmini_wu, "V4UiV4UiIi", "nc") ++BUILTIN(__builtin_lsx_vmini_du, "V2ULLiV2ULLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vclz_b, "V16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vclz_h, "V8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vclz_w, "V4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vclz_d, "V2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vpcnt_b, "V16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vpcnt_h, "V8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vpcnt_w, "V4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vpcnt_d, "V2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vfsqrt_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfsqrt_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrint_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrint_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vffint_s_w, "V4fV4Si", "nc") ++BUILTIN(__builtin_lsx_vffint_d_l, "V2dV2SLLi", 
"nc") ++ ++BUILTIN(__builtin_lsx_vffint_s_wu, "V4fV4Ui", "nc") ++BUILTIN(__builtin_lsx_vffint_d_lu, "V2dV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrz_wu_s, "V4UiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrz_lu_d, "V2ULLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vreplgr2vr_b, "V16Sci", "nc") ++BUILTIN(__builtin_lsx_vreplgr2vr_h, "V8Ssi", "nc") ++BUILTIN(__builtin_lsx_vreplgr2vr_w, "V4Sii", "nc") ++BUILTIN(__builtin_lsx_vreplgr2vr_d, "V2SLLiLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vinsgr2vr_b, "V16ScV16SciIUi", "nc") ++BUILTIN(__builtin_lsx_vinsgr2vr_h, "V8SsV8SsiIUi", "nc") ++BUILTIN(__builtin_lsx_vinsgr2vr_w, "V4SiV4SiiIUi", "nc") ++BUILTIN(__builtin_lsx_vinsgr2vr_d, "V2SLLiV2SLLiLLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vfdiv_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfdiv_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vslli_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vslli_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vslli_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vslli_d, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrli_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrli_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrli_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrli_d, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrai_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrai_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrai_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrai_d, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vshuf4i_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vshuf4i_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vshuf4i_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vshuf4i_d, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vrotr_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vrotr_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vrotr_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vrotr_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vrotri_b, "V16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vrotri_h, "V8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vrotri_w, "V4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vrotri_d, "V2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vld, "V16ScvC*Ii", "nc") ++ ++BUILTIN(__builtin_lsx_vst, "vV16Scv*Ii", "nc") ++ ++BUILTIN(__builtin_lsx_bz_v, "iV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_bnz_v, "iV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_bz_b, "iV16Uc", "nc") ++BUILTIN(__builtin_lsx_bz_h, "iV8Us", "nc") ++BUILTIN(__builtin_lsx_bz_w, "iV4Ui", "nc") ++BUILTIN(__builtin_lsx_bz_d, "iV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_bnz_b, "iV16Uc", "nc") ++BUILTIN(__builtin_lsx_bnz_h, "iV8Us", "nc") ++BUILTIN(__builtin_lsx_bnz_w, "iV4Ui", "nc") ++BUILTIN(__builtin_lsx_bnz_d, "iV2ULLi", "nc") ++ ++//LoongArch LASX ++ ++BUILTIN(__builtin_lasx_xvfmadd_s, "V8fV8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmadd_d, "V4dV4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfmsub_s, "V8fV8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmsub_d, "V4dV4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfnmadd_s, "V8fV8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfnmadd_d, "V4dV4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfnmsub_s, "V8fV8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfnmsub_d, "V4dV4dV4dV4d", "nc") ++ ++ ++BUILTIN(__builtin_lasx_xvsll_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsll_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsll_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsll_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslli_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvslli_h, "V16sV16sIUi", "nc") 
++BUILTIN(__builtin_lasx_xvslli_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvslli_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsra_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsra_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsra_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsra_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrai_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrai_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrai_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrai_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrar_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsrar_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrar_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsrar_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrari_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrari_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrari_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrari_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrl_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsrl_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrl_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsrl_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrli_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrli_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrli_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrli_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrlr_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsrlr_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrlr_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsrlr_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrlri_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlri_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlri_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlri_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitclr_b, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvbitclr_h, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvbitclr_w, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvbitclr_d, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitclri_b, "V32UcV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitclri_h, "V16UsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitclri_w, "V8UiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitclri_d, "V4ULLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitset_b, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvbitset_h, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvbitset_w, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvbitset_d, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitseti_b, "V32UcV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitseti_h, "V16UsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitseti_w, "V8UiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitseti_d, "V4ULLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitrev_b, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvbitrev_h, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvbitrev_w, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvbitrev_d, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitrevi_b, "V32UcV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitrevi_h, "V16UsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitrevi_w, "V8UiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitrevi_d, "V4ULLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvadd_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvadd_h, "V16sV16sV16s", "nc") 
++BUILTIN(__builtin_lasx_xvadd_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvadd_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddi_bu, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvaddi_hu, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvaddi_wu, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvaddi_du, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsub_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsub_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsub_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsub_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsubi_bu, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsubi_hu, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsubi_wu, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsubi_du, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmax_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmax_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmax_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmax_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaxi_b, "V32ScV32ScIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_h, "V16SsV16SsIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_w, "V8SiV8SiIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_d, "V4SLLiV4SLLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmax_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmax_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmax_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmax_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaxi_bu, "V32UcV32UcIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_hu, "V16UsV16UsIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_wu, "V8UiV8UiIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_du, "V4ULLiV4ULLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmin_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmin_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmin_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmin_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmini_b, "V32ScV32ScIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_h, "V16SsV16SsIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_w, "V8SiV8SiIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_d, "V4SLLiV4SLLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmin_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmin_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmin_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmin_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmini_bu, "V32UcV32UcIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_hu, "V16UsV16UsIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_wu, "V8UiV8UiIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_du, "V4ULLiV4ULLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvseq_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvseq_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvseq_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvseq_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvseqi_b, "V32ScV32ScISi", "nc") ++BUILTIN(__builtin_lasx_xvseqi_h, "V16SsV16SsISi", "nc") ++BUILTIN(__builtin_lasx_xvseqi_w, "V8SiV8SiISi", "nc") ++BUILTIN(__builtin_lasx_xvseqi_d, "V4SLLiV4SLLiISi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslt_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvslt_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvslt_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvslt_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslti_b, "V32ScV32ScISi", "nc") ++BUILTIN(__builtin_lasx_xvslti_h, "V16SsV16SsISi", "nc") 
++BUILTIN(__builtin_lasx_xvslti_w, "V8SiV8SiISi", "nc") ++BUILTIN(__builtin_lasx_xvslti_d, "V4SLLiV4SLLiISi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslt_bu, "V32ScV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvslt_hu, "V16SsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvslt_wu, "V8SiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvslt_du, "V4SLLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslti_bu, "V32ScV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvslti_hu, "V16SsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvslti_wu, "V8SiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvslti_du, "V4SLLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsle_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvsle_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvsle_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsle_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslei_b, "V32ScV32ScISi", "nc") ++BUILTIN(__builtin_lasx_xvslei_h, "V16SsV16SsISi", "nc") ++BUILTIN(__builtin_lasx_xvslei_w, "V8SiV8SiISi", "nc") ++BUILTIN(__builtin_lasx_xvslei_d, "V4SLLiV4SLLiISi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsle_bu, "V32ScV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvsle_hu, "V16SsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvsle_wu, "V8SiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvsle_du, "V4SLLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslei_bu, "V32ScV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvslei_hu, "V16SsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvslei_wu, "V8SiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvslei_du, "V4SLLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsat_b, "V32ScV32ScIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_h, "V16SsV16SsIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_w, "V8SiV8SiIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_d, "V4SLLiV4SLLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsat_bu, "V32UcV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_hu, "V16UsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_wu, "V8UiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_du, "V4ULLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvadda_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvadda_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvadda_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvadda_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsadd_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvsadd_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvsadd_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsadd_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsadd_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvsadd_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvsadd_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvsadd_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvavg_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvavg_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvavg_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvavg_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvavg_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvavg_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvavg_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvavg_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvavgr_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvavgr_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvavgr_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvavgr_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvavgr_bu, "V32UcV32UcV32Uc", "nc") 
++BUILTIN(__builtin_lasx_xvavgr_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvavgr_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvavgr_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssub_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvssub_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvssub_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvssub_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssub_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvssub_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvssub_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvssub_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvabsd_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvabsd_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvabsd_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvabsd_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvabsd_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvabsd_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvabsd_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvabsd_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmul_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmul_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmul_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmul_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmadd_b, "V32ScV32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmadd_h, "V16SsV16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmadd_w, "V8SiV8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmadd_d, "V4SLLiV4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmsub_b, "V32ScV32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmsub_h, "V16SsV16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmsub_w, "V8SiV8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmsub_d, "V4SLLiV4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvdiv_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvdiv_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvdiv_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvdiv_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvdiv_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvdiv_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvdiv_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvdiv_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvhaddw_h_b, "V16SsV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvhaddw_w_h, "V8SiV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvhaddw_d_w, "V4SLLiV8SiV8Si", "nc") ++ ++BUILTIN(__builtin_lasx_xvhaddw_hu_bu, "V16UsV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvhaddw_wu_hu, "V8UiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvhaddw_du_wu, "V4ULLiV8UiV8Ui", "nc") ++ ++BUILTIN(__builtin_lasx_xvhsubw_h_b, "V16SsV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_w_h, "V8SiV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_d_w, "V4SLLiV8SiV8Si", "nc") ++ ++BUILTIN(__builtin_lasx_xvhsubw_hu_bu, "V16UsV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_wu_hu, "V8UiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_du_wu, "V4ULLiV8UiV8Ui", "nc") ++ ++BUILTIN(__builtin_lasx_xvmod_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmod_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmod_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmod_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmod_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmod_hu, "V16UsV16UsV16Us", "nc") 
++BUILTIN(__builtin_lasx_xvmod_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmod_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvrepl128vei_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvrepl128vei_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvrepl128vei_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvrepl128vei_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpickev_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvpickev_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvpickev_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvpickev_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpickod_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvpickod_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvpickod_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvpickod_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvilvh_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvilvh_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvilvh_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvilvh_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvilvl_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvilvl_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvilvl_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvilvl_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpackev_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvpackev_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvpackev_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvpackev_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpackod_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvpackod_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvpackod_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvpackod_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvshuf_b, "V32UcV32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvshuf_h, "V16sV16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvshuf_w, "V8iV8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvshuf_d, "V4LLiV4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvand_v, "V32UcV32UcV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xvandi_b, "V32UcV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvor_v, "V32UcV32UcV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xvori_b, "V32UcV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvnor_v, "V32UcV32UcV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xvnori_b, "V32UcV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvxor_v, "V32cV32cV32c", "nc") ++ ++BUILTIN(__builtin_lasx_xvxori_b, "V32UcV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitsel_v, "V32UcV32UcV32UcV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitseli_b, "V32UcV32UcV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvshuf4i_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvshuf4i_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvshuf4i_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvshuf4i_d, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvreplgr2vr_b, "V32Sci", "nc") ++BUILTIN(__builtin_lasx_xvreplgr2vr_h, "V16Ssi", "nc") ++BUILTIN(__builtin_lasx_xvreplgr2vr_w, "V8Sii", "nc") ++BUILTIN(__builtin_lasx_xvreplgr2vr_d, "V4SLLiLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpcnt_b, "V32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvpcnt_h, "V16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvpcnt_w, "V8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvpcnt_d, "V4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvclo_b, "V32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvclo_h, "V16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvclo_w, "V8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvclo_d, 
"V4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvclz_b, "V32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvclz_h, "V16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvclz_w, "V8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvclz_d, "V4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_caf_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_caf_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cor_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cor_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cun_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cun_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cune_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cune_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cueq_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cueq_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_ceq_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_ceq_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cne_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cne_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_clt_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_clt_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cult_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cult_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cle_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cle_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cule_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cule_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_saf_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_saf_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sor_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sor_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sun_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sun_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sune_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sune_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sueq_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sueq_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_seq_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_seq_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sne_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sne_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_slt_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_slt_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sult_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sult_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sle_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sle_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sule_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sule_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfadd_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfadd_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfsub_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfsub_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfmul_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmul_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfdiv_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfdiv_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcvt_h_s, "V16sV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcvt_s_d, "V8fV4dV4d", "nc") ++ 
++BUILTIN(__builtin_lasx_xvfmin_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmin_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfmina_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmina_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfmax_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmax_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfmaxa_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmaxa_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfclass_s, "V8iV8f", "nc") ++BUILTIN(__builtin_lasx_xvfclass_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfsqrt_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfsqrt_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrecip_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrecip_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrint_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrint_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrsqrt_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrsqrt_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvflogb_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvflogb_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcvth_s_h, "V8fV16s", "nc") ++BUILTIN(__builtin_lasx_xvfcvth_d_s, "V4dV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcvtl_s_h, "V8fV16s", "nc") ++BUILTIN(__builtin_lasx_xvfcvtl_d_s, "V4dV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvftint_w_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftint_l_d, "V4SLLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftint_wu_s, "V8UiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftint_lu_d, "V4ULLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrz_w_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrz_l_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrz_wu_s, "V8UiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrz_lu_d, "V4ULLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvffint_s_w, "V8fV8Si", "nc") ++BUILTIN(__builtin_lasx_xvffint_d_l, "V4dV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvffint_s_wu, "V8fV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvffint_d_lu, "V4dV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvreplve_b, "V32cV32cUi", "nc") ++BUILTIN(__builtin_lasx_xvreplve_h, "V16sV16sUi", "nc") ++BUILTIN(__builtin_lasx_xvreplve_w, "V8iV8iUi", "nc") ++BUILTIN(__builtin_lasx_xvreplve_d, "V4LLiV4LLiUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpermi_w, "V8iV8iV8iIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvandn_v, "V32UcV32UcV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xvneg_b, "V32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvneg_h, "V16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvneg_w, "V8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvneg_d, "V4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmuh_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmuh_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmuh_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvmuh_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmuh_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmuh_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmuh_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmuh_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsllwil_d_w, "V4LLiV8SiIUi", "nc") ++BUILTIN(__builtin_lasx_xvsllwil_w_h, "V8SiV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsllwil_h_b, "V16sV32cIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsllwil_du_wu, "V4ULLiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvsllwil_wu_hu, "V8UiV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvsllwil_hu_bu, "V16UsV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsran_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsran_h_w, "V16sV8SiV8Si", 
"nc") ++BUILTIN(__builtin_lasx_xvsran_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssran_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvssran_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvssran_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssran_bu_h, "V32UcV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvssran_hu_w, "V16UsV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvssran_wu_d, "V8UiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrarn_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrarn_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsrarn_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrarn_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvssrarn_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvssrarn_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrarn_bu_h, "V32UcV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvssrarn_hu_w, "V16UsV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvssrarn_wu_d, "V8UiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrln_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrln_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsrln_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrln_bu_h, "V32UcV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvssrln_hu_w, "V16UsV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvssrln_wu_d, "V8UiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrlrn_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrlrn_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsrlrn_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlrn_bu_h, "V32UcV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvssrlrn_hu_w, "V16UsV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvssrlrn_wu_d, "V8UiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrstpi_b, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvfrstpi_h, "V16sV16sV16sIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrstp_b, "V32ScV32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvfrstp_h, "V16SsV16SsV16SsV16Ss", "nc") ++ ++BUILTIN(__builtin_lasx_xvbsrl_v, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvbsll_v, "V32cV32cIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvextrins_b, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvextrins_h, "V16sV16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvextrins_w, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvextrins_d, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmskltz_b, "V32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmskltz_h, "V16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmskltz_w, "V8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvmskltz_d, "V4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsigncov_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvsigncov_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvsigncov_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsigncov_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrne_w_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrne_l_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrp_w_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrp_l_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrm_w_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrm_l_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftint_w_d, "V8SiV4dV4d", "nc") ++BUILTIN(__builtin_lasx_xvffint_s_l, "V8fV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrz_w_d, "V8SiV4dV4d", "nc") ++BUILTIN(__builtin_lasx_xvftintrp_w_d, "V8SiV4dV4d", "nc") ++BUILTIN(__builtin_lasx_xvftintrm_w_d, "V8SiV4dV4d", "nc") 
++BUILTIN(__builtin_lasx_xvftintrne_w_d, "V8SiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftinth_l_s, "V4LLiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintl_l_s, "V4LLiV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvffinth_d_w, "V4dV8Si", "nc") ++BUILTIN(__builtin_lasx_xvffintl_d_w, "V4dV8Si", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrzh_l_s, "V4LLiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrzl_l_s, "V4LLiV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrph_l_s, "V4LLiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrpl_l_s, "V4LLiV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrmh_l_s, "V4LLiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrml_l_s, "V4LLiV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrneh_l_s, "V4LLiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrnel_l_s, "V4LLiV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrintrne_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrintrne_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrintrz_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrintrz_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrintrp_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrintrp_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrintrm_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrintrm_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvld, "V32ScvC*Ii", "nc") ++ ++BUILTIN(__builtin_lasx_xvst, "vV32Scv*Ii", "nc") ++ ++BUILTIN(__builtin_lasx_xvstelm_b, "vV32Scv*IiUi", "nc") ++BUILTIN(__builtin_lasx_xvstelm_h, "vV16Ssv*IiUi", "nc") ++BUILTIN(__builtin_lasx_xvstelm_w, "vV8Siv*IiUi", "nc") ++BUILTIN(__builtin_lasx_xvstelm_d, "vV4SLLiv*IiUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvinsve0_w, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvinsve0_d, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpickve_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvpickve_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpickve_w_f, "V8fV8fIUi", "nc") ++BUILTIN(__builtin_lasx_xvpickve_d_f, "V4dV4dIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlrn_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvssrlrn_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvssrlrn_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrln_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvssrln_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvssrln_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvorn_v, "V32ScV32ScV32Sc", "nc") ++ ++BUILTIN(__builtin_lasx_xvldi, "V4LLiIi", "nc") ++BUILTIN(__builtin_lasx_xvrepli_b, "V32cIi", "nc") ++BUILTIN(__builtin_lasx_xvrepli_h, "V16sIi", "nc") ++BUILTIN(__builtin_lasx_xvrepli_w, "V8iIi", "nc") ++BUILTIN(__builtin_lasx_xvrepli_d, "V4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvldx, "V32ScvC*LLi", "nc") ++BUILTIN(__builtin_lasx_xvstx, "vV32Scv*LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvinsgr2vr_w, "V8SiV8SiiIUi", "nc") ++BUILTIN(__builtin_lasx_xvinsgr2vr_d, "V4SLLiV4SLLiLLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvreplve0_b, "V32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvreplve0_h, "V16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvreplve0_w, "V8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvreplve0_d, "V4SLLiV4SLLi", "nc") ++BUILTIN(__builtin_lasx_xvreplve0_q, "V32ScV32Sc", "nc") ++ ++BUILTIN(__builtin_lasx_vext2xv_d_w, "V4LLiV8Si", "nc") ++BUILTIN(__builtin_lasx_vext2xv_w_h, "V8SiV16s", "nc") ++BUILTIN(__builtin_lasx_vext2xv_h_b, "V16sV32c", "nc") ++ ++BUILTIN(__builtin_lasx_vext2xv_d_h, "V4LLiV16s", "nc") ++BUILTIN(__builtin_lasx_vext2xv_w_b, "V8SiV32c", "nc") ++BUILTIN(__builtin_lasx_vext2xv_d_b, "V4LLiV32c", "nc") ++ ++BUILTIN(__builtin_lasx_vext2xv_du_wu, 
"V4LLiV8Si", "nc") ++BUILTIN(__builtin_lasx_vext2xv_wu_hu, "V8SiV16s", "nc") ++BUILTIN(__builtin_lasx_vext2xv_hu_bu, "V16sV32c", "nc") ++ ++BUILTIN(__builtin_lasx_vext2xv_du_hu, "V4LLiV16s", "nc") ++BUILTIN(__builtin_lasx_vext2xv_wu_bu, "V8SiV32c", "nc") ++BUILTIN(__builtin_lasx_vext2xv_du_bu, "V4LLiV32c", "nc") ++ ++BUILTIN(__builtin_lasx_xvpermi_q, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvpermi_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvperm_w, "V8iV8iV8i", "nc") ++ ++BUILTIN(__builtin_lasx_xvldrepl_b, "V32cvC*Ii", "nc") ++BUILTIN(__builtin_lasx_xvldrepl_h, "V16svC*Ii", "nc") ++BUILTIN(__builtin_lasx_xvldrepl_w, "V8ivC*Ii", "nc") ++BUILTIN(__builtin_lasx_xvldrepl_d, "V4LLivC*Ii", "nc") ++ ++BUILTIN(__builtin_lasx_xvpickve2gr_w, "iV8SiIUi", "nc") ++BUILTIN(__builtin_lasx_xvpickve2gr_d, "LLiV4SLLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpickve2gr_wu, "iV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvpickve2gr_du, "LLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwev_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwev_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsubwev_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsubwev_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmulwev_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmulwev_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwod_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwod_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsubwod_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsubwod_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ 
++BUILTIN(__builtin_lasx_xvmulwod_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmulwod_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwev_d_wu_w, "V4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_w_hu_h, "V8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_h_bu_b, "V16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_q_du_d, "V4LLiV4ULLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmulwev_d_wu_w, "V4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_w_hu_h, "V8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_h_bu_b, "V16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_q_du_d, "V4LLiV4ULLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwod_d_wu_w, "V4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_w_hu_h, "V8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_h_bu_b, "V16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmulwod_d_wu_w, "V4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_w_hu_h, "V8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_h_bu_b, "V16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvhaddw_q_d, "V4LLiV4LLiV4LLi", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvhaddw_qu_du, "V4ULLiV4ULLiV4ULLi", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_qu_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwev_d_w, "V4LLiV4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_w_h, "V8SiV8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_h_b, "V16sV16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwev_d_wu, "V4ULLiV4ULLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_w_hu, "V8UiV8UiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_h_bu, "V16UsV16UsV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_q_du, "V4ULLiV4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwod_d_w, "V4LLiV4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_w_h, "V8SiV8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_h_b, "V16sV16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwod_d_wu, "V4ULLiV4ULLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_w_hu, "V8UiV8UiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_h_bu, "V16UsV16UsV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_q_du, "V4ULLiV4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwev_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_w_hu_h, "V8SiV8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_h_bu_b, "V16sV16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwod_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_w_hu_h, "V8SiV8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_h_bu_b, "V16sV16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc") ++ 
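A leading "I" in these prototypes (as in the rotate- and shift-immediate entries that follow) marks an operand that must be an integer constant expression; the per-builtin immediate-range checks are hooked into Sema through the CheckLoongArchBuiltinFunctionCall declaration added elsewhere in this patch. A sketch, with v16i16 as an illustrative typedef:

typedef short v16i16 __attribute__((vector_size(32)));

static v16i16 rot3(v16i16 v) {
  return __builtin_lasx_xvrotri_h(v, 3);  /* literal immediate: accepted */
}
/* Passing a run-time value as the second operand is expected to be rejected
   at compile time, since the prototype requires a constant.                 */
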
++BUILTIN(__builtin_lasx_xvrotr_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvrotr_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvrotr_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvrotr_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvadd_q, "V4LLiV4LLiV4LLi", "nc") ++BUILTIN(__builtin_lasx_xvsub_q, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmskgez_b, "V32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmsknz_b, "V16sV16s", "nc") ++ ++BUILTIN(__builtin_lasx_xvexth_d_w, "V4LLiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvexth_w_h, "V8SiV16s", "nc") ++BUILTIN(__builtin_lasx_xvexth_h_b, "V16sV32c", "nc") ++BUILTIN(__builtin_lasx_xvexth_q_d, "V4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvexth_du_wu, "V4ULLiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvexth_wu_hu, "V8UiV16Us", "nc") ++BUILTIN(__builtin_lasx_xvexth_hu_bu, "V16UsV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvexth_qu_du, "V4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvrotri_b, "V32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvrotri_h, "V16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvrotri_w, "V8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvrotri_d, "V4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrlni_b_h, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlni_h_w, "V16sV16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlni_w_d, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlni_d_q, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrlrni_b_h, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlrni_h_w, "V16sV16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlrni_w_d, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlrni_d_q, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlni_b_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_h_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_w_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_d_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlni_bu_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_hu_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_wu_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_du_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlrni_b_h, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_h_w, "V16sV16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_w_d, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_d_q, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlrni_bu_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_hu_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_wu_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_du_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrani_b_h, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrani_h_w, "V16sV16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrani_w_d, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrani_d_q, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrarni_b_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvsrarni_h_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvsrarni_w_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvsrarni_d_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrani_b_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrani_h_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrani_w_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrani_d_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrani_bu_h, "V32cV32cV32cIi", "nc") 
++BUILTIN(__builtin_lasx_xvssrani_hu_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrani_wu_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrani_du_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrarni_b_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_h_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_w_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_d_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrarni_bu_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_hu_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_wu_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_du_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xbz_v, "iV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xbnz_v, "iV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xbz_b, "iV32Uc", "nc") ++BUILTIN(__builtin_lasx_xbz_h, "iV16Us", "nc") ++BUILTIN(__builtin_lasx_xbz_w, "iV8Ui", "nc") ++BUILTIN(__builtin_lasx_xbz_d, "iV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xbnz_b, "iV32Uc", "nc") ++BUILTIN(__builtin_lasx_xbnz_h, "iV16Us", "nc") ++BUILTIN(__builtin_lasx_xbnz_w, "iV8Ui", "nc") ++BUILTIN(__builtin_lasx_xbnz_d, "iV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvextl_q_d, "V4LLiV4LLi", "nc") ++BUILTIN(__builtin_lasx_xvextl_qu_du, "V4LLiV4ULLi", "nc") ++ ++ ++// LoongArch BASE ++ ++BUILTIN(__builtin_loongarch_cpucfg, "UiUi", "nc") ++BUILTIN(__builtin_loongarch_csrrd_w, "UiIUi", "nc") ++BUILTIN(__builtin_loongarch_csrrd_d, "ULiIULi", "nc") ++BUILTIN(__builtin_loongarch_csrwr_w, "UiUiIUi", "nc") ++BUILTIN(__builtin_loongarch_csrwr_d, "ULiULiIULi", "nc") ++BUILTIN(__builtin_loongarch_csrxchg_w, "UiUiUiIUi", "nc") ++BUILTIN(__builtin_loongarch_csrxchg_d, "ULiULiULiIULi", "nc") ++BUILTIN(__builtin_loongarch_iocsrrd_b, "UiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrrd_h, "UiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrrd_w, "UiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrrd_d, "ULiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrwr_b, "vUiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrwr_h, "vUiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrwr_w, "vUiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrwr_d, "vULiUi", "nc") ++BUILTIN(__builtin_loongarch_cacop_w, "viUii", "nc") ++BUILTIN(__builtin_loongarch_cacop_d, "viULiLi", "nc") ++BUILTIN(__builtin_loongarch_crc_w_b_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crc_w_h_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crc_w_w_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crc_w_d_w, "iLii", "nc") ++BUILTIN(__builtin_loongarch_crcc_w_b_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crcc_w_h_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crcc_w_w_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crcc_w_d_w, "iLii", "nc") ++BUILTIN(__builtin_loongarch_tlbclr, "v", "nc") ++BUILTIN(__builtin_loongarch_tlbflush, "v", "nc") ++BUILTIN(__builtin_loongarch_tlbfill, "v", "nc") ++BUILTIN(__builtin_loongarch_tlbrd, "v", "nc") ++BUILTIN(__builtin_loongarch_tlbwr, "v", "nc") ++BUILTIN(__builtin_loongarch_tlbsrch, "v", "nc") ++BUILTIN(__builtin_loongarch_syscall, "vIULi", "nc") ++BUILTIN(__builtin_loongarch_break, "vIULi", "nc") ++BUILTIN(__builtin_loongarch_asrtle_d, "vLiLi", "nc") ++BUILTIN(__builtin_loongarch_asrtgt_d, "vLiLi", "nc") ++BUILTIN(__builtin_loongarch_dbar, "vIULi", "nc") ++BUILTIN(__builtin_loongarch_ibar, "vIULi", "nc") ++#undef BUILTIN +diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h +index d8ad9858d..5af3d2099 100644 +--- 
a/clang/include/clang/Basic/TargetBuiltins.h ++++ b/clang/include/clang/Basic/TargetBuiltins.h +@@ -150,6 +150,16 @@ namespace clang { + }; + } // namespace RISCV + ++ /// LoongArch builtins ++ namespace LoongArch { ++ enum { ++ LastTIBuiltin = clang::Builtin::FirstTSBuiltin-1, ++#define BUILTIN(ID, TYPE, ATTRS) BI##ID, ++#include "clang/Basic/BuiltinsLoongArch.def" ++ LastTSBuiltin ++ }; ++ } // namespace LoongArch ++ + /// Flags to identify the types for overloaded Neon builtins. + /// + /// These must be kept in sync with the flags in utils/TableGen/NeonEmitter.h. +@@ -341,7 +351,8 @@ namespace clang { + PPC::LastTSBuiltin, NVPTX::LastTSBuiltin, AMDGPU::LastTSBuiltin, + X86::LastTSBuiltin, VE::LastTSBuiltin, RISCV::LastTSBuiltin, + Hexagon::LastTSBuiltin, Mips::LastTSBuiltin, XCore::LastTSBuiltin, +- SystemZ::LastTSBuiltin, WebAssembly::LastTSBuiltin}); ++ SystemZ::LastTSBuiltin, WebAssembly::LastTSBuiltin, ++ LoongArch::LastTSBuiltin}); + + } // end namespace clang. + +diff --git a/clang/include/clang/Basic/TargetCXXABI.def b/clang/include/clang/Basic/TargetCXXABI.def +index 9501cca76..8ea4becef 100644 +--- a/clang/include/clang/Basic/TargetCXXABI.def ++++ b/clang/include/clang/Basic/TargetCXXABI.def +@@ -88,6 +88,12 @@ ITANIUM_CXXABI(GenericAArch64, "aarch64") + /// - representation of member function pointers adjusted as in ARM. + ITANIUM_CXXABI(GenericMIPS, "mips") + ++/// The generic LoongArch ABI is a modified version of the Itanium ABI. ++/// ++/// At the moment, only change from the generic ABI in this case is: ++/// - representation of member function pointers adjusted as in ARM. ++ITANIUM_CXXABI(GenericLoongArch, "loongarch") ++ + /// The WebAssembly ABI is a modified version of the Itanium ABI. + /// + /// The changes from the Itanium ABI are: +diff --git a/clang/include/clang/Basic/TargetCXXABI.h b/clang/include/clang/Basic/TargetCXXABI.h +index e727f85ed..507cf580e 100644 +--- a/clang/include/clang/Basic/TargetCXXABI.h ++++ b/clang/include/clang/Basic/TargetCXXABI.h +@@ -102,6 +102,9 @@ public: + case GenericAArch64: + return T.isAArch64(); + ++ case GenericLoongArch: ++ return T.isLoongArch(); ++ + case GenericMIPS: + return T.isMIPS(); + +@@ -166,6 +169,7 @@ public: + case Fuchsia: + case GenericARM: + case GenericAArch64: ++ case GenericLoongArch: + case GenericMIPS: + // TODO: ARM-style pointers to member functions put the discriminator in + // the this adjustment, so they don't require functions to have any +@@ -250,6 +254,7 @@ public: + case GenericItanium: + case iOS: // old iOS compilers did not follow this rule + case Microsoft: ++ case GenericLoongArch: + case GenericMIPS: + case XL: + return true; +@@ -288,6 +293,7 @@ public: + case GenericAArch64: + case GenericARM: + case iOS: ++ case GenericLoongArch: + case GenericMIPS: + case XL: + return UseTailPaddingUnlessPOD03; +diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td +index 3cab37b21..8a264ac42 100644 +--- a/clang/include/clang/Driver/Options.td ++++ b/clang/include/clang/Driver/Options.td +@@ -184,6 +184,8 @@ def m_x86_Features_Group : OptionGroup<"">, + Group, Flags<[CoreOption]>, DocName<"X86">; + def m_riscv_Features_Group : OptionGroup<"">, + Group, DocName<"RISCV">; ++def m_loongarch_Features_Group : OptionGroup<"">, ++ Group, DocName<"LoongArch">; + + def m_libc_Group : OptionGroup<"">, Group, + Flags<[HelpHidden]>; +@@ -3491,12 +3493,15 @@ def mcmodel_EQ_medany : Flag<["-"], "mcmodel=medany">, Group, Group, + HelpText<"Enable use of experimental RISC-V 
extensions.">; + +-def munaligned_access : Flag<["-"], "munaligned-access">, Group, +- HelpText<"Allow memory accesses to be unaligned (AArch32/AArch64 only)">; +-def mno_unaligned_access : Flag<["-"], "mno-unaligned-access">, Group, +- HelpText<"Force all memory accesses to be aligned (AArch32/AArch64 only)">; ++def munaligned_access : Flag<["-"], "munaligned-access">, Group, ++ HelpText<"Allow memory accesses to be unaligned">; ++def mno_unaligned_access : Flag<["-"], "mno-unaligned-access">, Group, ++ HelpText<"Force all memory accesses to be aligned">; + def mstrict_align : Flag<["-"], "mstrict-align">, Alias, Flags<[CC1Option,HelpHidden]>, + HelpText<"Force all memory accesses to be aligned (same as mno-unaligned-access)">; ++def mno_strict_align : Flag<["-"], "mno-strict-align">, Group, ++ Flags<[CC1Option,HelpHidden]>, Alias, ++ HelpText<"Allow memory accesses to be unaligned (LoongArch only, same as munaligned-access)">; + def mno_thumb : Flag<["-"], "mno-thumb">, Group; + def mrestrict_it: Flag<["-"], "mrestrict-it">, Group, + HelpText<"Disallow generation of complex IT blocks.">; +@@ -3824,6 +3829,14 @@ def mstack_protector_guard_reg_EQ : Joined<["-"], "mstack-protector-guard-reg="> + def mfentry : Flag<["-"], "mfentry">, HelpText<"Insert calls to fentry at function entry (x86/SystemZ only)">, + Flags<[CC1Option]>, Group, + MarshallingInfoFlag>; ++def mlsx : Flag<["-"], "mlsx">, Group, ++ HelpText<"Use LARCH Loongson LSX instructions.">; ++def mno_lsx : Flag<["-"], "mno-lsx">, Group, ++ HelpText<"Disable LARCH Loongson LSX instructions.">; ++def mlasx : Flag<["-"], "mlasx">, Group, ++ HelpText<"Enable LARCH Loongson LASX instructions.">; ++def mno_lasx : Flag<["-"], "mno-lasx">, Group, ++ HelpText<"Disable LARCH Loongson LASX instructions.">; + def mnop_mcount : Flag<["-"], "mnop-mcount">, HelpText<"Generate mcount/__fentry__ calls as nops. 
To activate they need to be patched in.">, + Flags<[CC1Option]>, Group, + MarshallingInfoFlag>; +diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h +index 681a76dfa..5249fe601 100644 +--- a/clang/include/clang/Sema/Sema.h ++++ b/clang/include/clang/Sema/Sema.h +@@ -13117,6 +13117,9 @@ private: + bool CheckRISCVLMUL(CallExpr *TheCall, unsigned ArgNum); + bool CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, + CallExpr *TheCall); ++ bool CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, ++ unsigned BuiltinID, ++ CallExpr *TheCall); + + bool SemaBuiltinVAStart(unsigned BuiltinID, CallExpr *TheCall); + bool SemaBuiltinVAStartARMMicrosoft(CallExpr *Call); +diff --git a/clang/include/clang/module.modulemap b/clang/include/clang/module.modulemap +index 01bce7771..a21e2beeb 100644 +--- a/clang/include/clang/module.modulemap ++++ b/clang/include/clang/module.modulemap +@@ -42,6 +42,7 @@ module Clang_Basic { + textual header "Basic/BuiltinsHexagon.def" + textual header "Basic/BuiltinsHexagonDep.def" + textual header "Basic/BuiltinsHexagonMapCustomDep.def" ++ textual header "Basic/BuiltinsLoongArch.def" + textual header "Basic/BuiltinsMips.def" + textual header "Basic/BuiltinsNEON.def" + textual header "Basic/BuiltinsNVPTX.def" +diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp +index cfd7bf604..c6e1e9eed 100644 +--- a/clang/lib/AST/ASTContext.cpp ++++ b/clang/lib/AST/ASTContext.cpp +@@ -900,6 +900,7 @@ CXXABI *ASTContext::createCXXABI(const TargetInfo &T) { + case TargetCXXABI::iOS: + case TargetCXXABI::WatchOS: + case TargetCXXABI::GenericAArch64: ++ case TargetCXXABI::GenericLoongArch: + case TargetCXXABI::GenericMIPS: + case TargetCXXABI::GenericItanium: + case TargetCXXABI::WebAssembly: +@@ -11747,6 +11748,7 @@ MangleContext *ASTContext::createMangleContext(const TargetInfo *T) { + case TargetCXXABI::GenericAArch64: + case TargetCXXABI::GenericItanium: + case TargetCXXABI::GenericARM: ++ case TargetCXXABI::GenericLoongArch: + case TargetCXXABI::GenericMIPS: + case TargetCXXABI::iOS: + case TargetCXXABI::WebAssembly: +diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt +index c38c9fddb..f0f3839a7 100644 +--- a/clang/lib/Basic/CMakeLists.txt ++++ b/clang/lib/Basic/CMakeLists.txt +@@ -82,6 +82,7 @@ add_clang_library(clangBasic + Targets/Hexagon.cpp + Targets/Lanai.cpp + Targets/Le64.cpp ++ Targets/LoongArch.cpp + Targets/M68k.cpp + Targets/MSP430.cpp + Targets/Mips.cpp +diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp +index 2d6ef9984..d63468161 100644 +--- a/clang/lib/Basic/Targets.cpp ++++ b/clang/lib/Basic/Targets.cpp +@@ -24,6 +24,7 @@ + #include "Targets/Hexagon.h" + #include "Targets/Lanai.h" + #include "Targets/Le64.h" ++#include "Targets/LoongArch.h" + #include "Targets/M68k.h" + #include "Targets/MSP430.h" + #include "Targets/Mips.h" +@@ -327,6 +328,25 @@ TargetInfo *AllocateTarget(const llvm::Triple &Triple, + case llvm::Triple::le64: + return new Le64TargetInfo(Triple, Opts); + ++#if 0 ++ //TODO: support it in future ++ case llvm::Triple::loongarch32: ++ switch (os) { ++ case llvm::Triple::Linux: ++ return new LinuxTargetInfo(Triple, Opts); ++ default: ++ return new LoongArchTargetInfo(Triple, Opts); ++ } ++#endif ++ ++ case llvm::Triple::loongarch64: ++ switch (os) { ++ case llvm::Triple::Linux: ++ return new LinuxTargetInfo(Triple, Opts); ++ default: ++ return new LoongArchTargetInfo(Triple, Opts); ++ } ++ + case llvm::Triple::ppc: + if (Triple.isOSDarwin()) + 
return new DarwinPPC32TargetInfo(Triple, Opts); +diff --git a/clang/lib/Basic/Targets/LoongArch.cpp b/clang/lib/Basic/Targets/LoongArch.cpp +new file mode 100644 +index 000000000..7f5632327 +--- /dev/null ++++ b/clang/lib/Basic/Targets/LoongArch.cpp +@@ -0,0 +1,184 @@ ++//===--- LoongArch.cpp - Implement LoongArch target feature support -----------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file implements LoongArch TargetInfo objects. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArch.h" ++#include "Targets.h" ++#include "clang/Basic/Diagnostic.h" ++#include "clang/Basic/MacroBuilder.h" ++#include "clang/Basic/TargetBuiltins.h" ++#include "llvm/ADT/StringSwitch.h" ++ ++using namespace clang; ++using namespace clang::targets; ++ ++const Builtin::Info LoongArchTargetInfo::BuiltinInfo[] = { ++#define BUILTIN(ID, TYPE, ATTRS) \ ++ {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, ++#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) \ ++ {#ID, TYPE, ATTRS, HEADER, ALL_LANGUAGES, nullptr}, ++#include "clang/Basic/BuiltinsLoongArch.def" ++}; ++ ++bool LoongArchTargetInfo::processorSupportsGPR64() const { ++ return llvm::StringSwitch(CPU) ++ .Case("la264", true) ++ .Case("la364", true) ++ .Case("la464", true) ++ .Default(false); ++ return false; ++} ++ ++static constexpr llvm::StringLiteral ValidCPUNames[] = { ++ {"la264"}, {"la364"}, {"la464"}}; ++ ++bool LoongArchTargetInfo::isValidCPUName(StringRef Name) const { ++ return llvm::find(ValidCPUNames, Name) != std::end(ValidCPUNames); ++} ++ ++void LoongArchTargetInfo::fillValidCPUList( ++ SmallVectorImpl &Values) const { ++ Values.append(std::begin(ValidCPUNames), std::end(ValidCPUNames)); ++} ++ ++void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts, ++ MacroBuilder &Builder) const { ++ Builder.defineMacro("__loongarch__"); ++ unsigned GRLen = getRegisterWidth(); ++ Builder.defineMacro("__loongarch_grlen", Twine(GRLen)); ++ if (GRLen == 64) ++ Builder.defineMacro("__loongarch64"); ++ ++ if (ABI == "lp32") { ++ Builder.defineMacro("__loongarch32"); ++ } else { ++ Builder.defineMacro("__loongarch_lp64"); ++ } ++ ++ if (ABI == "lp32") { ++ Builder.defineMacro("_ABILP32", "1"); ++ } else if (ABI == "lpx32") { ++ Builder.defineMacro("_ABILPX32", "2"); ++ } else if (ABI == "lp64") { ++ Builder.defineMacro("_ABILP64", "3"); ++ Builder.defineMacro("_LOONGARCH_SIM", "_ABILP64"); ++ } else ++ llvm_unreachable("Invalid ABI."); ++ ++ Builder.defineMacro("__REGISTER_PREFIX__", ""); ++ ++ switch (FloatABI) { ++ case HardFloat: ++ Builder.defineMacro("__loongarch_hard_float", Twine(1)); ++ Builder.defineMacro(IsSingleFloat ? 
"__loongarch_single_float" ++ : "__loongarch_double_float", ++ Twine(1)); ++ break; ++ case SoftFloat: ++ Builder.defineMacro("__loongarch_soft_float", Twine(1)); ++ break; ++ } ++ ++ switch (FPMode) { ++ case FP32: ++ Builder.defineMacro("__loongarch_fpr", Twine(32)); ++ Builder.defineMacro("__loongarch_frlen", Twine(32)); ++ break; ++ case FP64: ++ Builder.defineMacro("__loongarch_fpr", Twine(64)); ++ Builder.defineMacro("__loongarch_frlen", Twine(64)); ++ break; ++ } ++ ++ if (HasLSX) ++ Builder.defineMacro("__loongarch_sx", Twine(1)); ++ ++ if (HasLASX) ++ Builder.defineMacro("__loongarch_asx", Twine(1)); ++ ++ Builder.defineMacro("_LOONGARCH_SZPTR", Twine(getPointerWidth(0))); ++ Builder.defineMacro("_LOONGARCH_SZINT", Twine(getIntWidth())); ++ Builder.defineMacro("_LOONGARCH_SZLONG", Twine(getLongWidth())); ++ ++ Builder.defineMacro("_LOONGARCH_ARCH", "\"" + CPU + "\""); ++ Builder.defineMacro("_LOONGARCH_ARCH_" + StringRef(CPU).upper()); ++ ++ Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1"); ++ Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2"); ++ Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4"); ++ ++ // 32-bit loongarch processors don't have the necessary lld/scd instructions ++ // found in 64-bit processors. In the case of lp32 on a 64-bit processor, ++ // the instructions exist but using them violates the ABI since they ++ // require 64-bit GPRs and LP32 only supports 32-bit GPRs. ++ if (ABI == "lpx32" || ABI == "lp64") ++ Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8"); ++} ++ ++bool LoongArchTargetInfo::hasFeature(StringRef Feature) const { ++ return llvm::StringSwitch(Feature) ++ .Case("fp64", FPMode == FP64) ++ .Case("lsx", HasLSX) ++ .Case("lasx", HasLASX) ++ .Default(false); ++} ++ ++ArrayRef LoongArchTargetInfo::getTargetBuiltins() const { ++ return llvm::makeArrayRef(BuiltinInfo, clang::LoongArch::LastTSBuiltin - ++ Builtin::FirstTSBuiltin); ++} ++ ++bool LoongArchTargetInfo::validateTarget(DiagnosticsEngine &Diags) const { ++ // FIXME: It's valid to use LP32 on a 64-bit CPU but the backend can't handle ++ // this yet. It's better to fail here than on the backend assertion. ++ if (processorSupportsGPR64() && ABI == "lp32") { ++ Diags.Report(diag::err_target_unsupported_abi) << ABI << CPU; ++ return false; ++ } ++ ++ // 64-bit ABI's require 64-bit CPU's. ++ if (!processorSupportsGPR64() && (ABI == "lpx32" || ABI == "lp64")) { ++ Diags.Report(diag::err_target_unsupported_abi) << ABI << CPU; ++ return false; ++ } ++ ++ // FIXME: It's valid to use lp32 on a loongarch64 triple but the backend ++ // can't handle this yet. It's better to fail here than on the ++ // backend assertion. ++ if (getTriple().isLoongArch64() && ABI == "lp32") { ++ Diags.Report(diag::err_target_unsupported_abi_for_triple) ++ << ABI << getTriple().str(); ++ return false; ++ } ++ ++ // FIXME: It's valid to use lpx32/lp64 on a loongarch32 triple but the backend ++ // can't handle this yet. It's better to fail here than on the ++ // backend assertion. 
++ if (getTriple().isLoongArch32() && (ABI == "lpx32" || ABI == "lp64")) { ++ Diags.Report(diag::err_target_unsupported_abi_for_triple) ++ << ABI << getTriple().str(); ++ return false; ++ } ++ ++ // -mfp32 and lpx32/lp64 ABIs are incompatible ++ if (FPMode != FP64 && !IsSingleFloat && ++ (ABI == "lpx32" || ABI == "lp64")) { ++ Diags.Report(diag::err_opt_not_valid_with_opt) << "-mfp32" << ABI; ++ return false; ++ } ++ ++ if (FPMode != FP64 && (CPU == "la264" || CPU == "la364" || CPU == "la464")) { ++ Diags.Report(diag::err_opt_not_valid_with_opt) << "-mfp32" << CPU; ++ return false; ++ } ++ ++ return true; ++} +diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h +new file mode 100644 +index 000000000..ef18cc887 +--- /dev/null ++++ b/clang/lib/Basic/Targets/LoongArch.h +@@ -0,0 +1,402 @@ ++//===--- LoongArch.h - Declare LoongArch target feature support -----------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file declares LoongArch TargetInfo objects. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_CLANG_LIB_BASIC_TARGETS_LOONGARCH_H ++#define LLVM_CLANG_LIB_BASIC_TARGETS_LOONGARCH_H ++ ++#include "clang/Basic/TargetInfo.h" ++#include "clang/Basic/TargetOptions.h" ++#include "llvm/ADT/Triple.h" ++#include "llvm/Support/Compiler.h" ++ ++namespace clang { ++namespace targets { ++ ++class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo { ++ void setDataLayout() { ++ StringRef Layout; ++ ++ if (ABI == "lp32") ++ Layout = "m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64"; ++ else if (ABI == "lpx32") ++ Layout = "m:e-p:32:32-i8:8:32-i16:16:32-i64:64-n32:64-S128"; ++ else if (ABI == "lp64") ++ Layout = "m:e-i8:8:32-i16:16:32-i64:64-n32:64-S128"; ++ else ++ llvm_unreachable("Invalid ABI"); ++ ++ resetDataLayout(("e-" + Layout).str()); ++ } ++ ++ static const Builtin::Info BuiltinInfo[]; ++ std::string CPU; ++ bool IsSingleFloat; ++ enum LoongArchFloatABI { HardFloat, SoftFloat } FloatABI; ++ bool HasLSX; ++ bool HasLASX; ++ ++protected: ++ enum FPModeEnum { FP32, FP64 } FPMode; ++ std::string ABI; ++ ++public: ++ LoongArchTargetInfo(const llvm::Triple &Triple, const TargetOptions &) ++ : TargetInfo(Triple), IsSingleFloat(false), FloatABI(HardFloat), ++ HasLSX(false), HasLASX(false), FPMode(FP64) { ++ TheCXXABI.set(TargetCXXABI::GenericLoongArch); ++ ++ if (Triple.isLoongArch32()) ++ setABI("lp32"); ++ else if (Triple.getEnvironment() == llvm::Triple::GNUABILPX32) ++ setABI("lpx32"); ++ else ++ setABI("lp64"); ++ ++ // Currently, CPU only supports 'la464' in LA. 
++ if ( ABI == "lp64") ++ CPU = "la464"; ++ } ++ ++ bool processorSupportsGPR64() const; ++ ++ StringRef getABI() const override { return ABI; } ++ ++ bool setABI(const std::string &Name) override { ++ if (Name == "lp32") { ++ setLP32ABITypes(); ++ ABI = Name; ++ return true; ++ } ++ ++ if (Name == "lpx32") { ++ //setLPX32ABITypes(); ++ //ABI = Name; ++ //return true; ++ //TODO: implement ++ return false; ++ } ++ if (Name == "lp64") { ++ setLP64ABITypes(); ++ ABI = Name; ++ return true; ++ } ++ return false; ++ } ++ ++ void setLP32ABITypes() { ++ Int64Type = SignedLongLong; ++ IntMaxType = Int64Type; ++ LongDoubleFormat = &llvm::APFloat::IEEEdouble(); ++ LongDoubleWidth = LongDoubleAlign = 64; ++ LongWidth = LongAlign = 32; ++ MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 32; ++ PointerWidth = PointerAlign = 32; ++ PtrDiffType = SignedInt; ++ SizeType = UnsignedInt; ++ SuitableAlign = 64; ++ } ++ ++ void setLPX32LP64ABITypes() { ++ LongDoubleWidth = LongDoubleAlign = 128; ++ LongDoubleFormat = &llvm::APFloat::IEEEquad(); ++ if (getTriple().isOSFreeBSD()) { ++ LongDoubleWidth = LongDoubleAlign = 64; ++ LongDoubleFormat = &llvm::APFloat::IEEEdouble(); ++ } ++ MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; ++ SuitableAlign = 128; ++ } ++ ++ void setLP64ABITypes() { ++ setLPX32LP64ABITypes(); ++ if (getTriple().isOSOpenBSD()) { ++ Int64Type = SignedLongLong; ++ } else { ++ Int64Type = SignedLong; ++ } ++ IntMaxType = Int64Type; ++ LongWidth = LongAlign = 64; ++ PointerWidth = PointerAlign = 64; ++ PtrDiffType = SignedLong; ++ SizeType = UnsignedLong; ++ } ++ ++ void setLPX32ABITypes() { ++ setLPX32LP64ABITypes(); ++ Int64Type = SignedLongLong; ++ IntMaxType = Int64Type; ++ LongWidth = LongAlign = 32; ++ PointerWidth = PointerAlign = 32; ++ PtrDiffType = SignedInt; ++ SizeType = UnsignedInt; ++ } ++ ++ bool isValidCPUName(StringRef Name) const override; ++ void fillValidCPUList(SmallVectorImpl &Values) const override; ++ ++ bool setCPU(const std::string &Name) override { ++ CPU = Name; ++ return isValidCPUName(Name); ++ } ++ ++ const std::string &getCPU() const { return CPU; } ++ bool ++ initFeatureMap(llvm::StringMap &Features, DiagnosticsEngine &Diags, ++ StringRef CPU, ++ const std::vector &FeaturesVec) const override { ++#if 0 ++ if (CPU.empty()) ++ CPU = getCPU(); ++ Features[CPU] = true; ++#else ++// if (CPU == "la464") ++// Features["loongarch64"] = true; ++ ++//FIXME: we need this? 
++// if (CPU == "la464") ++// Features["64bit"] = true; ++#endif ++ return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec); ++ } ++ ++ void getTargetDefines(const LangOptions &Opts, ++ MacroBuilder &Builder) const override; ++ ++ ArrayRef getTargetBuiltins() const override; ++ ++ bool hasFeature(StringRef Feature) const override; ++ ++ bool hasBitIntType() const override { return true; } ++ ++ BuiltinVaListKind getBuiltinVaListKind() const override { ++ return TargetInfo::VoidPtrBuiltinVaList; ++ } ++ ++ ArrayRef getGCCRegNames() const override { ++ static const char *const GCCRegNames[] = { ++ // CPU register names ++ // Must match second column of GCCRegAliases ++ "$r0", "$r1", "$r2", "$r3", "$r4", "$r5", "$r6", "$r7", "$r8", "$r9", ++ "$r10", "$r11", "$r12", "$r13", "$r14", "$r15", "$r16", "$r17", "$r18", ++ "$r19", "$r20", "$r21", "$r22", "$r23", "$r24", "$r25", "$r26", "$r27", ++ "$r28", "$r29", "$r30", "$r31", ++ // Floating point register names ++ "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8", "$f9", ++ "$f10", "$f11", "$f12", "$f13", "$f14", "$f15", "$f16", "$f17", "$f18", ++ "$f19", "$f20", "$f21", "$f22", "$f23", "$f24", "$f25", "$f26", "$f27", ++ "$f28", "$f29", "$f30", "$f31", ++ // condition register names ++ "$fcc0", "$fcc1", "$fcc2", "$fcc3", "$fcc4", "$fcc5", "$fcc6", "$fcc7", ++ // LSX register names ++ "$vr0", "$vr1", "$vr2", "$vr3", "$vr4", "$vr5", "$vr6", "$vr7", "$vr8", ++ "$vr9", "$vr10", "$vr11", "$vr12", "$vr13", "$vr14", "$vr15", "$vr16", ++ "$vr17", "$vr18", "$vr19", "$vr20", "$vr21", "$vr22", "$vr23", "$vr24", ++ "$vr25", "$vr26", "$vr27", "$vr28", "$vr29", "$vr30", "$vr31", ++ // LASX register names ++ "$xr0", "$xr1", "$xr2", "$xr3", "$xr4", "$xr5", "$xr6", "$xr7", "$xr8", ++ "$xr9", "$xr10", "$xr11", "$xr12", "$xr13", "$xr14", "$xr15", "$xr16", ++ "$xr17", "$xr18", "$xr19", "$xr20", "$xr21", "$xr22", "$xr23", "$xr24", ++ "$xr25", "$xr26", "$xr27", "$xr28", "$xr29", "$xr30", "$xr31" ++ ++ }; ++ return llvm::makeArrayRef(GCCRegNames); ++ } ++ ++ bool validateAsmConstraint(const char *&Name, ++ TargetInfo::ConstraintInfo &Info) const override { ++ switch (*Name) { ++ default: ++ return false; ++ case 'r': // CPU registers. ++ case 'f': // floating-point registers. ++ Info.setAllowsRegister(); ++ return true; ++ case 'l': // Signed 16-bit constant ++ case 'I': // Signed 12-bit constant ++ case 'K': // Unsigned 12-bit constant ++ case 'J': // Integer 0 ++ case 'G': // Floating-point 0 ++ return true; ++ case 'm': // Memory address with 12-bit offset ++ case 'R': // An address that can be used in a non-macro load or store ++ Info.setAllowsMemory(); ++ return true; ++ case 'Z': ++ if (Name[1] == 'C' // Memory address with 16-bit and 4 bytes aligned offset ++ || Name[1] == 'B' ) { // Memory address with 0 offset ++ Info.setAllowsMemory(); ++ Name++; // Skip over 'Z'. ++ return true; ++ } ++ return false; ++ } ++ } ++ ++ std::string convertConstraint(const char *&Constraint) const override { ++ std::string R; ++ switch (*Constraint) { ++ case 'Z': // Two-character constraint; add "^" hint for later parsing. 
++ if (Constraint[1] == 'C' || Constraint[1] == 'B') { ++ R = std::string("^") + std::string(Constraint, 2); ++ Constraint++; ++ return R; ++ } ++ break; ++ } ++ return TargetInfo::convertConstraint(Constraint); ++ } ++ ++ const char *getClobbers() const override { ++#if 0 ++ // In GCC, $1 is not widely used in generated code (it's used only in a few ++ // specific situations), so there is no real need for users to add it to ++ // the clobbers list if they want to use it in their inline assembly code. ++ // ++ // In LLVM, $1 is treated as a normal GPR and is always allocatable during ++ // code generation, so using it in inline assembly without adding it to the ++ // clobbers list can cause conflicts between the inline assembly code and ++ // the surrounding generated code. ++ // ++ // Another problem is that LLVM is allowed to choose $1 for inline assembly ++ // operands, which will conflict with the ".set at" assembler option (which ++ // we use only for inline assembly, in order to maintain compatibility with ++ // GCC) and will also conflict with the user's usage of $1. ++ // ++ // The easiest way to avoid these conflicts and keep $1 as an allocatable ++ // register for generated code is to automatically clobber $1 for all inline ++ // assembly code. ++ // ++ // FIXME: We should automatically clobber $1 only for inline assembly code ++ // which actually uses it. This would allow LLVM to use $1 for inline ++ // assembly operands if the user's assembly code doesn't use it. ++ return "~{$1}"; ++#endif ++ return ""; ++ } ++ ++ bool handleTargetFeatures(std::vector &Features, ++ DiagnosticsEngine &Diags) override { ++ IsSingleFloat = false; ++ FloatABI = HardFloat; ++ FPMode = FP64; ++ ++ for (const auto &Feature : Features) { ++ if (Feature == "+single-float") ++ IsSingleFloat = true; ++ else if (Feature == "+soft-float") ++ FloatABI = SoftFloat; ++ else if (Feature == "+lsx") ++ HasLSX = true; ++ else if (Feature == "+lasx") { ++ HasLASX = true; ++ HasLSX = true; ++ } else if (Feature == "+fp64") ++ FPMode = FP64; ++ else if (Feature == "-fp64") ++ FPMode = FP32; ++ } ++ ++ setDataLayout(); ++ ++ return true; ++ } ++ ++ int getEHDataRegisterNumber(unsigned RegNo) const override { ++ if (RegNo == 0) ++ return 4; ++ if (RegNo == 1) ++ return 5; ++ return -1; ++ } ++ ++ bool isCLZForZeroUndef() const override { return false; } ++ ++ ArrayRef getGCCRegAliases() const override { ++ static const TargetInfo::GCCRegAlias GCCRegAliases[] = { ++ {{"zero", "$zero", "r0", "$0"}, "$r0"}, ++ {{"ra", "$ra", "r1", "$1"}, "$r1"}, ++ {{"tp", "$tp", "r2", "$2"}, "$r2"}, ++ {{"sp", "$sp", "r3", "$3"}, "$r3"}, ++ {{"a0", "$a0", "r4", "$4", "v0"}, "$r4"}, ++ {{"a1", "$a1", "r5", "$5", "v1"}, "$r5"}, ++ {{"a2", "$a2", "r6", "$6"}, "$r6"}, ++ {{"a3", "$a3", "r7", "$7"}, "$r7"}, ++ {{"a4", "$a4", "r8", "$8"}, "$r8"}, ++ {{"a5", "$a5", "r9", "$9"}, "$r9"}, ++ {{"a6", "$a6", "r10", "$10"}, "$r10"}, ++ {{"a7", "$a7", "r11", "$11"}, "$r11"}, ++ {{"t0", "$t0", "r12", "$12"}, "$r12"}, ++ {{"t1", "$t1", "r13", "$13"}, "$r13"}, ++ {{"t2", "$t2", "r14", "$14"}, "$r14"}, ++ {{"t3", "$t3", "r15", "$15"}, "$r15"}, ++ {{"t4", "$t4", "r16", "$16"}, "$r16"}, ++ {{"t5", "$t5", "r17", "$17"}, "$r17"}, ++ {{"t6", "$t6", "r18", "$18"}, "$r18"}, ++ {{"t7", "$t7", "r19", "$19"}, "$r19"}, ++ {{"t8", "$t8", "r20", "$20"}, "$r20"}, ++ //{{"x", "$x", "r21", "$21"}, "$r21"}, ++ {{"fp", "$fp", "r22", "$22"}, "$r22"}, ++ {{"s0", "$s0", "r23", "$23"}, "$r23"}, ++ {{"s1", "$s1", "r24", "$24"}, "$r24"}, ++ {{"s2", "$s2", "r25", "$25"}, 
"$r25"}, ++ {{"s3", "$s3", "r26", "$26"}, "$r26"}, ++ {{"s4", "$s4", "r27", "$27"}, "$r27"}, ++ {{"s5", "$s5", "r28", "$28"}, "$r28"}, ++ {{"s6", "$s6", "r29", "$29"}, "$r29"}, ++ {{"s7", "$s7", "r30", "$30"}, "$r30"}, ++ {{"s8", "$s8", "r31", "$31"}, "$r31"}, ++ {{"fa0", "$fa0", "f0"}, "$f0"}, ++ {{"fa1", "$fa1", "f1"}, "$f1"}, ++ {{"fa2", "$fa2", "f2"}, "$f2"}, ++ {{"fa3", "$fa3", "f3"}, "$f3"}, ++ {{"fa4", "$fa4", "f4"}, "$f4"}, ++ {{"fa5", "$fa5", "f5"}, "$f5"}, ++ {{"fa6", "$fa6", "f6"}, "$f6"}, ++ {{"fa7", "$fa7", "f7"}, "$f7"}, ++ {{"ft0", "$ft0", "f8"}, "$f8"}, ++ {{"ft1", "$ft1", "f9"}, "$f9"}, ++ {{"ft2", "$ft2", "f10"}, "$f10"}, ++ {{"ft3", "$ft3", "f11"}, "$f11"}, ++ {{"ft4", "$ft4", "f12"}, "$f12"}, ++ {{"ft5", "$ft5", "f13"}, "$f13"}, ++ {{"ft6", "$ft6", "f14"}, "$f14"}, ++ {{"ft7", "$ft7", "f15"}, "$f15"}, ++ {{"ft8", "$ft8", "f16"}, "$f16"}, ++ {{"ft9", "$ft9", "f17"}, "$f17"}, ++ {{"ft10", "$ft10", "f18"}, "$f18"}, ++ {{"ft11", "$ft11", "f19"}, "$f19"}, ++ {{"ft12", "$ft12", "f20"}, "$f20"}, ++ {{"ft13", "$ft13", "f21"}, "$f21"}, ++ {{"ft14", "$ft14", "f22"}, "$f22"}, ++ {{"ft15", "$ft15", "f23"}, "$f23"}, ++ {{"fs0", "$fs0", "f24"}, "$f24"}, ++ {{"fs1", "$fs1", "f25"}, "$f25"}, ++ {{"fs2", "$fs2", "f26"}, "$f26"}, ++ {{"fs3", "$fs3", "f27"}, "$f27"}, ++ {{"fs4", "$fs4", "f28"}, "$f28"}, ++ {{"fs5", "$fs5", "f29"}, "$f29"}, ++ {{"fs6", "$fs6", "f30"}, "$f30"}, ++ {{"fs7", "$fs7", "f31"}, "$f31"}, ++ }; ++ return llvm::makeArrayRef(GCCRegAliases); ++ } ++ ++ bool hasInt128Type() const override { ++ return (ABI == "lpx32" || ABI == "lp64") || getTargetOpts().ForceEnableInt128; ++ } ++ ++ bool validateTarget(DiagnosticsEngine &Diags) const override; ++}; ++} // namespace targets ++} // namespace clang ++ ++#endif // LLVM_CLANG_LIB_BASIC_TARGETS_LOONGARCH_H +diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp +index d87692fac..197915e15 100644 +--- a/clang/lib/CodeGen/CodeGenModule.cpp ++++ b/clang/lib/CodeGen/CodeGenModule.cpp +@@ -84,6 +84,7 @@ static CGCXXABI *createCXXABI(CodeGenModule &CGM) { + case TargetCXXABI::GenericARM: + case TargetCXXABI::iOS: + case TargetCXXABI::WatchOS: ++ case TargetCXXABI::GenericLoongArch: + case TargetCXXABI::GenericMIPS: + case TargetCXXABI::GenericItanium: + case TargetCXXABI::WebAssembly: +diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp +index fc2ff15a6..eebd6fb65 100644 +--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp ++++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp +@@ -533,6 +533,9 @@ CodeGen::CGCXXABI *CodeGen::CreateItaniumCXXABI(CodeGenModule &CGM) { + return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true, + /*UseARMGuardVarABI=*/true); + ++ case TargetCXXABI::GenericLoongArch: ++ return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true); ++ + case TargetCXXABI::GenericMIPS: + return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true); + +diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp +index 44743fa02..fbc6aed85 100644 +--- a/clang/lib/CodeGen/TargetInfo.cpp ++++ b/clang/lib/CodeGen/TargetInfo.cpp +@@ -11323,6 +11323,558 @@ public: + }; + } // namespace + ++//===----------------------------------------------------------------------===// ++// LoongArch ABI Implementation ++//===----------------------------------------------------------------------===// ++ ++namespace { ++class LoongArchABIInfo : public DefaultABIInfo { ++private: ++ // Size of the integer ('r') registers in bits. 
++ unsigned GRLen; ++ // Size of the floating point ('f') registers in bits. Note that the target ++ // ISA might have a wider FRLen than the selected ABI. ++ unsigned FRLen; ++ static const int NumArgGPRs = 8; ++ static const int NumArgFPRs = 8; ++ bool detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff, ++ llvm::Type *&Field1Ty, ++ CharUnits &Field1Off, ++ llvm::Type *&Field2Ty, ++ CharUnits &Field2Off) const; ++ ++public: ++ LoongArchABIInfo(CodeGen::CodeGenTypes &CGT, unsigned GRLen, unsigned FRLen) ++ : DefaultABIInfo(CGT), GRLen(GRLen), FRLen(FRLen) {} ++ ++ // DefaultABIInfo's classifyReturnType and classifyArgumentType are ++ // non-virtual, but computeInfo is virtual, so we overload it. ++ void computeInfo(CGFunctionInfo &FI) const override; ++ ++ ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed, int &ArgGPRsLeft, ++ int &ArgFPRsLeft) const; ++ ABIArgInfo classifyReturnType(QualType RetTy) const; ++ ++ uint64_t MinABIStackAlignInBytes = 8; ++ uint64_t StackAlignInBytes = 16; ++ llvm::Type* HandleAggregates(QualType Ty, uint64_t TySize) const; ++ llvm::Type* getPaddingType(uint64_t Align, uint64_t Offset) const; ++ void CoerceToIntArgs(uint64_t TySize, ++ SmallVectorImpl &ArgList) const; ++ ++ Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, ++ QualType Ty) const override; ++ ++ ABIArgInfo extendType(QualType Ty) const; ++ ++ bool detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty, ++ CharUnits &Field1Off, llvm::Type *&Field2Ty, ++ CharUnits &Field2Off, int &NeededArgGPRs, ++ int &NeededArgFPRs) const; ++ ABIArgInfo coerceAndExpandFPCCEligibleStruct(llvm::Type *Field1Ty, ++ CharUnits Field1Off, ++ llvm::Type *Field2Ty, ++ CharUnits Field2Off) const; ++}; ++} // end anonymous namespace ++ ++void LoongArchABIInfo::computeInfo(CGFunctionInfo &FI) const { ++ QualType RetTy = FI.getReturnType(); ++ if (!getCXXABI().classifyReturnType(FI)) ++ FI.getReturnInfo() = classifyReturnType(RetTy); ++ ++ // IsRetIndirect is true if classifyArgumentType indicated the value should ++ // be passed indirect or if the type size is greater than 2*grlen. ++ bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect || ++ getContext().getTypeSize(RetTy) > (2 * GRLen); ++ ++ // We must track the number of GPRs used in order to conform to the LoongArch ++ // ABI, as integer scalars passed in registers should have signext/zeroext ++ // when promoted, but are anyext if passed on the stack. As GPR usage is ++ // different for variadic arguments, we must also track whether we are ++ // examining a vararg or not. ++ int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs; ++ int ArgFPRsLeft = FRLen ? NumArgFPRs : 0; ++ int NumFixedArgs = FI.getNumRequiredArgs(); ++ ++ int ArgNum = 0; ++ for (auto &ArgInfo : FI.arguments()) { ++ bool IsFixed = ArgNum < NumFixedArgs; ++ ArgInfo.info = ++ classifyArgumentType(ArgInfo.type, IsFixed, ArgGPRsLeft, ArgFPRsLeft); ++ ArgNum++; ++ } ++} ++ ++// Returns true if the struct is a potential candidate for the floating point ++// calling convention. If this function returns true, the caller is ++// responsible for checking that if there is only a single field then that ++// field is a float. 
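
As a non-normative illustration of the flattening rule described above (assuming GRLen = FRLen = 64; all type names below are invented for the example), these are the kinds of aggregates the helper is expected to accept or reject for the FP calling convention:

  // Expected to be eligible: flattens to fp+fp or int+fp, each element small enough.
  struct FF  { float x; float y; };   // two FPRs
  struct IF  { long  i; double d; };  // one GPR plus one FPR
  struct CD  { _Complex double z; };  // a complex value flattens to fp+fp
  struct AF  { float v[2]; };         // small array of floating-point elements

  // Expected to fall back to the integer calling convention.
  struct II  { int a; int b; };       // int+int pairs are rejected
  struct FFF { float a, b, c; };      // more than two flattened fields
  union  U   { float f; int i; };     // non-empty unions are rejected
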
++bool LoongArchABIInfo::detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff, ++ llvm::Type *&Field1Ty, ++ CharUnits &Field1Off, ++ llvm::Type *&Field2Ty, ++ CharUnits &Field2Off) const { ++ bool IsInt = Ty->isIntegralOrEnumerationType(); ++ bool IsFloat = Ty->isRealFloatingType(); ++ ++ if (IsInt || IsFloat) { ++ uint64_t Size = getContext().getTypeSize(Ty); ++ if (IsInt && Size > GRLen) ++ return false; ++ // Can't be eligible if larger than the FP registers. Half precision isn't ++ // currently supported on LoongArch and the ABI hasn't been confirmed, so ++ // default to the integer ABI in that case. ++ if (IsFloat && (Size > FRLen || Size < 32)) ++ return false; ++ // Can't be eligible if an integer type was already found (int+int pairs ++ // are not eligible). ++ if (IsInt && Field1Ty && Field1Ty->isIntegerTy()) ++ return false; ++ if (!Field1Ty) { ++ Field1Ty = CGT.ConvertType(Ty); ++ Field1Off = CurOff; ++ return true; ++ } ++ if (!Field2Ty) { ++ Field2Ty = CGT.ConvertType(Ty); ++ Field2Off = CurOff; ++ return true; ++ } ++ return false; ++ } ++ ++ if (auto CTy = Ty->getAs()) { ++ if (Field1Ty) ++ return false; ++ QualType EltTy = CTy->getElementType(); ++ if (getContext().getTypeSize(EltTy) > FRLen) ++ return false; ++ Field1Ty = CGT.ConvertType(EltTy); ++ Field1Off = CurOff; ++ assert(CurOff.isZero() && "Unexpected offset for first field"); ++ Field2Ty = Field1Ty; ++ Field2Off = Field1Off + getContext().getTypeSizeInChars(EltTy); ++ return true; ++ } ++ ++ if (const ConstantArrayType *ATy = getContext().getAsConstantArrayType(Ty)) { ++ uint64_t ArraySize = ATy->getSize().getZExtValue(); ++ QualType EltTy = ATy->getElementType(); ++ CharUnits EltSize = getContext().getTypeSizeInChars(EltTy); ++ for (uint64_t i = 0; i < ArraySize; ++i) { ++ bool Ret = detectFPCCEligibleStructHelper(EltTy, CurOff, Field1Ty, ++ Field1Off, Field2Ty, Field2Off); ++ if (!Ret) ++ return false; ++ CurOff += EltSize; ++ } ++ return true; ++ } ++ ++ if (const auto *RTy = Ty->getAs()) { ++ // Structures with either a non-trivial destructor or a non-trivial ++ // copy constructor are not eligible for the FP calling convention. ++ if (getRecordArgABI(Ty, CGT.getCXXABI())) ++ return false; ++ if (isEmptyRecord(getContext(), Ty, true)) ++ return true; ++ const RecordDecl *RD = RTy->getDecl(); ++ // Unions aren't eligible unless they're empty (which is caught above). ++ if (RD->isUnion()) ++ return false; ++ const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); ++ // If this is a C++ record, check the bases first. ++ if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) { ++ for (const CXXBaseSpecifier &B : CXXRD->bases()) { ++ const auto *BDecl = ++ cast(B.getType()->castAs()->getDecl()); ++ CharUnits BaseOff = Layout.getBaseClassOffset(BDecl); ++ bool Ret = detectFPCCEligibleStructHelper(B.getType(), CurOff + BaseOff, ++ Field1Ty, Field1Off, Field2Ty, ++ Field2Off); ++ if (!Ret) ++ return false; ++ } ++ } ++ int ZeroWidthBitFieldCount = 0; ++ for (const FieldDecl *FD : RD->fields()) { ++ uint64_t FieldOffInBits = Layout.getFieldOffset(FD->getFieldIndex()); ++ QualType QTy = FD->getType(); ++ if (FD->isBitField()) { ++ unsigned BitWidth = FD->getBitWidthValue(getContext()); ++ // Allow a bitfield with a type greater than GRLen as long as the ++ // bitwidth is GRLen or less. 
++ if (getContext().getTypeSize(QTy) > GRLen && BitWidth <= GRLen) ++ QTy = getContext().getIntTypeForBitwidth(GRLen, false); ++ if (BitWidth == 0) { ++ ZeroWidthBitFieldCount++; ++ continue; ++ } ++ } ++ ++ bool Ret = detectFPCCEligibleStructHelper( ++ QTy, CurOff + getContext().toCharUnitsFromBits(FieldOffInBits), ++ Field1Ty, Field1Off, Field2Ty, Field2Off); ++ if (!Ret) ++ return false; ++ ++ // As a quirk of the ABI, zero-width bitfields aren't ignored for fp+fp ++ // or int+fp structs, but are ignored for a struct with an fp field and ++ // any number of zero-width bitfields. ++ if (Field2Ty && ZeroWidthBitFieldCount > 0) ++ return false; ++ } ++ return Field1Ty != nullptr; ++ } ++ ++ return false; ++} ++ ++// Determine if a struct is eligible for passing according to the floating ++// point calling convention (i.e., when flattened it contains a single fp ++// value, fp+fp, or int+fp of appropriate size). If so, NeededArgFPRs and ++// NeededArgGPRs are incremented appropriately. ++bool LoongArchABIInfo::detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty, ++ CharUnits &Field1Off, ++ llvm::Type *&Field2Ty, ++ CharUnits &Field2Off, ++ int &NeededArgGPRs, ++ int &NeededArgFPRs) const { ++ Field1Ty = nullptr; ++ Field2Ty = nullptr; ++ NeededArgGPRs = 0; ++ NeededArgFPRs = 0; ++ bool IsCandidate = detectFPCCEligibleStructHelper( ++ Ty, CharUnits::Zero(), Field1Ty, Field1Off, Field2Ty, Field2Off); ++ // Not really a candidate if we have a single int but no float. ++ if (Field1Ty && !Field2Ty && !Field1Ty->isFloatingPointTy()) ++ return IsCandidate = false; ++ if (!IsCandidate) ++ return false; ++ if (Field1Ty && Field1Ty->isFloatingPointTy()) ++ NeededArgFPRs++; ++ else if (Field1Ty) ++ NeededArgGPRs++; ++ if (Field2Ty && Field2Ty->isFloatingPointTy()) ++ NeededArgFPRs++; ++ else if (Field2Ty) ++ NeededArgGPRs++; ++ return IsCandidate; ++} ++ ++// Call getCoerceAndExpand for the two-element flattened struct described by ++// Field1Ty, Field1Off, Field2Ty, Field2Off. This method will create an ++// appropriate coerceToType and unpaddedCoerceToType. 
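
For instance (illustrative only; `Point` and `sum` are invented names), a two-field fp struct is expected to be coerced to an LLVM struct of its two scalar fields, so both travel in floating-point argument registers when enough remain:

  struct Point { float x; double y; };

  double sum(Point p) {   // expected coercion: { float, double }
    return p.x + p.y;     // x and y are each expected to land in an FPR
  }
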
++ABIArgInfo LoongArchABIInfo::coerceAndExpandFPCCEligibleStruct( ++ llvm::Type *Field1Ty, CharUnits Field1Off, llvm::Type *Field2Ty, ++ CharUnits Field2Off) const { ++ SmallVector CoerceElts; ++ SmallVector UnpaddedCoerceElts; ++ if (!Field1Off.isZero()) ++ CoerceElts.push_back(llvm::ArrayType::get( ++ llvm::Type::getInt8Ty(getVMContext()), Field1Off.getQuantity())); ++ ++ CoerceElts.push_back(Field1Ty); ++ UnpaddedCoerceElts.push_back(Field1Ty); ++ ++ if (!Field2Ty) { ++ return ABIArgInfo::getCoerceAndExpand( ++ llvm::StructType::get(getVMContext(), CoerceElts, !Field1Off.isZero()), ++ UnpaddedCoerceElts[0]); ++ } ++ ++ CharUnits Field2Align = ++ CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(Field2Ty)); ++ CharUnits Field1Size = ++ CharUnits::fromQuantity(getDataLayout().getTypeStoreSize(Field1Ty)); ++ CharUnits Field2OffNoPadNoPack = Field1Size.alignTo(Field2Align); ++ ++ CharUnits Padding = CharUnits::Zero(); ++ if (Field2Off > Field2OffNoPadNoPack) ++ Padding = Field2Off - Field2OffNoPadNoPack; ++ else if (Field2Off != Field2Align && Field2Off > Field1Size) ++ Padding = Field2Off - Field1Size; ++ ++ bool IsPacked = !Field2Off.isMultipleOf(Field2Align); ++ ++ if (!Padding.isZero()) ++ CoerceElts.push_back(llvm::ArrayType::get( ++ llvm::Type::getInt8Ty(getVMContext()), Padding.getQuantity())); ++ ++ CoerceElts.push_back(Field2Ty); ++ UnpaddedCoerceElts.push_back(Field2Ty); ++ ++ auto CoerceToType = ++ llvm::StructType::get(getVMContext(), CoerceElts, IsPacked); ++ auto UnpaddedCoerceToType = ++ llvm::StructType::get(getVMContext(), UnpaddedCoerceElts, IsPacked); ++ ++ return ABIArgInfo::getCoerceAndExpand(CoerceToType, UnpaddedCoerceToType); ++} ++ ++void LoongArchABIInfo::CoerceToIntArgs( ++ uint64_t TySize, SmallVectorImpl &ArgList) const { ++ llvm::IntegerType *IntTy = ++ llvm::IntegerType::get(getVMContext(), MinABIStackAlignInBytes * 8); ++ ++ // Add (TySize / MinABIStackAlignInBytes) args of IntTy. ++ for (unsigned N = TySize / (MinABIStackAlignInBytes * 8); N; --N) ++ ArgList.push_back(IntTy); ++ ++ // If necessary, add one more integer type to ArgList. ++ unsigned R = TySize % (MinABIStackAlignInBytes * 8); ++ ++ if (R) ++ ArgList.push_back(llvm::IntegerType::get(getVMContext(), R)); ++} ++ ++llvm::Type* LoongArchABIInfo::HandleAggregates(QualType Ty, uint64_t TySize) const { ++ SmallVector ArgList, IntArgList; ++ ++ if (Ty->isComplexType()) ++ return CGT.ConvertType(Ty); ++ ++ const RecordType *RT = Ty->getAs(); ++ ++ // Unions/vectors are passed in integer registers. ++ if (!RT || !RT->isStructureOrClassType()) { ++ CoerceToIntArgs(TySize, ArgList); ++ return llvm::StructType::get(getVMContext(), ArgList); ++ } ++ ++ const RecordDecl *RD = RT->getDecl(); ++ const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); ++ assert(!(TySize % 8) && "Size of structure must be multiple of 8."); ++ ++ uint64_t LastOffset = 0; ++ unsigned idx = 0; ++ llvm::IntegerType *I64 = llvm::IntegerType::get(getVMContext(), 64); ++ ++ // Iterate over fields in the struct/class and check if there are any aligned ++ // double fields. ++ for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); ++ i != e; ++i, ++idx) { ++ const QualType Ty = i->getType(); ++ const BuiltinType *BT = Ty->getAs(); ++ ++ if (!BT || BT->getKind() != BuiltinType::Double) ++ continue; ++ ++ uint64_t Offset = Layout.getFieldOffset(idx); ++ if (Offset % 64) // Ignore doubles that are not aligned. ++ continue; ++ ++ // Add ((Offset - LastOffset) / 64) args of type i64. 
++ for (unsigned j = (Offset - LastOffset) / 64; j > 0; --j) ++ ArgList.push_back(I64); ++ ++ // Add double type. ++ ArgList.push_back(llvm::Type::getDoubleTy(getVMContext())); ++ LastOffset = Offset + 64; ++ } ++ ++ CoerceToIntArgs(TySize - LastOffset, IntArgList); ++ ArgList.append(IntArgList.begin(), IntArgList.end()); ++ ++ return llvm::StructType::get(getVMContext(), ArgList); ++} ++ ++llvm::Type * LoongArchABIInfo::getPaddingType(uint64_t OrigOffset, ++ uint64_t Offset) const { ++ if (OrigOffset + MinABIStackAlignInBytes > Offset) ++ return nullptr; ++ ++ return llvm::IntegerType::get(getVMContext(), (Offset - OrigOffset) * 8); ++} ++ ++ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, ++ int &ArgGPRsLeft, ++ int &ArgFPRsLeft) const { ++ assert(ArgGPRsLeft <= NumArgGPRs && "Arg GPR tracking underflow"); ++ Ty = useFirstFieldIfTransparentUnion(Ty); ++ ++ // Structures with either a non-trivial destructor or a non-trivial ++ // copy constructor are always passed indirectly. ++ if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { ++ if (ArgGPRsLeft) ++ ArgGPRsLeft -= 1; ++ return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA == ++ CGCXXABI::RAA_DirectInMemory); ++ } ++ ++ // Ignore empty structs/unions. ++ if (isEmptyRecord(getContext(), Ty, true)) ++ return ABIArgInfo::getIgnore(); ++ ++ uint64_t Size = getContext().getTypeSize(Ty); ++ ++ // Pass floating point values via FPRs if possible. ++ if (IsFixed && Ty->isFloatingType() && FRLen >= Size && ArgFPRsLeft) { ++ ArgFPRsLeft--; ++ return ABIArgInfo::getDirect(); ++ } ++ ++ // Complex types for the hard float ABI must be passed direct rather than ++ // using CoerceAndExpand. ++ if (IsFixed && Ty->isComplexType() && FRLen && ArgFPRsLeft >= 2) { ++ QualType EltTy = Ty->getAs()->getElementType(); ++ if (getContext().getTypeSize(EltTy) <= FRLen) { ++ ArgFPRsLeft -= 2; ++ return ABIArgInfo::getDirect(); ++ } ++ } ++ ++ if (Ty->isVectorType() && (((getContext().getTypeSize(Ty) == 128) && ++ (getTarget().hasFeature("lsx"))) || ++ ((getContext().getTypeSize(Ty) == 256) && ++ getTarget().hasFeature("lasx")))) ++ return ABIArgInfo::getDirect(); ++ ++ if (IsFixed && FRLen && Ty->isStructureOrClassType()) { ++ llvm::Type *Field1Ty = nullptr; ++ llvm::Type *Field2Ty = nullptr; ++ CharUnits Field1Off = CharUnits::Zero(); ++ CharUnits Field2Off = CharUnits::Zero(); ++ int NeededArgGPRs; ++ int NeededArgFPRs; ++ bool IsCandidate = ++ detectFPCCEligibleStruct(Ty, Field1Ty, Field1Off, Field2Ty, Field2Off, ++ NeededArgGPRs, NeededArgFPRs); ++ if (IsCandidate && NeededArgGPRs <= ArgGPRsLeft && ++ NeededArgFPRs <= ArgFPRsLeft) { ++ ArgGPRsLeft -= NeededArgGPRs; ++ ArgFPRsLeft -= NeededArgFPRs; ++ return coerceAndExpandFPCCEligibleStruct(Field1Ty, Field1Off, Field2Ty, ++ Field2Off); ++ } ++ } else if (Ty->isStructureOrClassType() && Size == 128 && ++ isAggregateTypeForABI(Ty)) { ++ uint64_t Offset = 8; ++ uint64_t OrigOffset = Offset; ++ uint64_t TySize = getContext().getTypeSize(Ty); ++ uint64_t Align = getContext().getTypeAlign(Ty) / 8; ++ ++ Align = std::min(std::max(Align, (uint64_t)MinABIStackAlignInBytes), ++ (uint64_t)StackAlignInBytes); ++ unsigned CurrOffset = llvm::alignTo(Offset, Align); ++ Offset = CurrOffset + llvm::alignTo(TySize, Align * 8) / 8; ++ ++ ABIArgInfo ArgInfo = ++ ABIArgInfo::getDirect(HandleAggregates(Ty, TySize), 0, ++ getPaddingType(OrigOffset, CurrOffset)); ++ ArgInfo.setInReg(true); ++ return ArgInfo; ++ } ++ ++ uint64_t NeededAlign = getContext().getTypeAlign(Ty); ++ // Determine the 
number of GPRs needed to pass the current argument ++ // according to the ABI. 2*GRLen-aligned varargs are passed in "aligned" ++ // register pairs, so may consume 3 registers. ++ int NeededArgGPRs = 1; ++ if (!IsFixed && NeededAlign == 2 * GRLen) ++ NeededArgGPRs = 2 + (ArgGPRsLeft % 2); ++ else if (Size > GRLen && Size <= 2 * GRLen) ++ NeededArgGPRs = 2; ++ ++ if (NeededArgGPRs > ArgGPRsLeft) { ++ NeededArgGPRs = ArgGPRsLeft; ++ } ++ ++ ArgGPRsLeft -= NeededArgGPRs; ++ ++ if (!isAggregateTypeForABI(Ty) && !Ty->isVectorType()) { ++ // Treat an enum type as its underlying type. ++ if (const EnumType *EnumTy = Ty->getAs()) ++ Ty = EnumTy->getDecl()->getIntegerType(); ++ ++ // All integral types are promoted to GRLen width, unless passed on the ++ // stack. ++ if (Size < GRLen && Ty->isIntegralOrEnumerationType()) { ++ return extendType(Ty); ++ } ++ ++ return ABIArgInfo::getDirect(); ++ } ++ ++ // Aggregates which are <= 2*GRLen will be passed in registers if possible, ++ // so coerce to integers. ++ if (Size <= 2 * GRLen) { ++ unsigned Alignment = getContext().getTypeAlign(Ty); ++ ++ // Use a single GRLen int if possible, 2*GRLen if 2*GRLen alignment is ++ // required, and a 2-element GRLen array if only GRLen alignment is required. ++ if (Size <= GRLen) { ++ return ABIArgInfo::getDirect( ++ llvm::IntegerType::get(getVMContext(), GRLen)); ++ } else if (Alignment == 2 * GRLen) { ++ return ABIArgInfo::getDirect( ++ llvm::IntegerType::get(getVMContext(), 2 * GRLen)); ++ } else { ++ return ABIArgInfo::getDirect(llvm::ArrayType::get( ++ llvm::IntegerType::get(getVMContext(), GRLen), 2)); ++ } ++ } ++ return getNaturalAlignIndirect(Ty, /*ByVal=*/false); ++} ++ ++ABIArgInfo LoongArchABIInfo::classifyReturnType(QualType RetTy) const { ++ if (RetTy->isVoidType()) ++ return ABIArgInfo::getIgnore(); ++ ++ int ArgGPRsLeft = 2; ++ int ArgFPRsLeft = FRLen ? 2 : 0; ++ ++ // The rules for return and argument types are the same, so defer to ++ // classifyArgumentType. ++ return classifyArgumentType(RetTy, /*IsFixed=*/true, ArgGPRsLeft, ++ ArgFPRsLeft); ++} ++ ++Address LoongArchABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, ++ QualType Ty) const { ++ CharUnits SlotSize = CharUnits::fromQuantity(GRLen / 8); ++ ++ // Empty records are ignored for parameter passing purposes. ++ if (isEmptyRecord(getContext(), Ty, true)) { ++ Address Addr = Address(CGF.Builder.CreateLoad(VAListAddr), ++ getVAListElementType(CGF), SlotSize); ++ Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty)); ++ return Addr; ++ } ++ ++ auto TInfo = getContext().getTypeInfoInChars(Ty); ++ ++ // Arguments bigger than 2*GRlen bytes are passed indirectly. ++ bool IsIndirect = TInfo.Width > 2 * SlotSize; ++ ++ return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TInfo, ++ SlotSize, /*AllowHigherAlign=*/true); ++} ++ ++ABIArgInfo LoongArchABIInfo::extendType(QualType Ty) const { ++ int TySize = getContext().getTypeSize(Ty); ++ // LP64 ABI requires unsigned 32 bit integers to be sign extended. 
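
A hedged summary of the size buckets implemented above for GRLen = 64, using invented example types:

  struct Small { long a; };        // 8 bytes  -> a single GRLen integer (i64)
  struct Pair  { long a, b; };     // 16 bytes -> [2 x i64], i.e. a pair of GPRs
  struct Large { long a, b, c; };  // 24 bytes -> passed indirectly (by address)

  long take(unsigned k, Pair p, Large q) {
    // k is a 32-bit scalar: it is promoted to GRLen width, and, as the comment
    // above notes, the unsigned case is sign-extended rather than zero-extended
    // under lp64.
    return (long)k + p.a + q.a;
  }
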
++ if (GRLen == 64 && Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32) ++ return ABIArgInfo::getSignExtend(Ty); ++ return ABIArgInfo::getExtend(Ty); ++} ++ ++namespace { ++class LoongArchTargetCodeGenInfo : public TargetCodeGenInfo { ++public: ++ LoongArchTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned GRLen, ++ unsigned FRLen) ++ : TargetCodeGenInfo(std::make_unique( ++ CGT, GRLen, FRLen)) {} ++ ++ void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, ++ CodeGen::CodeGenModule &CGM) const override { ++ return; ++ } ++}; ++} // namespace ++ + //===----------------------------------------------------------------------===// + // VE ABI Implementation. + // +@@ -11560,6 +12112,7 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { + + case llvm::Triple::le32: + return SetCGInfo(new PNaClTargetCodeGenInfo(Types)); ++ + case llvm::Triple::m68k: + return SetCGInfo(new M68kTargetCodeGenInfo(Types)); + case llvm::Triple::mips: +@@ -11677,6 +12230,9 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { + case llvm::Triple::msp430: + return SetCGInfo(new MSP430TargetCodeGenInfo(Types)); + ++ case llvm::Triple::loongarch64: ++ return SetCGInfo(new LoongArchTargetCodeGenInfo(Types, 64, 64)); ++ + case llvm::Triple::riscv32: + case llvm::Triple::riscv64: { + StringRef ABIStr = getTarget().getABI(); +diff --git a/clang/lib/Driver/CMakeLists.txt b/clang/lib/Driver/CMakeLists.txt +index 18c9b2d04..14c7053e0 100644 +--- a/clang/lib/Driver/CMakeLists.txt ++++ b/clang/lib/Driver/CMakeLists.txt +@@ -28,6 +28,7 @@ add_clang_library(clangDriver + ToolChains/Arch/AArch64.cpp + ToolChains/Arch/ARM.cpp + ToolChains/Arch/CSKY.cpp ++ ToolChains/Arch/LoongArch.cpp + ToolChains/Arch/M68k.cpp + ToolChains/Arch/Mips.cpp + ToolChains/Arch/PPC.cpp +diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp +index 3f29afd35..50970bd33 100644 +--- a/clang/lib/Driver/Driver.cpp ++++ b/clang/lib/Driver/Driver.cpp +@@ -626,6 +626,29 @@ static llvm::Triple computeTargetTriple(const Driver &D, + Target.setVendorName("intel"); + } + ++ // If target is LoongArch adjust the target triple ++ // accordingly to provided ABI name. ++ A = Args.getLastArg(options::OPT_mabi_EQ); ++ if (A && Target.isLoongArch()) { ++ StringRef ABIName = A->getValue(); ++ if (ABIName == "lp32") { ++ Target = Target.get32BitArchVariant(); ++ if (Target.getEnvironment() == llvm::Triple::GNUABI64 || ++ Target.getEnvironment() == llvm::Triple::GNUABILPX32) ++ Target.setEnvironment(llvm::Triple::GNU); ++ } else if (ABIName == "lpx32") { ++ Target = Target.get64BitArchVariant(); ++ if (Target.getEnvironment() == llvm::Triple::GNU || ++ Target.getEnvironment() == llvm::Triple::GNUABI64) ++ Target.setEnvironment(llvm::Triple::GNUABILPX32); ++ } else if (ABIName == "lp64") { ++ Target = Target.get64BitArchVariant(); ++ if (Target.getEnvironment() == llvm::Triple::GNU || ++ Target.getEnvironment() == llvm::Triple::GNUABILPX32) ++ Target.setEnvironment(llvm::Triple::GNUABI64); ++ } ++ } ++ + // If target is MIPS adjust the target triple + // accordingly to provided ABI name. 
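
Circling back to the va_arg lowering added in LoongArchABIInfo::EmitVAArg above, a minimal sketch of the expected lp64 behaviour (`Big` and `sum_varargs` are invented names):

  #include <cstdarg>

  struct Big { long a, b, c; };    // 24 bytes: larger than 2*GRLen

  long sum_varargs(int n, ...) {
    va_list ap;
    va_start(ap, n);
    long x = va_arg(ap, long);     // read straight from a GRLen-sized slot
    Big  b = va_arg(ap, Big);      // expected: the slot holds a pointer, the struct is copied through it
    va_end(ap);
    return x + b.a;
  }
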
+ A = Args.getLastArg(options::OPT_mabi_EQ); +diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp +new file mode 100644 +index 000000000..2c42db690 +--- /dev/null ++++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp +@@ -0,0 +1,211 @@ ++//===--- LoongArch.cpp - Tools Implementations -----------------------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArch.h" ++#include "ToolChains/CommonArgs.h" ++#include "clang/Driver/Driver.h" ++#include "clang/Driver/DriverDiagnostic.h" ++#include "clang/Driver/Options.h" ++#include "llvm/ADT/StringSwitch.h" ++#include "llvm/Option/ArgList.h" ++ ++using namespace clang::driver; ++using namespace clang::driver::tools; ++using namespace clang; ++using namespace llvm::opt; ++ ++// Get CPU and ABI names. They are not independent ++// so we have to calculate them together. ++void loongarch::getLoongArchCPUAndABI(const ArgList &Args, const llvm::Triple &Triple, ++ StringRef &CPUName, StringRef &ABIName) { ++ const char *DefLoongArch32CPU = "loongarch32"; ++ const char *DefLoongArch64CPU = "la464"; ++ ++ if (Arg *A = Args.getLastArg(clang::driver::options::OPT_march_EQ, ++ options::OPT_mcpu_EQ)) ++ CPUName = A->getValue(); ++ ++ if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) { ++ ABIName = A->getValue(); ++ // Convert a GNU style LoongArch ABI name to the name ++ // accepted by LLVM LoongArch backend. ++ ABIName = llvm::StringSwitch(ABIName) ++ .Case("32", "lp32") ++ .Case("64", "lp64") ++ .Default(ABIName); ++ } ++ ++ // Setup default CPU and ABI names. ++ if (CPUName.empty() && ABIName.empty()) { ++ switch (Triple.getArch()) { ++ default: ++ llvm_unreachable("Unexpected triple arch name"); ++ case llvm::Triple::loongarch32: ++ CPUName = DefLoongArch32CPU; ++ break; ++ case llvm::Triple::loongarch64: ++ CPUName = DefLoongArch64CPU; ++ break; ++ } ++ } ++ ++ if (ABIName.empty() && (Triple.getEnvironment() == llvm::Triple::GNUABILPX32)) ++ ABIName = "lpx32"; ++ ++ if (ABIName.empty()) { ++ ABIName = llvm::StringSwitch(CPUName) ++ .Case("loongarch32", "lp32") ++ .Cases("la264", "la364", "la464", "lp64") ++ .Default(""); ++ } ++ ++ if (ABIName.empty()) { ++ // Deduce ABI name from the target triple. ++ ABIName = Triple.isLoongArch32() ? "lp32" : "lp64"; ++ } ++ ++ if (CPUName.empty()) { ++ // Deduce CPU name from ABI name. ++ CPUName = llvm::StringSwitch(ABIName) ++ .Case("lp32", DefLoongArch32CPU) ++ .Cases("lpx32", "lp64", DefLoongArch64CPU) ++ .Default(""); ++ } ++ ++ // FIXME: Warn on inconsistent use of -march and -mabi. ++} ++ ++std::string loongarch::getLoongArchABILibSuffix(const ArgList &Args, ++ const llvm::Triple &Triple) { ++ StringRef CPUName, ABIName; ++ tools::loongarch::getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName); ++ return llvm::StringSwitch(ABIName) ++ .Case("lp32", "") ++ .Case("lpx32", "32") ++ .Case("lp64", "64"); ++} ++ ++// Convert ABI name to the GNU tools acceptable variant. ++StringRef loongarch::getGnuCompatibleLoongArchABIName(StringRef ABI) { ++ return llvm::StringSwitch(ABI) ++ .Case("lp32", "32") ++ .Case("lp64", "64") ++ .Default(ABI); ++} ++ ++// Select the LoongArch float ABI as determined by -msoft-float, -mhard-float, ++// and -mfloat-abi=. 
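
The CPU/ABI defaulting implemented in getLoongArchCPUAndABI above can be summarised by this standalone sketch (not driver code; the lpx32 ABI and the GNUABILPX32 environment are omitted for brevity, and the names below are invented):

  #include <string>

  struct CpuAbi { std::string cpu, abi; };

  CpuAbi deduce(bool is64Bit, std::string cpu, std::string abi) {
    if (cpu.empty() && abi.empty())
      cpu = is64Bit ? "la464" : "loongarch32";            // default CPU from the triple
    if (abi.empty())
      abi = (cpu == "loongarch32") ? "lp32" : "lp64";     // ABI deduced from the CPU
    if (cpu.empty())
      cpu = (abi == "lp32") ? "loongarch32" : "la464";    // CPU deduced from the ABI
    return {cpu, abi};
  }
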
++loongarch::FloatABI loongarch::getLoongArchFloatABI(const Driver &D, const ArgList &Args) { ++ loongarch::FloatABI ABI = loongarch::FloatABI::Invalid; ++ if (Arg *A = ++ Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float, ++ options::OPT_mfloat_abi_EQ)) { ++ if (A->getOption().matches(options::OPT_msoft_float)) ++ ABI = loongarch::FloatABI::Soft; ++ else if (A->getOption().matches(options::OPT_mhard_float)) ++ ABI = loongarch::FloatABI::Hard; ++ else { ++ ABI = llvm::StringSwitch(A->getValue()) ++ .Case("soft", loongarch::FloatABI::Soft) ++ .Case("hard", loongarch::FloatABI::Hard) ++ .Default(loongarch::FloatABI::Invalid); ++ if (ABI == loongarch::FloatABI::Invalid && !StringRef(A->getValue()).empty()) { ++ D.Diag(clang::diag::err_drv_invalid_mfloat_abi) << A->getAsString(Args); ++ ABI = loongarch::FloatABI::Hard; ++ } ++ } ++ } ++ ++ // If unspecified, choose the default based on the platform. ++ if (ABI == loongarch::FloatABI::Invalid) { ++ // Assume "hard", because it's a default value used by gcc. ++ // When we start to recognize specific target LoongArch processors, ++ // we will be able to select the default more correctly. ++ ABI = loongarch::FloatABI::Hard; ++ } ++ ++ assert(ABI != loongarch::FloatABI::Invalid && "must select an ABI"); ++ return ABI; ++} ++ ++void loongarch::getLoongArchTargetFeatures(const Driver &D, const llvm::Triple &Triple, ++ const ArgList &Args, ++ std::vector &Features) { ++ StringRef CPUName; ++ StringRef ABIName; ++ getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName); ++ ABIName = getGnuCompatibleLoongArchABIName(ABIName); ++ ++ // At final link time, LP32 and LPX32 with CPIC will have another section ++ // added to the binary which contains the stub functions to perform ++ // any fixups required for PIC code. ++ ++ bool IsLP64 = ABIName == "64"; ++ bool NonPIC = false; ++ ++ Arg *LastPICArg = Args.getLastArg(options::OPT_fPIC, options::OPT_fno_PIC, ++ options::OPT_fpic, options::OPT_fno_pic, ++ options::OPT_fPIE, options::OPT_fno_PIE, ++ options::OPT_fpie, options::OPT_fno_pie); ++ if (LastPICArg) { ++ Option O = LastPICArg->getOption(); ++ NonPIC = ++ (O.matches(options::OPT_fno_PIC) || O.matches(options::OPT_fno_pic) || ++ O.matches(options::OPT_fno_PIE) || O.matches(options::OPT_fno_pie)); ++ } ++ ++ if (IsLP64 && NonPIC) { ++ NonPIC = false; ++ } ++ ++ loongarch::FloatABI FloatABI = loongarch::getLoongArchFloatABI(D, Args); ++ if (FloatABI == loongarch::FloatABI::Soft) { ++ // FIXME: Note, this is a hack. We need to pass the selected float ++ // mode to the LoongArchTargetInfoBase to define appropriate macros there. ++ // Now it is the only method. ++ Features.push_back("+soft-float"); ++ } ++ ++ AddTargetFeature(Args, Features, options::OPT_msingle_float, ++ options::OPT_mdouble_float, "single-float"); ++ ++ AddTargetFeature(Args, Features, options::OPT_mlsx, options::OPT_mno_lsx, ++ "lsx"); ++ AddTargetFeature(Args, Features, options::OPT_mlasx, options::OPT_mno_lasx, ++ "lasx"); ++ ++ AddTargetFeature(Args, Features, options::OPT_munaligned_access, ++ options::OPT_mno_unaligned_access, "unaligned-access"); ++ ++ // Add the last -mfp32/-mfp64, if none are given and fp64 is default, ++ // pass fp64. 
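
A rough, standalone model of the feature strings the driver assembles here (explicit -mfp32/-mfp64 overrides are left out, and the function name is invented); note that "+lasx" additionally switches on LSX later, in handleTargetFeatures() shown earlier:

  #include <string>
  #include <vector>

  std::vector<std::string> loongarchFeaturesFor(bool softFloat, bool singleFloat,
                                                bool lsx, bool lasx) {
    std::vector<std::string> f;
    if (softFloat)    f.push_back("+soft-float");
    if (singleFloat)  f.push_back("+single-float");
    if (lsx)          f.push_back("+lsx");
    if (lasx)         f.push_back("+lasx");   // the target also enables LSX for this
    if (!singleFloat) f.push_back("+fp64");   // fp64 is the default unless -msingle-float
    return f;
  }
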
++ if (Arg *A = Args.getLastArg(options::OPT_mfp32, ++ options::OPT_mfp64)) { ++ if (A->getOption().matches(options::OPT_mfp32)) ++ Features.push_back("-fp64"); ++ else ++ Features.push_back("+fp64"); ++ } else if (loongarch::isFP64Default(Args)) { ++ Features.push_back("+fp64"); ++ } ++ ++} ++ ++bool loongarch::hasLoongArchAbiArg(const ArgList &Args, const char *Value) { ++ Arg *A = Args.getLastArg(options::OPT_mabi_EQ); ++ return A && (A->getValue() == StringRef(Value)); ++} ++ ++bool loongarch::isUCLibc(const ArgList &Args) { ++ Arg *A = Args.getLastArg(options::OPT_m_libc_Group); ++ return A && A->getOption().matches(options::OPT_muclibc); ++} ++ ++bool loongarch::isFP64Default(const ArgList &Args) { ++ return Args.getLastArg(options::OPT_msingle_float) ? false : true; ++} +diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.h b/clang/lib/Driver/ToolChains/Arch/LoongArch.h +new file mode 100644 +index 000000000..53664346f +--- /dev/null ++++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.h +@@ -0,0 +1,49 @@ ++//===--- LoongArch.h - LoongArch-specific Tool Helpers ----------------------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ARCH_LOONGARCH_H ++#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ARCH_LOONGARCH_H ++ ++#include "clang/Driver/Driver.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/ADT/Triple.h" ++#include "llvm/Option/Option.h" ++#include ++#include ++ ++namespace clang { ++namespace driver { ++namespace tools { ++ ++namespace loongarch { ++enum class FloatABI { ++ Invalid, ++ Soft, ++ Hard, ++}; ++ ++void getLoongArchCPUAndABI(const llvm::opt::ArgList &Args, ++ const llvm::Triple &Triple, StringRef &CPUName, ++ StringRef &ABIName); ++void getLoongArchTargetFeatures(const Driver &D, const llvm::Triple &Triple, ++ const llvm::opt::ArgList &Args, ++ std::vector &Features); ++StringRef getGnuCompatibleLoongArchABIName(StringRef ABI); ++loongarch::FloatABI getLoongArchFloatABI(const Driver &D, const llvm::opt::ArgList &Args); ++std::string getLoongArchABILibSuffix(const llvm::opt::ArgList &Args, ++ const llvm::Triple &Triple); ++bool hasLoongArchAbiArg(const llvm::opt::ArgList &Args, const char *Value); ++bool isUCLibc(const llvm::opt::ArgList &Args); ++bool isFP64Default(const llvm::opt::ArgList &Args); ++ ++} // end namespace loongarch ++} // end namespace target ++} // end namespace driver ++} // end namespace clang ++ ++#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ARCH_LOONGARCH_H +diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp +index 3704ed858..8381fb9bd 100644 +--- a/clang/lib/Driver/ToolChains/Clang.cpp ++++ b/clang/lib/Driver/ToolChains/Clang.cpp +@@ -11,6 +11,7 @@ + #include "Arch/AArch64.h" + #include "Arch/ARM.h" + #include "Arch/CSKY.h" ++#include "Arch/LoongArch.h" + #include "Arch/M68k.h" + #include "Arch/Mips.h" + #include "Arch/PPC.h" +@@ -317,6 +318,11 @@ static void getTargetFeatures(const Driver &D, const llvm::Triple &Triple, + arm::getARMTargetFeatures(D, Triple, Args, Features, ForAS); + break; + ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: ++ loongarch::getLoongArchTargetFeatures(D, Triple, Args, Features); ++ break; ++ + case llvm::Triple::ppc: + case llvm::Triple::ppcle: + 
case llvm::Triple::ppc64: +@@ -527,6 +533,8 @@ static bool useFramePointerForTargetByDefault(const ArgList &Args, + // XCore never wants frame pointers, regardless of OS. + // WebAssembly never wants frame pointers. + return false; ++ case llvm::Triple::loongarch64: ++ case llvm::Triple::loongarch32: + case llvm::Triple::ppc: + case llvm::Triple::ppcle: + case llvm::Triple::ppc64: +@@ -1794,6 +1802,11 @@ void Clang::RenderTargetOptions(const llvm::Triple &EffectiveTriple, + CmdArgs.push_back("-fallow-half-arguments-and-returns"); + break; + ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: ++ AddLoongArchTargetArgs(Args, CmdArgs); ++ break; ++ + case llvm::Triple::mips: + case llvm::Triple::mipsel: + case llvm::Triple::mips64: +@@ -1933,6 +1946,45 @@ void Clang::AddAArch64TargetArgs(const ArgList &Args, + AddUnalignedAccessWarning(CmdArgs); + } + ++void Clang::AddLoongArchTargetArgs(const ArgList &Args, ++ ArgStringList &CmdArgs) const { ++ const Driver &D = getToolChain().getDriver(); ++ StringRef CPUName; ++ StringRef ABIName; ++ const llvm::Triple &Triple = getToolChain().getTriple(); ++ loongarch::getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName); ++ ++ CmdArgs.push_back("-target-abi"); ++ CmdArgs.push_back(ABIName.data()); ++ ++ loongarch::FloatABI ABI = loongarch::getLoongArchFloatABI(D, Args); ++ if (ABI == loongarch::FloatABI::Soft) { ++ // Floating point operations and argument passing are soft. ++ CmdArgs.push_back("-msoft-float"); ++ CmdArgs.push_back("-mfloat-abi"); ++ CmdArgs.push_back("soft"); ++ } else { ++ // Floating point operations and argument passing are hard. ++ assert(ABI == loongarch::FloatABI::Hard && "Invalid float abi!"); ++ CmdArgs.push_back("-mfloat-abi"); ++ CmdArgs.push_back("hard"); ++ } ++ ++ if (Arg *A = Args.getLastArg(options::OPT_mcheck_zero_division, ++ options::OPT_mno_check_zero_division)) { ++ if (A->getOption().matches(options::OPT_mno_check_zero_division)) { ++ CmdArgs.push_back("-mllvm"); ++ CmdArgs.push_back("-mnocheck-zero-division"); ++ } ++ } ++ ++ llvm::Reloc::Model RelocationModel; ++ unsigned PICLevel; ++ bool IsPIE; ++ std::tie(RelocationModel, PICLevel, IsPIE) = ++ ParsePICArgs(getToolChain(), Args); ++} ++ + void Clang::AddMIPSTargetArgs(const ArgList &Args, + ArgStringList &CmdArgs) const { + const Driver &D = getToolChain().getDriver(); +@@ -7812,6 +7864,17 @@ const char *Clang::getDependencyFileName(const ArgList &Args, + + // Begin ClangAs + ++void ClangAs::AddLoongArchTargetArgs(const ArgList &Args, ++ ArgStringList &CmdArgs) const { ++ StringRef CPUName; ++ StringRef ABIName; ++ const llvm::Triple &Triple = getToolChain().getTriple(); ++ loongarch::getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName); ++ ++ CmdArgs.push_back("-target-abi"); ++ CmdArgs.push_back(ABIName.data()); ++} ++ + void ClangAs::AddMIPSTargetArgs(const ArgList &Args, + ArgStringList &CmdArgs) const { + StringRef CPUName; +@@ -8007,6 +8070,11 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, + default: + break; + ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: ++ AddLoongArchTargetArgs(Args, CmdArgs); ++ break; ++ + case llvm::Triple::mips: + case llvm::Triple::mipsel: + case llvm::Triple::mips64: +diff --git a/clang/lib/Driver/ToolChains/Clang.h b/clang/lib/Driver/ToolChains/Clang.h +index 5209c6687..e28012af1 100644 +--- a/clang/lib/Driver/ToolChains/Clang.h ++++ b/clang/lib/Driver/ToolChains/Clang.h +@@ -57,6 +57,8 @@ private: + bool KernelOrKext) const; + void AddARM64TargetArgs(const 
llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; ++ void AddLoongArchTargetArgs(const llvm::opt::ArgList &Args, ++ llvm::opt::ArgStringList &CmdArgs) const; + void AddMIPSTargetArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; + void AddPPCTargetArgs(const llvm::opt::ArgList &Args, +@@ -123,6 +125,8 @@ class LLVM_LIBRARY_VISIBILITY ClangAs : public Tool { + public: + ClangAs(const ToolChain &TC) + : Tool("clang::as", "clang integrated assembler", TC) {} ++ void AddLoongArchTargetArgs(const llvm::opt::ArgList &Args, ++ llvm::opt::ArgStringList &CmdArgs) const; + void AddMIPSTargetArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; + void AddX86TargetArgs(const llvm::opt::ArgList &Args, +diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp +index 443725f7d..a3778db38 100644 +--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp ++++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp +@@ -9,6 +9,7 @@ + #include "CommonArgs.h" + #include "Arch/AArch64.h" + #include "Arch/ARM.h" ++#include "Arch/LoongArch.h" + #include "Arch/M68k.h" + #include "Arch/Mips.h" + #include "Arch/PPC.h" +@@ -376,6 +377,14 @@ std::string tools::getCPUName(const Driver &D, const ArgList &Args, + return A->getValue(); + return ""; + ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: { ++ StringRef CPUName; ++ StringRef ABIName; ++ loongarch::getLoongArchCPUAndABI(Args, T, CPUName, ABIName); ++ return std::string(CPUName); ++ } ++ + case llvm::Triple::m68k: + return m68k::getM68kTargetCPU(Args); + +@@ -1378,6 +1387,18 @@ tools::ParsePICArgs(const ToolChain &ToolChain, const ArgList &Args) { + if ((ROPI || RWPI) && (PIC || PIE)) + ToolChain.getDriver().Diag(diag::err_drv_ropi_rwpi_incompatible_with_pic); + ++ if (Triple.isLoongArch()) { ++ StringRef CPUName; ++ StringRef ABIName; ++ loongarch::getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName); ++ // When targeting the LP64 ABI, PIC is the default. ++ if (ABIName == "lp64") ++ PIC = true; ++ // Unlike other architectures, LoongArch, even with -fPIC/-mxgot/multigot, ++ // does not use PIC level 2 for historical reasons. ++ IsPICLevelTwo = false; ++ } ++ + if (Triple.isMIPS()) { + StringRef CPUName; + StringRef ABIName; +diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp +index 665cdc313..aad574bbe 100644 +--- a/clang/lib/Driver/ToolChains/Gnu.cpp ++++ b/clang/lib/Driver/ToolChains/Gnu.cpp +@@ -9,6 +9,7 @@ + #include "Gnu.h" + #include "Arch/ARM.h" + #include "Arch/CSKY.h" ++#include "Arch/LoongArch.h" + #include "Arch/Mips.h" + #include "Arch/PPC.h" + #include "Arch/RISCV.h" +@@ -255,6 +256,10 @@ static const char *getLDMOption(const llvm::Triple &T, const ArgList &Args) { + case llvm::Triple::armeb: + case llvm::Triple::thumbeb: + return isArmBigEndian(T, Args) ? 
"armelfb_linux_eabi" : "armelf_linux_eabi"; ++ case llvm::Triple::loongarch32: ++ return "elf32loongarch"; ++ case llvm::Triple::loongarch64: ++ return "elf64loongarch"; + case llvm::Triple::m68k: + return "m68kelf"; + case llvm::Triple::ppc: +@@ -856,6 +861,63 @@ void tools::gnutools::Assembler::ConstructJob(Compilation &C, + + break; + } ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: { ++ StringRef CPUName; ++ StringRef ABIName; ++ loongarch::getLoongArchCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName); ++ ABIName = loongarch::getGnuCompatibleLoongArchABIName(ABIName); ++ ++ //FIXME: Currently gnu as doesn't support -march ++ //CmdArgs.push_back("-march=loongarch"); ++ //CmdArgs.push_back(CPUName.data()); ++ ++ //FIXME: modify loongarch::getGnuCompatibleLoongArchABIName() ++ CmdArgs.push_back("-mabi=lp64"); ++ //CmdArgs.push_back(ABIName.data()); ++ ++ // -mno-shared should be emitted unless -fpic, -fpie, -fPIC, -fPIE, ++ // or -mshared (not implemented) is in effect. ++ if (RelocationModel == llvm::Reloc::Static) ++ CmdArgs.push_back("-mno-shared"); ++ ++ // LLVM doesn't support -mplt yet and acts as if it is always given. ++ // However, -mplt has no effect with the LP64 ABI. ++ if (ABIName != "64") ++ CmdArgs.push_back("-call_nonpic"); ++ ++ break; ++ ++ // Add the last -mfp32/-mfp64. ++ if (Arg *A = Args.getLastArg(options::OPT_mfp32, ++ options::OPT_mfp64)) { ++ A->claim(); ++ A->render(Args, CmdArgs); ++ } ++ ++ if (Arg *A = Args.getLastArg(options::OPT_mlsx, options::OPT_mno_lsx)) { ++ // Do not use AddLastArg because not all versions of LoongArch assembler ++ // support -mlsx / -mno-lsx options. ++ if (A->getOption().matches(options::OPT_mlsx)) ++ CmdArgs.push_back(Args.MakeArgString("-mlsx")); ++ } ++ ++ if (Arg *A = Args.getLastArg(options::OPT_mlasx, options::OPT_mno_lasx)) { ++ // Do not use AddLastArg because not all versions of LoongArch assembler ++ // support -mlasx / -mno-lasx options. 
++ if (A->getOption().matches(options::OPT_mlasx)) ++ CmdArgs.push_back(Args.MakeArgString("-mlasx")); ++ } ++ ++ Args.AddLastArg(CmdArgs, options::OPT_mhard_float, ++ options::OPT_msoft_float); ++ ++ Args.AddLastArg(CmdArgs, options::OPT_mdouble_float, ++ options::OPT_msingle_float); ++ ++ AddAssemblerKPIC(getToolChain(), Args, CmdArgs); ++ break; ++ } + case llvm::Triple::mips: + case llvm::Triple::mipsel: + case llvm::Triple::mips64: +@@ -2294,6 +2356,10 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( + "s390x-linux-gnu", "s390x-unknown-linux-gnu", "s390x-ibm-linux-gnu", + "s390x-suse-linux", "s390x-redhat-linux"}; + ++ static const char *const LoongArch64LibDirs[] = {"/lib64", "/lib"}; ++ static const char *const LoongArch64Triples[] = { ++ "loongarch64-linux-gnu", "loongarch64-unknown-linux-gnu", ++ "loongarch64-loongson-linux-gnu", "loongarch64-redhat-linux"}; + + using std::begin; + using std::end; +@@ -2466,6 +2532,10 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( + BiarchTripleAliases.append(begin(X32Triples), end(X32Triples)); + } + break; ++ case llvm::Triple::loongarch64: ++ LibDirs.append(begin(LoongArch64LibDirs), end(LoongArch64LibDirs)); ++ TripleAliases.append(begin(LoongArch64Triples), end(LoongArch64Triples)); ++ break; + case llvm::Triple::m68k: + LibDirs.append(begin(M68kLibDirs), end(M68kLibDirs)); + TripleAliases.append(begin(M68kTriples), end(M68kTriples)); +@@ -2823,6 +2893,7 @@ bool Generic_GCC::isPICDefault() const { + switch (getArch()) { + case llvm::Triple::x86_64: + return getTriple().isOSWindows(); ++ case llvm::Triple::loongarch64: + case llvm::Triple::mips64: + case llvm::Triple::mips64el: + return true; +@@ -2863,6 +2934,8 @@ bool Generic_GCC::IsIntegratedAssemblerDefault() const { + case llvm::Triple::ppc64le: + case llvm::Triple::riscv32: + case llvm::Triple::riscv64: ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: + case llvm::Triple::sparc: + case llvm::Triple::sparcel: + case llvm::Triple::sparcv9: +diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp +index ceb1a982c..45adea6dd 100644 +--- a/clang/lib/Driver/ToolChains/Linux.cpp ++++ b/clang/lib/Driver/ToolChains/Linux.cpp +@@ -8,6 +8,7 @@ + + #include "Linux.h" + #include "Arch/ARM.h" ++#include "Arch/LoongArch.h" + #include "Arch/Mips.h" + #include "Arch/PPC.h" + #include "Arch/RISCV.h" +@@ -85,6 +86,11 @@ std::string Linux::getMultiarchTriple(const Driver &D, + case llvm::Triple::aarch64_be: + return "aarch64_be-linux-gnu"; + ++ case llvm::Triple::loongarch32: ++ return "loongarch32-linux-gnu"; ++ case llvm::Triple::loongarch64: ++ return "loongarch64-linux-gnu"; ++ + case llvm::Triple::m68k: + return "m68k-linux-gnu"; + +@@ -473,6 +479,11 @@ std::string Linux::getDynamicLinker(const ArgList &Args) const { + Loader = HF ? 
"ld-linux-armhf.so.3" : "ld-linux.so.3"; + break; + } ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: ++ LibDir = "lib" + tools::loongarch::getLoongArchABILibSuffix(Args, Triple); ++ Loader = "ld.so.1"; ++ break; + case llvm::Triple::m68k: + LibDir = "lib"; + Loader = "ld.so.1"; +@@ -741,6 +752,7 @@ SanitizerMask Linux::getSupportedSanitizers() const { + const bool IsRISCV64 = getTriple().getArch() == llvm::Triple::riscv64; + const bool IsSystemZ = getTriple().getArch() == llvm::Triple::systemz; + const bool IsHexagon = getTriple().getArch() == llvm::Triple::hexagon; ++ const bool IsLoongArch64 = getTriple().getArch() == llvm::Triple::loongarch64; + SanitizerMask Res = ToolChain::getSupportedSanitizers(); + Res |= SanitizerKind::Address; + Res |= SanitizerKind::PointerCompare; +@@ -751,19 +763,20 @@ SanitizerMask Linux::getSupportedSanitizers() const { + Res |= SanitizerKind::Memory; + Res |= SanitizerKind::Vptr; + Res |= SanitizerKind::SafeStack; +- if (IsX86_64 || IsMIPS64 || IsAArch64) ++ if (IsX86_64 || IsMIPS64 || IsAArch64 || IsLoongArch64) + Res |= SanitizerKind::DataFlow; + if (IsX86_64 || IsMIPS64 || IsAArch64 || IsX86 || IsArmArch || IsPowerPC64 || +- IsRISCV64 || IsSystemZ || IsHexagon) ++ IsRISCV64 || IsSystemZ || IsHexagon || IsLoongArch64) + Res |= SanitizerKind::Leak; +- if (IsX86_64 || IsMIPS64 || IsAArch64 || IsPowerPC64 || IsSystemZ) ++ if (IsX86_64 || IsMIPS64 || IsAArch64 || IsPowerPC64 || IsSystemZ || ++ IsLoongArch64) + Res |= SanitizerKind::Thread; + if (IsX86_64) + Res |= SanitizerKind::KernelMemory; + if (IsX86 || IsX86_64) + Res |= SanitizerKind::Function; + if (IsX86_64 || IsMIPS64 || IsAArch64 || IsX86 || IsMIPS || IsArmArch || +- IsPowerPC64 || IsHexagon) ++ IsPowerPC64 || IsHexagon || IsLoongArch64) + Res |= SanitizerKind::Scudo; + if (IsX86_64 || IsAArch64) { + Res |= SanitizerKind::HWAddress; +diff --git a/clang/lib/Driver/XRayArgs.cpp b/clang/lib/Driver/XRayArgs.cpp +index 63b575178..4e3ae3f25 100644 +--- a/clang/lib/Driver/XRayArgs.cpp ++++ b/clang/lib/Driver/XRayArgs.cpp +@@ -42,6 +42,8 @@ XRayArgs::XRayArgs(const ToolChain &TC, const ArgList &Args) { + case llvm::Triple::aarch64: + case llvm::Triple::hexagon: + case llvm::Triple::ppc64le: ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: + case llvm::Triple::mips: + case llvm::Triple::mipsel: + case llvm::Triple::mips64: +diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt +index 6e2060991..2ec170cc2 100644 +--- a/clang/lib/Headers/CMakeLists.txt ++++ b/clang/lib/Headers/CMakeLists.txt +@@ -68,6 +68,12 @@ set(hlsl_files + hlsl/hlsl_intrinsics.h + ) + ++set(loongarch_files ++ lasxintrin.h ++ larchintrin.h ++ lsxintrin.h ++ ) ++ + set(mips_msa_files + msa.h + ) +@@ -220,6 +226,7 @@ set(files + ${hexagon_files} + ${hip_files} + ${hlsl_files} ++ ${loongarch_files} + ${mips_msa_files} + ${opencl_files} + ${ppc_files} +@@ -381,6 +388,7 @@ add_dependencies("clang-resource-headers" + "hexagon-resource-headers" + "hip-resource-headers" + "hlsl-resource-headers" ++ "loongarch-resource-headers" + "mips-resource-headers" + "ppc-resource-headers" + "ppc-htm-resource-headers" +@@ -404,6 +412,7 @@ add_header_target("aarch64-resource-headers" "${aarch64_only_files};${aarch64_on + add_header_target("cuda-resource-headers" "${cuda_files};${cuda_wrapper_files}") + add_header_target("hexagon-resource-headers" "${hexagon_files}") + add_header_target("hip-resource-headers" "${hip_files}") ++add_header_target("loongarch-resource-headers" 
"${loongarch_files}") + add_header_target("mips-resource-headers" "${mips_msa_files}") + add_header_target("ppc-resource-headers" "${ppc_files};${ppc_wrapper_files}") + add_header_target("ppc-htm-resource-headers" "${ppc_htm_files}") +@@ -494,6 +503,12 @@ install( + EXCLUDE_FROM_ALL + COMPONENT hip-resource-headers) + ++install( ++ FILES ${loongarch_files} ++ DESTINATION ${header_install_dir} ++ EXCLUDE_FROM_ALL ++ COMPONENT loongarch-resource-headers) ++ + install( + FILES ${mips_msa_files} + DESTINATION ${header_install_dir} +diff --git a/clang/lib/Headers/larchintrin.h b/clang/lib/Headers/larchintrin.h +new file mode 100644 +index 000000000..b5acf218b +--- /dev/null ++++ b/clang/lib/Headers/larchintrin.h +@@ -0,0 +1,338 @@ ++//===----------- larchintrin.h - LoongArch BASE intrinsics ------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the LoongArch Base intrinsics ++// ++//===----------------------------------------------------------------------===// ++#ifndef __LOONGARCH_BASE_H ++#define __LOONGARCH_BASE_H ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++typedef struct drdtime{ ++ unsigned long dvalue; ++ unsigned long dtimeid; ++} __drdtime_t; ++ ++typedef struct rdtime{ ++ unsigned int value; ++ unsigned int timeid; ++} __rdtime_t; ++ ++/* Assembly instruction format: rd, csr_num */ ++/* Data types in instruction templates: unsigned int, uimm14_32 */ ++#define __csrrd_w(/*uimm14_32*/ _1) \ ++ ((unsigned int)__builtin_loongarch_csrrd_w(_1)) ++ ++/* Assembly instruction format: rd, csr_num */ ++/* Data types in instruction templates: unsigned int, uimm14_32 */ ++#define __csrwr_w(/*unsigned int*/ _1, /*uimm14_32*/ _2) \ ++ ((unsigned int)__builtin_loongarch_csrwr_w((unsigned int)(_1), (_2))) ++ ++/* Assembly instruction format: rd, rj, csr_num */ ++/* Data types in instruction templates: unsigned int, unsigned int, uimm14_32 */ ++#define __csrxchg_w(/*unsigned int*/ _1, /*unsigned int*/ _2, \ ++ /*uimm14_32*/ _3) \ ++ ((unsigned int)__builtin_loongarch_csrxchg_w((unsigned int)(_1), \ ++ (unsigned int)(_2), (_3))) ++ ++/* Assembly instruction format: rd, csr_num */ ++/* Data types in instruction templates: unsigned long int, uimm14 */ ++#define __csrrd_d(/*uimm14*/ _1) \ ++ ((unsigned long int)__builtin_loongarch_csrrd_d(_1)) ++ ++/* Assembly instruction format: rd, csr_num */ ++/* Data types in instruction templates: unsigned long int, uimm14 */ ++#define __csrwr_d(/*unsigned long int*/ _1, /*uimm14*/ _2) \ ++ ((unsigned long int)__builtin_loongarch_csrwr_d((unsigned long int)(_1), \ ++ (_2))) ++ ++/* Assembly instruction format: rd, rj, csr_num */ ++/* Data types in instruction templates: unsigned long int, unsigned long int, uimm14 */ ++#define __csrxchg_d(/*unsigned long int*/ _1, /*unsigned long int*/ _2, \ ++ /*uimm14*/ _3) \ ++ ((unsigned long int)__builtin_loongarch_csrxchg_d( \ ++ (unsigned long int)(_1), (unsigned long int)(_2), (_3))) ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned char, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++unsigned char __iocsrrd_b(unsigned int _1) ++{ ++ return (unsigned char)__builtin_loongarch_iocsrrd_b((unsigned int)_1); ++} ++ ++/* Assembly 
instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned short, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++unsigned short __iocsrrd_h(unsigned int _1) ++{ ++ return (unsigned short)__builtin_loongarch_iocsrrd_h((unsigned int)_1); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned int, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++unsigned int __iocsrrd_w(unsigned int _1) ++{ ++ return (unsigned int)__builtin_loongarch_iocsrrd_w((unsigned int)_1); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned long int, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++unsigned long int __iocsrrd_d(unsigned int _1) ++{ ++ return (unsigned long int)__builtin_loongarch_iocsrrd_d((unsigned int)_1); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned char, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __iocsrwr_b(unsigned char _1, unsigned int _2) ++{ ++ return (void)__builtin_loongarch_iocsrwr_b((unsigned char)_1, (unsigned int)_2); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned short, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __iocsrwr_h(unsigned short _1, unsigned int _2) ++{ ++ return (void)__builtin_loongarch_iocsrwr_h((unsigned short)_1, (unsigned int)_2); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned int, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __iocsrwr_w(unsigned int _1, unsigned int _2) ++{ ++ return (void)__builtin_loongarch_iocsrwr_w((unsigned int)_1, (unsigned int)_2); ++} ++ ++extern __inline unsigned int ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++ __cpucfg(unsigned int _1) { ++ return (unsigned int)__builtin_loongarch_cpucfg((unsigned int)_1); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned long int, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __iocsrwr_d(unsigned long int _1, unsigned int _2) ++{ ++ return (void)__builtin_loongarch_iocsrwr_d((unsigned long int)_1, (unsigned int)_2); ++} ++ ++/* Assembly instruction format: op, rj, si12 */ ++/* Data types in instruction templates: uimm5, unsigned int, simm12 */ ++#define __cacop_w(/*uimm5*/ _1, /*unsigned int*/ _2, /*simm12*/ _3) \ ++ ((void)__builtin_loongarch_cacop_w((_1), (unsigned int)(_2), (_3))) ++ ++/* Assembly instruction format: op, rj, si12 */ ++/* Data types in instruction templates: uimm5, unsigned long int, simm12 */ ++#define __cacop_d(/*uimm5*/ _1, /*unsigned long int*/ _2, /*simm12*/ _3) \ ++ ((void)__builtin_loongarch_cacop_d((_1), (unsigned long int)(_2), (_3))) ++ ++#define __rdtime_d __builtin_loongarch_rdtime_d ++#define __rdtimel_w __builtin_loongarch_rdtimel_w ++#define __rdtimeh_w __builtin_loongarch_rdtimeh_w ++ ++extern __inline __drdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__builtin_loongarch_rdtime_d (void) ++{ ++ __drdtime_t drdtime; ++ __asm__ volatile ( ++ "rdtime.d\t%[val],%[tid]\n\t" ++ : 
[val]"=&r"(drdtime.dvalue),[tid]"=&r"(drdtime.dtimeid) ++ : ++ ); ++ return drdtime; ++} ++ ++extern __inline __rdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__builtin_loongarch_rdtimeh_w (void) ++{ ++ __rdtime_t rdtime; ++ __asm__ volatile ( ++ "rdtimeh.w\t%[val],%[tid]\n\t" ++ : [val]"=&r"(rdtime.value),[tid]"=&r"(rdtime.timeid) ++ : ++ ); ++ return rdtime; ++} ++ ++extern __inline __rdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__builtin_loongarch_rdtimel_w (void) ++{ ++ __rdtime_t rdtime; ++ __asm__ volatile ( ++ "rdtimel.w\t%[val],%[tid]\n\t" ++ : [val]"=&r"(rdtime.value),[tid]"=&r"(rdtime.timeid) ++ : ++ ); ++ return rdtime; ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, char, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crc_w_b_w(char _1, int _2) ++{ ++ return (int)__builtin_loongarch_crc_w_b_w((char)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, short, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crc_w_h_w(short _1, int _2) ++{ ++ return (int)__builtin_loongarch_crc_w_h_w((short)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, int, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crc_w_w_w(int _1, int _2) ++{ ++ return (int)__builtin_loongarch_crc_w_w_w((int)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, long int, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crc_w_d_w(long int _1, int _2) ++{ ++ return (int)__builtin_loongarch_crc_w_d_w((long int)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, char, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crcc_w_b_w(char _1, int _2) ++{ ++ return (int)__builtin_loongarch_crcc_w_b_w((char)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, short, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crcc_w_h_w(short _1, int _2) ++{ ++ return (int)__builtin_loongarch_crcc_w_h_w((short)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, int, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crcc_w_w_w(int _1, int _2) ++{ ++ return (int)__builtin_loongarch_crcc_w_w_w((int)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, long int, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crcc_w_d_w(long int _1, int _2) ++{ ++ return (int)__builtin_loongarch_crcc_w_d_w((long int)_1, (int)_2); ++} ++ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __tlbclr() ++{ ++ return (void)__builtin_loongarch_tlbclr(); ++} ++ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __tlbflush() ++{ ++ return (void)__builtin_loongarch_tlbflush(); ++} ++ ++extern __inline __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) ++void __tlbfill() ++{ ++ return (void)__builtin_loongarch_tlbfill(); ++} ++ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __tlbrd() ++{ ++ return (void)__builtin_loongarch_tlbrd(); ++} ++ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __tlbwr() ++{ ++ return (void)__builtin_loongarch_tlbwr(); ++} ++ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __tlbsrch() ++{ ++ return (void)__builtin_loongarch_tlbsrch(); ++} ++ ++/* Assembly instruction format: code */ ++/* Data types in instruction templates: uimm15 */ ++#define __syscall(/*uimm15*/ _1) ((void)__builtin_loongarch_syscall(_1)) ++ ++/* Assembly instruction format: code */ ++/* Data types in instruction templates: uimm15 */ ++#define __break(/*uimm15*/ _1) ((void)__builtin_loongarch_break(_1)) ++ ++/* Assembly instruction format: hint */ ++/* Data types in instruction templates: uimm15 */ ++#define __dbar(/*uimm15*/ _1) ((void)__builtin_loongarch_dbar(_1)) ++ ++/* Assembly instruction format: hint */ ++/* Data types in instruction templates: uimm15 */ ++#define __ibar(/*uimm15*/ _1) ((void)__builtin_loongarch_ibar(_1)) ++ ++/* Assembly instruction format: rj, rk */ ++/* Data types in instruction templates: long int, long int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __asrtle_d(long int _1, long int _2) ++{ ++ return (void)__builtin_loongarch_asrtle_d((long int)_1, (long int)_2); ++} ++ ++/* Assembly instruction format: rj, rk */ ++/* Data types in instruction templates: long int, long int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __asrtgt_d(long int _1, long int _2) ++{ ++ return (void)__builtin_loongarch_asrtgt_d((long int)_1, (long int)_2); ++} ++ ++#define __movfcsr2gr(uimm5) \ ++({ \ ++ unsigned int rd; \ ++ __asm__ volatile ( \ ++ "movfcsr2gr %0, $fcsr" #uimm5 \ ++ : "=&r"(rd) \ ++ : \ ++ ); rd; \ ++}) ++ ++#define __movgr2fcsr(uimm5, rj) \ ++{ \ ++ __asm__ volatile ( \ ++ "movgr2fcsr $fcsr" #uimm5 ", %0" \ ++ : \ ++ : "r" (rj) \ ++ ); \ ++} ++ ++#ifdef __cplusplus ++} ++#endif ++#endif /* __LOONGARCH_BASE_H */ +diff --git a/clang/lib/Headers/lasxintrin.h b/clang/lib/Headers/lasxintrin.h +new file mode 100644 +index 000000000..c454b0c9e +--- /dev/null ++++ b/clang/lib/Headers/lasxintrin.h +@@ -0,0 +1,5337 @@ ++//===----------- lasxintrin.h - LoongArch LASX intrinsics ++//------------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the LoongArch LASX intrinsics. 
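/* A usage sketch for the barrier and FCSR helpers defined at the end of
 * larchintrin.h above (illustrative, not from the patch).  Hint value 0 for
 * __dbar/__ibar and FCSR number 0 are the usual choices but are assumptions
 * here. */
#include <larchintrin.h>

static unsigned int sync_sketch(void) {
  __dbar(0);                              /* full data barrier                  */
  __ibar(0);                              /* instruction barrier                */
  return __movfcsr2gr(0);                 /* read $fcsr0 into a GPR             */
}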
++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef _GCC_LOONGSON_ASXINTRIN_H ++#define _GCC_LOONGSON_ASXINTRIN_H 1 ++ ++#if defined(__loongarch_asx) ++ ++typedef signed char v32i8 __attribute__((vector_size(32), aligned(32))); ++typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1))); ++typedef unsigned char v32u8 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned char v32u8_b __attribute__((vector_size(32), aligned(1))); ++typedef short v16i16 __attribute__((vector_size(32), aligned(32))); ++typedef short v16i16_h __attribute__((vector_size(32), aligned(2))); ++typedef unsigned short v16u16 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned short v16u16_h __attribute__((vector_size(32), aligned(2))); ++typedef int v8i32 __attribute__((vector_size(32), aligned(32))); ++typedef int v8i32_w __attribute__((vector_size(32), aligned(4))); ++typedef unsigned int v8u32 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4))); ++typedef long long v4i64 __attribute__((vector_size(32), aligned(32))); ++typedef long long v4i64_d __attribute__((vector_size(32), aligned(8))); ++typedef unsigned long long v4u64 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned long long v4u64_d __attribute__((vector_size(32), aligned(8))); ++typedef float v8f32 __attribute__((vector_size(32), aligned(32))); ++typedef float v8f32_w __attribute__((vector_size(32), aligned(4))); ++typedef double v4f64 __attribute__((vector_size(32), aligned(32))); ++typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); ++ ++typedef double v4f64 __attribute__((vector_size(32), aligned(32))); ++typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); ++ ++typedef float __m256 __attribute__((__vector_size__(32), __may_alias__)); ++typedef long long __m256i __attribute__((__vector_size__(32), __may_alias__)); ++typedef double __m256d __attribute__((__vector_size__(32), __may_alias__)); ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsll_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsll_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsll_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsll_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsll_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsll_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsll_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsll_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. 
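/* A usage sketch for the 256-bit types and the element shifts above
 * (illustrative, not from the patch; requires compiling with LASX enabled).
 * Building vectors through the GNU vector-extension initializer on the v8i32
 * helper type is an assumption of this sketch, not something the header
 * prescribes. */
#include <lasxintrin.h>

static __m256i shift_sketch(void) {
  v8i32 a = {1, 2, 3, 4, 5, 6, 7, 8};
  v8i32 s = {1, 1, 1, 1, 2, 2, 2, 2};
  __m256i x = __lasx_xvsll_w((__m256i)a, (__m256i)s); /* per-lane variable shift */
  return __lasx_xvslli_w(x, 3);                       /* shift every lane by 3   */
}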
*/ ++#define __lasx_xvslli_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslli_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ ++#define __lasx_xvslli_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslli_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ ++#define __lasx_xvslli_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslli_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ ++#define __lasx_xvslli_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslli_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsra_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsra_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsra_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsra_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsra_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsra_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsra_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsra_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ ++#define __lasx_xvsrai_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrai_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ ++#define __lasx_xvsrai_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrai_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ ++#define __lasx_xvsrai_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrai_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ ++#define __lasx_xvsrai_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrai_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrar_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrar_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrar_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrar_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrar_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrar_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrar_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrar_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ ++#define __lasx_xvsrari_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrari_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ ++#define __lasx_xvsrari_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrari_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ ++#define __lasx_xvsrari_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrari_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ ++#define __lasx_xvsrari_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrari_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrl_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrl_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrl_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrl_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrl_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrl_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrl_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrl_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ ++#define __lasx_xvsrli_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrli_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. 
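/* A sketch contrasting the right-shift families above (illustrative, not from
 * the patch): xvsrai_* shifts arithmetically, xvsrli_* logically, and
 * xvsrari_* shifts arithmetically with rounding.  The input values and the
 * shift amount 2 are assumptions. */
#include <lasxintrin.h>

static void rshift_sketch(void) {
  v8i32 a = {-7, -6, -5, -4, 4, 5, 6, 7};
  __m256i arith = __lasx_xvsrai_w((__m256i)a, 2);  /* sign-extending            */
  __m256i logic = __lasx_xvsrli_w((__m256i)a, 2);  /* zero-filling              */
  __m256i round = __lasx_xvsrari_w((__m256i)a, 2); /* arithmetic + rounding bit */
  (void)arith; (void)logic; (void)round;
}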
*/ ++#define __lasx_xvsrli_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrli_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ ++#define __lasx_xvsrli_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrli_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ ++#define __lasx_xvsrli_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrli_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlr_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlr_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlr_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlr_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlr_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlr_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlr_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlr_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ ++#define __lasx_xvsrlri_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrlri_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ ++#define __lasx_xvsrlri_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrlri_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ ++#define __lasx_xvsrlri_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrlri_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ ++#define __lasx_xvsrlri_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrlri_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitclr_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitclr_b((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitclr_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitclr_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. 
*/ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitclr_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitclr_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitclr_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitclr_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ ++#define __lasx_xvbitclri_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitclri_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ ++#define __lasx_xvbitclri_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitclri_h((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI. */ ++#define __lasx_xvbitclri_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitclri_w((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ ++#define __lasx_xvbitclri_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitclri_d((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitset_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitset_b((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitset_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitset_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitset_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitset_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitset_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitset_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ ++#define __lasx_xvbitseti_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitseti_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ ++#define __lasx_xvbitseti_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitseti_h((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI. 
*/ ++#define __lasx_xvbitseti_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitseti_w((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ ++#define __lasx_xvbitseti_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitseti_d((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitrev_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitrev_b((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitrev_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitrev_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitrev_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitrev_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitrev_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitrev_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ ++#define __lasx_xvbitrevi_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitrevi_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ ++#define __lasx_xvbitrevi_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitrevi_h((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI. */ ++#define __lasx_xvbitrevi_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitrevi_w((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ ++#define __lasx_xvbitrevi_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitrevi_d((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. 
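/* A usage sketch for the per-element bit operations above (illustrative, not
 * from the patch): bit 0 of every byte is set, cleared and then flipped back.
 * The bit index 0 and the zero-initialized input are assumptions. */
#include <lasxintrin.h>

static __m256i bit_sketch(void) {
  v32u8 a = {0};                          /* every byte lane starts at 0x00     */
  __m256i v = (__m256i)a;
  v = __lasx_xvbitseti_b(v, 0);           /* set bit 0   -> 0x01 in each byte   */
  v = __lasx_xvbitclri_b(v, 0);           /* clear bit 0 -> 0x00                */
  return __lasx_xvbitrevi_b(v, 0);        /* flip bit 0  -> 0x01                */
}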
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ ++#define __lasx_xvaddi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvaddi_bu((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ ++#define __lasx_xvaddi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvaddi_hu((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ ++#define __lasx_xvaddi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvaddi_wu((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ ++#define __lasx_xvaddi_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvaddi_du((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ ++#define __lasx_xvsubi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsubi_bu((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ ++#define __lasx_xvsubi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsubi_hu((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ ++#define __lasx_xvsubi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsubi_wu((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. 
*/ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ ++#define __lasx_xvsubi_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsubi_du((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V32QI, V32QI, QI. */ ++#define __lasx_xvmaxi_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V16HI, V16HI, QI. */ ++#define __lasx_xvmaxi_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V8SI, V8SI, QI. */ ++#define __lasx_xvmaxi_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V4DI, V4DI, QI. */ ++#define __lasx_xvmaxi_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ ++#define __lasx_xvmaxi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_bu((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ ++#define __lasx_xvmaxi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_hu((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI. */ ++#define __lasx_xvmaxi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_wu((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ ++#define __lasx_xvmaxi_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_du((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V32QI, V32QI, QI. */ ++#define __lasx_xvmini_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V16HI, V16HI, QI. */ ++#define __lasx_xvmini_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V8SI, V8SI, QI. */ ++#define __lasx_xvmini_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V4DI, V4DI, QI. */ ++#define __lasx_xvmini_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. 
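/* A usage sketch for the element-wise max/min family above (illustrative, not
 * from the patch).  The _bu/_hu/_wu/_du forms compare unsigned lanes; the
 * signed immediate forms take an si5 constant (-16..15), and the bounds -8 and
 * 7 used here are assumptions. */
#include <lasxintrin.h>

static __m256i clamp_sketch(__m256i x) {
  x = __lasx_xvmaxi_w(x, -8);             /* lower-bound each 32-bit lane at -8 */
  return __lasx_xvmini_w(x, 7);           /* upper-bound each 32-bit lane at 7  */
}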
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ ++#define __lasx_xvmini_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_bu((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ ++#define __lasx_xvmini_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_hu((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI. */ ++#define __lasx_xvmini_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_wu((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ ++#define __lasx_xvmini_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_du((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvseq_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvseq_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvseq_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvseq_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvseq_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvseq_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvseq_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvseq_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, si5. 
*/ ++/* Data types in instruction templates: V32QI, V32QI, QI. */ ++#define __lasx_xvseqi_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvseqi_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V16HI, V16HI, QI. */ ++#define __lasx_xvseqi_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvseqi_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V8SI, V8SI, QI. */ ++#define __lasx_xvseqi_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvseqi_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V4DI, V4DI, QI. */ ++#define __lasx_xvseqi_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvseqi_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V32QI, V32QI, QI. */ ++#define __lasx_xvslti_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V16HI, V16HI, QI. */ ++#define __lasx_xvslti_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V8SI, V8SI, QI. */ ++#define __lasx_xvslti_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V4DI, V4DI, QI. */ ++#define __lasx_xvslti_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV16HI, UV16HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V32QI, UV32QI, UQI. */ ++#define __lasx_xvslti_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_bu((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, UV16HI, UQI. */ ++#define __lasx_xvslti_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_hu((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, UV8SI, UQI. */ ++#define __lasx_xvslti_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_wu((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V4DI, UV4DI, UQI. */ ++#define __lasx_xvslti_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_du((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V32QI, V32QI, QI. */ ++#define __lasx_xvslei_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V16HI, V16HI, QI. 
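/* A usage sketch for the comparison intrinsics above (illustrative, not from
 * the patch): each comparison yields a lane mask of all ones (true) or all
 * zeros (false).  Driving a select with plain &/|/~ on the mask relies on GNU
 * vector bitwise operators and is an assumption of this sketch. */
#include <lasxintrin.h>

static __m256i select_smaller(__m256i a, __m256i b) {
  __m256i lt = __lasx_xvslt_w(a, b);      /* all-ones where a < b (signed)      */
  return (lt & a) | (~lt & b);            /* pick the smaller lane from a or b  */
}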
*/ ++#define __lasx_xvslei_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V8SI, V8SI, QI. */ ++#define __lasx_xvslei_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V4DI, V4DI, QI. */ ++#define __lasx_xvslei_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V32QI, UV32QI, UQI. */ ++#define __lasx_xvslei_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_bu((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, UV16HI, UQI. */ ++#define __lasx_xvslei_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_hu((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, UV8SI, UQI. */ ++#define __lasx_xvslei_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_wu((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V4DI, UV4DI, UQI. */ ++#define __lasx_xvslei_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_du((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ ++#define __lasx_xvsat_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ ++#define __lasx_xvsat_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ ++#define __lasx_xvsat_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6. 
*/ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ ++#define __lasx_xvsat_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ ++#define __lasx_xvsat_bu(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_bu((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ ++#define __lasx_xvsat_hu(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_hu((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI. */ ++#define __lasx_xvsat_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_wu((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ ++#define __lasx_xvsat_du(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_du((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadda_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadda_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadda_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadda_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadda_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadda_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadda_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadda_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. 
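/* A usage sketch for the saturation and saturating-add intrinsics above
 * (illustrative, not from the patch): xvsadd_* adds with saturation instead of
 * wrapping, and xvsat_h(x, n) clamps every 16-bit lane to the signed
 * (n+1)-bit range.  The width 7, i.e. the int8 range, is an assumption. */
#include <lasxintrin.h>

static __m256i sat_sketch(__m256i a, __m256i b) {
  __m256i sum = __lasx_xvsadd_h(a, b);    /* saturating 16-bit add              */
  return __lasx_xvsat_h(sum, 7);          /* clamp each lane to [-128, 127]     */
}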
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmul_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmul_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmul_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmul_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmul_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmul_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmul_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmul_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. 
*/ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmadd_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmadd_b((v32i8)_1, (v32i8)_2, (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmadd_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmadd_h((v16i16)_1, (v16i16)_2, (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmadd_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmadd_w((v8i32)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmadd_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmadd_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsub_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmsub_b((v32i8)_1, (v32i8)_2, (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsub_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmsub_h((v16i16)_1, (v16i16)_2, (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsub_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmsub_w((v8i32)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsub_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmsub_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. 
*/ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_hu_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_hu_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV16HI, UV16HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_wu_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_wu_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_du_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_du_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_hu_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_hu_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_wu_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_wu_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_du_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_du_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ ++#define __lasx_xvrepl128vei_b(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrepl128vei_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ ++#define __lasx_xvrepl128vei_h(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrepl128vei_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui2. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ ++#define __lasx_xvrepl128vei_w(/*__m256i*/ _1, /*ui2*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrepl128vei_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui1. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ ++#define __lasx_xvrepl128vei_d(/*__m256i*/ _1, /*ui1*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrepl128vei_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickev_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickev_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickev_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickev_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. 
*/ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickev_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickev_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickev_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickev_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickod_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickod_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickod_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickod_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickod_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickod_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickod_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickod_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvh_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvh_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvh_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvh_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvh_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvh_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvh_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvh_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvl_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvl_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvl_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvl_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvl_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvl_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvl_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvl_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackev_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackev_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackev_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackev_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackev_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackev_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackev_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackev_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackod_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackod_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackod_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackod_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackod_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackod_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackod_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackod_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvshuf_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvshuf_b((v32i8)_1, (v32i8)_2, (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvshuf_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvshuf_h((v16i16)_1, (v16i16)_2, (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvshuf_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvshuf_w((v8i32)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvshuf_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvshuf_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvand_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvand_v((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ ++#define __lasx_xvandi_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvandi_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvor_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvor_v((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ ++#define __lasx_xvori_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvori_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvnor_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvnor_v((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. 
*/ ++#define __lasx_xvnori_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvnori_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvxor_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvxor_v((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ ++#define __lasx_xvxori_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvxori_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitsel_v(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvbitsel_v((v32u8)_1, (v32u8)_2, (v32u8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI, USI. */ ++#define __lasx_xvbitseli_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvbitseli_b((v32u8)(_1), (v32u8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V32QI, V32QI, USI. */ ++#define __lasx_xvshuf4i_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvshuf4i_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V16HI, V16HI, USI. */ ++#define __lasx_xvshuf4i_h(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvshuf4i_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V8SI, V8SI, USI. */ ++#define __lasx_xvshuf4i_w(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvshuf4i_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, rj. */ ++/* Data types in instruction templates: V32QI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplgr2vr_b(int _1) { ++ return (__m256i)__builtin_lasx_xvreplgr2vr_b((int)_1); ++} ++ ++/* Assembly instruction format: xd, rj. */ ++/* Data types in instruction templates: V16HI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplgr2vr_h(int _1) { ++ return (__m256i)__builtin_lasx_xvreplgr2vr_h((int)_1); ++} ++ ++/* Assembly instruction format: xd, rj. */ ++/* Data types in instruction templates: V8SI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplgr2vr_w(int _1) { ++ return (__m256i)__builtin_lasx_xvreplgr2vr_w((int)_1); ++} ++ ++/* Assembly instruction format: xd, rj. */ ++/* Data types in instruction templates: V4DI, DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplgr2vr_d(long int _1) { ++ return (__m256i)__builtin_lasx_xvreplgr2vr_d((long int)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpcnt_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvpcnt_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpcnt_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvpcnt_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpcnt_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvpcnt_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpcnt_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvpcnt_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclo_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclo_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclo_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclo_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclo_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclo_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclo_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclo_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclz_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclz_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclz_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclz_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclz_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclz_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclz_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclz_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfadd_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfadd_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfadd_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfadd_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfsub_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfsub_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfsub_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfsub_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmul_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmul_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmul_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmul_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfdiv_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfdiv_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfdiv_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfdiv_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcvt_h_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcvt_h_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfcvt_s_d(__m256d _1, __m256d _2) { ++ return (__m256)__builtin_lasx_xvfcvt_s_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmin_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmin_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. 
*/ ++/* Data types in instruction templates: V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmin_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmin_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmina_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmina_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmina_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmina_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmax_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmax_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmax_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmax_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmaxa_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmaxa_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmaxa_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmaxa_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfclass_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvfclass_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfclass_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvfclass_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfsqrt_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfsqrt_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfsqrt_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfsqrt_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrecip_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrecip_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrecip_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrecip_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrint_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrint_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrint_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrint_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrsqrt_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrsqrt_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrsqrt_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrsqrt_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvflogb_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvflogb_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvflogb_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvflogb_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfcvth_s_h(__m256i _1) { ++ return (__m256)__builtin_lasx_xvfcvth_s_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfcvth_d_s(__m256 _1) { ++ return (__m256d)__builtin_lasx_xvfcvth_d_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfcvtl_s_h(__m256i _1) { ++ return (__m256)__builtin_lasx_xvfcvtl_s_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfcvtl_d_s(__m256 _1) { ++ return (__m256d)__builtin_lasx_xvfcvtl_d_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftint_w_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftint_l_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV8SI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_wu_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftint_wu_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV4DI, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_lu_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftint_lu_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrz_w_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrz_l_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV8SI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_wu_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrz_wu_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV4DI, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_lu_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrz_lu_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvffint_s_w(__m256i _1) { ++ return (__m256)__builtin_lasx_xvffint_s_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvffint_d_l(__m256i _1) { ++ return (__m256d)__builtin_lasx_xvffint_d_l((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvffint_s_wu(__m256i _1) { ++ return (__m256)__builtin_lasx_xvffint_s_wu((v8u32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, UV4DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvffint_d_lu(__m256i _1) { ++ return (__m256d)__builtin_lasx_xvffint_d_lu((v4u64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, rk. */ ++/* Data types in instruction templates: V32QI, V32QI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve_b(__m256i _1, int _2) { ++ return (__m256i)__builtin_lasx_xvreplve_b((v32i8)_1, (int)_2); ++} ++ ++/* Assembly instruction format: xd, xj, rk. */ ++/* Data types in instruction templates: V16HI, V16HI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve_h(__m256i _1, int _2) { ++ return (__m256i)__builtin_lasx_xvreplve_h((v16i16)_1, (int)_2); ++} ++ ++/* Assembly instruction format: xd, xj, rk. */ ++/* Data types in instruction templates: V8SI, V8SI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve_w(__m256i _1, int _2) { ++ return (__m256i)__builtin_lasx_xvreplve_w((v8i32)_1, (int)_2); ++} ++ ++/* Assembly instruction format: xd, xj, rk. */ ++/* Data types in instruction templates: V4DI, V4DI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve_d(__m256i _1, int _2) { ++ return (__m256i)__builtin_lasx_xvreplve_d((v4i64)_1, (int)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ ++#define __lasx_xvpermi_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvpermi_w((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvandn_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvandn_v((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvneg_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvneg_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvneg_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvneg_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvneg_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvneg_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvneg_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvneg_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V16HI, V32QI, UQI. */ ++#define __lasx_xvsllwil_h_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_h_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V8SI, V16HI, UQI. */ ++#define __lasx_xvsllwil_w_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_w_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V4DI, V8SI, UQI. */ ++#define __lasx_xvsllwil_d_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_d_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: UV16HI, UV32QI, UQI. */ ++#define __lasx_xvsllwil_hu_bu(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_hu_bu((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. 
*/ ++/* Data types in instruction templates: UV8SI, UV16HI, UQI. */ ++#define __lasx_xvsllwil_wu_hu(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_wu_hu((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV4DI, UV8SI, UQI. */ ++#define __lasx_xvsllwil_du_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_du_wu((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsran_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsran_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsran_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsran_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsran_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsran_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_bu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_bu_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_hu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_hu_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_wu_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_wu_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. 
*/ ++/* Data types in instruction templates: V32QI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrarn_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrarn_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrarn_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrarn_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrarn_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrarn_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_bu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_bu_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_hu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_hu_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_wu_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_wu_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrln_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrln_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrln_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrln_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrln_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrln_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_bu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_bu_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_hu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_hu_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_wu_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_wu_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlrn_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlrn_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlrn_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlrn_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlrn_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlrn_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_bu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_bu_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_hu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_hu_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV4DI, UV4DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_wu_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_wu_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, UQI. */ ++#define __lasx_xvfrstpi_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvfrstpi_b((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, UQI. */ ++#define __lasx_xvfrstpi_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvfrstpi_h((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfrstp_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvfrstp_b((v32i8)_1, (v32i8)_2, (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfrstp_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvfrstp_h((v16i16)_1, (v16i16)_2, (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ ++#define __lasx_xvshuf4i_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvshuf4i_d((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ ++#define __lasx_xvbsrl_v(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbsrl_v((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ ++#define __lasx_xvbsll_v(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbsll_v((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ ++#define __lasx_xvextrins_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvextrins_b((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ ++#define __lasx_xvextrins_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvextrins_h((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ ++#define __lasx_xvextrins_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvextrins_w((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ ++#define __lasx_xvextrins_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvextrins_d((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskltz_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskltz_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskltz_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskltz_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskltz_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskltz_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskltz_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskltz_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsigncov_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsigncov_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsigncov_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsigncov_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsigncov_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsigncov_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsigncov_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsigncov_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmadd_s(__m256 _1, __m256 _2, __m256 _3) { ++ return (__m256)__builtin_lasx_xvfmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmadd_d(__m256d _1, __m256d _2, __m256d _3) { ++ return (__m256d)__builtin_lasx_xvfmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmsub_s(__m256 _1, __m256 _2, __m256 _3) { ++ return (__m256)__builtin_lasx_xvfmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa. 
*/ ++/* Data types in instruction templates: V4DF, V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmsub_d(__m256d _1, __m256d _2, __m256d _3) { ++ return (__m256d)__builtin_lasx_xvfmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfnmadd_s(__m256 _1, __m256 _2, __m256 _3) { ++ return (__m256)__builtin_lasx_xvfnmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfnmadd_d(__m256d _1, __m256d _2, __m256d _3) { ++ return (__m256d)__builtin_lasx_xvfnmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfnmsub_s(__m256 _1, __m256 _2, __m256 _3) { ++ return (__m256)__builtin_lasx_xvfnmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfnmsub_d(__m256d _1, __m256d _2, __m256d _3) { ++ return (__m256d)__builtin_lasx_xvfnmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrne_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrne_w_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrne_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrne_l_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrp_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrp_w_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrp_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrp_l_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrm_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrm_w_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrm_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrm_l_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftint_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvffint_s_l(__m256i _1, __m256i _2) { ++ return (__m256)__builtin_lasx_xvffint_s_l((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftintrz_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrp_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftintrp_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrm_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftintrm_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrne_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftintrne_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftinth_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftinth_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintl_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintl_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvffinth_d_w(__m256i _1) { ++ return (__m256d)__builtin_lasx_xvffinth_d_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvffintl_d_w(__m256i _1) { ++ return (__m256d)__builtin_lasx_xvffintl_d_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrzh_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrzh_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrzl_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrzl_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrph_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrph_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrpl_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrpl_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrmh_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrmh_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrml_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrml_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrneh_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrneh_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrnel_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrnel_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrintrne_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrintrne_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrintrne_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrintrne_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrintrz_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrintrz_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrintrz_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrintrz_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrintrp_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrintrp_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrintrp_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrintrp_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrintrm_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrintrm_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrintrm_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrintrm_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, rj, si12. */ ++/* Data types in instruction templates: V32QI, CVPOINTER, SI. */ ++#define __lasx_xvld(/*void **/ _1, /*si12*/ _2) \ ++ ((__m256i)__builtin_lasx_xvld((void const *)(_1), (_2))) ++ ++/* Assembly instruction format: xd, rj, si12. */ ++/* Data types in instruction templates: VOID, V32QI, CVPOINTER, SI. */ ++#define __lasx_xvst(/*__m256i*/ _1, /*void **/ _2, /*si12*/ _3) \ ++ ((void)__builtin_lasx_xvst((v32i8)(_1), (void *)(_2), (_3))) ++ ++/* Assembly instruction format: xd, rj, si8, idx. */ ++/* Data types in instruction templates: VOID, V32QI, CVPOINTER, SI, UQI. */ ++#define __lasx_xvstelm_b(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ ++ /*idx*/ _4) \ ++ ((void)__builtin_lasx_xvstelm_b((v32i8)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: xd, rj, si8, idx. */ ++/* Data types in instruction templates: VOID, V16HI, CVPOINTER, SI, UQI. */ ++#define __lasx_xvstelm_h(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ ++ /*idx*/ _4) \ ++ ((void)__builtin_lasx_xvstelm_h((v16i16)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: xd, rj, si8, idx. */ ++/* Data types in instruction templates: VOID, V8SI, CVPOINTER, SI, UQI. */ ++#define __lasx_xvstelm_w(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ ++ /*idx*/ _4) \ ++ ((void)__builtin_lasx_xvstelm_w((v8i32)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: xd, rj, si8, idx. */ ++/* Data types in instruction templates: VOID, V4DI, CVPOINTER, SI, UQI. */ ++#define __lasx_xvstelm_d(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ ++ /*idx*/ _4) \ ++ ((void)__builtin_lasx_xvstelm_d((v4i64)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, UQI. */ ++#define __lasx_xvinsve0_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui3*/ _3) \ ++ ((__m256i)__builtin_lasx_xvinsve0_w((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui2. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, UQI. 
*/ ++#define __lasx_xvinsve0_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui2*/ _3) \ ++ ((__m256i)__builtin_lasx_xvinsve0_d((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ ++#define __lasx_xvpickve_w(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvpickve_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui2. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ ++#define __lasx_xvpickve_d(/*__m256i*/ _1, /*ui2*/ _2) \ ++ ((__m256i)__builtin_lasx_xvpickve_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvorn_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvorn_v((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, i13. */ ++/* Data types in instruction templates: V4DI, HI. */ ++#define __lasx_xvldi(/*i13*/ _1) ((__m256i)__builtin_lasx_xvldi((_1))) ++ ++/* Assembly instruction format: xd, rj, rk. */ ++/* Data types in instruction templates: V32QI, CVPOINTER, DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvldx(void const *_1, long int _2) { ++ return (__m256i)__builtin_lasx_xvldx((void const *)_1, (long int)_2); ++} ++ ++/* Assembly instruction format: xd, rj, rk. 
*/ ++/* Data types in instruction templates: VOID, V32QI, CVPOINTER, DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void ++ __lasx_xvstx(__m256i _1, void *_2, long int _3) { ++ return (void)__builtin_lasx_xvstx((v32i8)_1, (void *)_2, (long int)_3); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvextl_qu_du(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvextl_qu_du((v4u64)_1); ++} ++ ++/* Assembly instruction format: xd, rj, ui3. */ ++/* Data types in instruction templates: V8SI, V8SI, SI, UQI. */ ++#define __lasx_xvinsgr2vr_w(/*__m256i*/ _1, /*int*/ _2, /*ui3*/ _3) \ ++ ((__m256i)__builtin_lasx_xvinsgr2vr_w((v8i32)(_1), (int)(_2), (_3))) ++ ++/* Assembly instruction format: xd, rj, ui2. */ ++/* Data types in instruction templates: V4DI, V4DI, DI, UQI. */ ++#define __lasx_xvinsgr2vr_d(/*__m256i*/ _1, /*long int*/ _2, /*ui2*/ _3) \ ++ ((__m256i)__builtin_lasx_xvinsgr2vr_d((v4i64)(_1), (long int)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_q(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_q((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_h_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_h_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_w_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_w_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_d_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_d_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_w_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_w_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_d_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_d_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_d_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_d_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_hu_bu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_hu_bu((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_wu_hu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_wu_hu((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_du_wu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_du_wu((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_wu_bu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_wu_bu((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_du_hu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_du_hu((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_du_bu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_du_bu((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ ++#define __lasx_xvpermi_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvpermi_q((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V4DI, V4DI, USI. */ ++#define __lasx_xvpermi_d(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvpermi_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvperm_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvperm_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, rj, si12. */ ++/* Data types in instruction templates: V32QI, CVPOINTER, SI. */ ++#define __lasx_xvldrepl_b(/*void **/ _1, /*si12*/ _2) \ ++ ((__m256i)__builtin_lasx_xvldrepl_b((void const *)(_1), (_2))) ++ ++/* Assembly instruction format: xd, rj, si11. */ ++/* Data types in instruction templates: V16HI, CVPOINTER, SI. */ ++#define __lasx_xvldrepl_h(/*void **/ _1, /*si11*/ _2) \ ++ ((__m256i)__builtin_lasx_xvldrepl_h((void const *)(_1), (_2))) ++ ++/* Assembly instruction format: xd, rj, si10. */ ++/* Data types in instruction templates: V8SI, CVPOINTER, SI. */ ++#define __lasx_xvldrepl_w(/*void **/ _1, /*si10*/ _2) \ ++ ((__m256i)__builtin_lasx_xvldrepl_w((void const *)(_1), (_2))) ++ ++/* Assembly instruction format: xd, rj, si9. */ ++/* Data types in instruction templates: V4DI, CVPOINTER, SI. */ ++#define __lasx_xvldrepl_d(/*void **/ _1, /*si9*/ _2) \ ++ ((__m256i)__builtin_lasx_xvldrepl_d((void const *)(_1), (_2))) ++ ++/* Assembly instruction format: rd, xj, ui3. */ ++/* Data types in instruction templates: SI, V8SI, UQI. */ ++#define __lasx_xvpickve2gr_w(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((int)__builtin_lasx_xvpickve2gr_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: rd, xj, ui3. */ ++/* Data types in instruction templates: USI, V8SI, UQI. */ ++#define __lasx_xvpickve2gr_wu(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((unsigned int)__builtin_lasx_xvpickve2gr_wu((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: rd, xj, ui2. */ ++/* Data types in instruction templates: DI, V4DI, UQI. */ ++#define __lasx_xvpickve2gr_d(/*__m256i*/ _1, /*ui2*/ _2) \ ++ ((long int)__builtin_lasx_xvpickve2gr_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: rd, xj, ui2. */ ++/* Data types in instruction templates: UDI, V4DI, UQI. */ ++#define __lasx_xvpickve2gr_du(/*__m256i*/ _1, /*ui2*/ _2) \ ++ ((unsigned long int)__builtin_lasx_xvpickve2gr_du((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. 
*/ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_d_wu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_d_wu_w((v8u32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_w_hu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_w_hu_h((v16u16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_h_bu_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_h_bu_b((v32u8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_d_wu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_d_wu_w((v8u32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_w_hu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_w_hu_h((v16u16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_h_bu_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_h_bu_b((v32u8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, V8SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_d_wu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_d_wu_w((v8u32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_w_hu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_w_hu_h((v16u16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_h_bu_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_h_bu_b((v32u8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_d_wu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_d_wu_w((v8u32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_w_hu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_w_hu_h((v16u16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_h_bu_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_h_bu_b((v32u8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_qu_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_qu_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_qu_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_qu_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_q_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_q_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_d_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_w_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_w_h((v8i32)_1, (v16i16)_2, ++ (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_h_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_h_b((v16i16)_1, (v32i8)_2, ++ (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_q_du(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_q_du((v4u64)_1, (v4u64)_2, ++ (v4u64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_d_wu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_d_wu((v4u64)_1, (v8u32)_2, ++ (v8u32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_w_hu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_w_hu((v8u32)_1, (v16u16)_2, ++ (v16u16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_h_bu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_h_bu((v16u16)_1, (v32u8)_2, ++ (v32u8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_q_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_q_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V8SI, V8SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_d_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_w_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_w_h((v8i32)_1, (v16i16)_2, ++ (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_h_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_h_b((v16i16)_1, (v32i8)_2, ++ (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_q_du(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_q_du((v4u64)_1, (v4u64)_2, ++ (v4u64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_d_wu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_d_wu((v4u64)_1, (v8u32)_2, ++ (v8u32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_w_hu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_w_hu((v8u32)_1, (v16u16)_2, ++ (v16u16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_h_bu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_h_bu((v16u16)_1, (v32u8)_2, ++ (v32u8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, UV4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_q_du_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_q_du_d((v4i64)_1, (v4u64)_2, ++ (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, UV8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_d_wu_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_d_wu_w((v4i64)_1, (v8u32)_2, ++ (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, UV16HI, V16HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_w_hu_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_w_hu_h((v8i32)_1, (v16u16)_2, ++ (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, UV32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_h_bu_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_h_bu_b((v16i16)_1, (v32u8)_2, ++ (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, UV4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_q_du_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_q_du_d((v4i64)_1, (v4u64)_2, ++ (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, UV8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_d_wu_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_d_wu_w((v4i64)_1, (v8u32)_2, ++ (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, UV16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_w_hu_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_w_hu_h((v8i32)_1, (v16u16)_2, ++ (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, UV32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_h_bu_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_h_bu_b((v16i16)_1, (v32u8)_2, ++ (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvrotr_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvrotr_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvrotr_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvrotr_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvrotr_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvrotr_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvrotr_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvrotr_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. 
*/ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_q(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_q((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_q(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_q((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_q_du_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_q_du_d((v4u64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_q_du_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_q_du_d((v4u64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_q_du_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_q_du_d((v4u64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_q_du_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_q_du_d((v4u64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskgez_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskgez_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsknz_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmsknz_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_h_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_h_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_w_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_w_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_d_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_d_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. 
*/ ++/* Data types in instruction templates: V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_q_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_q_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV16HI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_hu_bu(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_hu_bu((v32u8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV8SI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_wu_hu(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_wu_hu((v16u16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV4DI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_du_wu(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_du_wu((v8u32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_qu_du(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_qu_du((v4u64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ ++#define __lasx_xvrotri_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrotri_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ ++#define __lasx_xvrotri_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrotri_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ ++#define __lasx_xvrotri_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrotri_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ ++#define __lasx_xvrotri_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrotri_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvextl_q_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvextl_q_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ ++#define __lasx_xvsrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ ++#define __lasx_xvsrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ ++#define __lasx_xvsrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. 
*/ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ ++#define __lasx_xvsrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ ++#define __lasx_xvsrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ ++#define __lasx_xvsrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ ++#define __lasx_xvsrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ ++#define __lasx_xvsrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlrni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ ++#define __lasx_xvssrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ ++#define __lasx_xvssrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ ++#define __lasx_xvssrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ ++#define __lasx_xvssrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: UV32QI, UV32QI, V32QI, USI. */ ++#define __lasx_xvssrlni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV16HI, UV16HI, V16HI, USI. */ ++#define __lasx_xvssrlni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: UV8SI, UV8SI, V8SI, USI. */ ++#define __lasx_xvssrlni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: UV4DI, UV4DI, V4DI, USI. 
*/ ++#define __lasx_xvssrlni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ ++#define __lasx_xvssrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ ++#define __lasx_xvssrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ ++#define __lasx_xvssrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ ++#define __lasx_xvssrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: UV32QI, UV32QI, V32QI, USI. */ ++#define __lasx_xvssrlrni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV16HI, UV16HI, V16HI, USI. */ ++#define __lasx_xvssrlrni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: UV8SI, UV8SI, V8SI, USI. */ ++#define __lasx_xvssrlrni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: UV4DI, UV4DI, V4DI, USI. */ ++#define __lasx_xvssrlrni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ ++#define __lasx_xvsrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrani_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ ++#define __lasx_xvsrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrani_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ ++#define __lasx_xvsrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrani_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ ++#define __lasx_xvsrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrani_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4. 
*/ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ ++#define __lasx_xvsrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ ++#define __lasx_xvsrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ ++#define __lasx_xvsrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ ++#define __lasx_xvsrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ ++#define __lasx_xvssrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ ++#define __lasx_xvssrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ ++#define __lasx_xvssrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ ++#define __lasx_xvssrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: UV32QI, UV32QI, V32QI, USI. */ ++#define __lasx_xvssrani_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV16HI, UV16HI, V16HI, USI. */ ++#define __lasx_xvssrani_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: UV8SI, UV8SI, V8SI, USI. */ ++#define __lasx_xvssrani_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: UV4DI, UV4DI, V4DI, USI. */ ++#define __lasx_xvssrani_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_du_q((v4u64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. 
*/ ++#define __lasx_xvssrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ ++#define __lasx_xvssrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ ++#define __lasx_xvssrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ ++#define __lasx_xvssrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: UV32QI, UV32QI, V32QI, USI. */ ++#define __lasx_xvssrarni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV16HI, UV16HI, V16HI, USI. */ ++#define __lasx_xvssrarni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: UV8SI, UV8SI, V8SI, USI. */ ++#define __lasx_xvssrarni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: UV4DI, UV4DI, V4DI, USI. */ ++#define __lasx_xvssrarni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV32QI. */ ++#define __lasx_xbnz_b(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_b((v32u8)(_1))) ++ ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV4DI. */ ++#define __lasx_xbnz_d(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_d((v4u64)(_1))) ++ ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV16HI. */ ++#define __lasx_xbnz_h(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_h((v16u16)(_1))) ++ ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV32QI. */ ++#define __lasx_xbnz_v(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_v((v32u8)(_1))) ++ ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV8SI. */ ++#define __lasx_xbnz_w(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_w((v8u32)(_1))) ++ ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV32QI. */ ++#define __lasx_xbz_b(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_b((v32u8)(_1))) ++ ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV4DI. */ ++#define __lasx_xbz_d(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_d((v4u64)(_1))) ++ ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV16HI. 
*/ ++#define __lasx_xbz_h(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_h((v16u16)(_1))) ++ ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV32QI. */ ++#define __lasx_xbz_v(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_v((v32u8)(_1))) ++ ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV8SI. */ ++#define __lasx_xbz_w(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_w((v8u32)(_1))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_caf_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_caf_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_caf_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_caf_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_ceq_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_ceq_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_ceq_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_ceq_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cle_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cle_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cle_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cle_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_clt_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_clt_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_clt_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_clt_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cne_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cne_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. 
*/ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cne_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cne_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cor_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cor_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cor_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cor_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cueq_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cueq_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cueq_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cueq_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cule_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cule_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cule_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cule_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cult_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cult_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cult_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cult_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cun_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cun_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cune_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cune_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cune_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cune_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cun_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cun_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_saf_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_saf_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_saf_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_saf_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_seq_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_seq_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_seq_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_seq_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sle_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sle_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sle_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sle_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_slt_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_slt_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_slt_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_slt_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sne_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sne_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sne_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sne_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sor_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sor_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sor_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sor_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sueq_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sueq_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sueq_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sueq_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sule_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sule_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sule_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sule_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sult_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sult_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sult_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sult_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sun_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sun_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sune_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sune_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sune_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sune_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sun_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sun_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui2. */ ++/* Data types in instruction templates: V4DF, V4DF, UQI. */ ++#define __lasx_xvpickve_d_f(/*__m256d*/ _1, /*ui2*/ _2) \ ++ ((__m256d)__builtin_lasx_xvpickve_d_f((v4f64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V8SF, V8SF, UQI. */ ++#define __lasx_xvpickve_w_f(/*__m256*/ _1, /*ui3*/ _2) \ ++ ((__m256)__builtin_lasx_xvpickve_w_f((v8f32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, si10. */ ++/* Data types in instruction templates: V32QI, HI. */ ++#define __lasx_xvrepli_b(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_b((_1))) ++ ++/* Assembly instruction format: xd, si10. */ ++/* Data types in instruction templates: V4DI, HI. */ ++#define __lasx_xvrepli_d(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_d((_1))) ++ ++/* Assembly instruction format: xd, si10. */ ++/* Data types in instruction templates: V16HI, HI. */ ++#define __lasx_xvrepli_h(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_h((_1))) ++ ++/* Assembly instruction format: xd, si10. */ ++/* Data types in instruction templates: V8SI, HI. */ ++#define __lasx_xvrepli_w(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_w((_1))) ++ ++#endif /* defined(__loongarch_asx). */ ++#endif /* _GCC_LOONGSON_ASXINTRIN_H. */ +diff --git a/clang/lib/Headers/lsxintrin.h b/clang/lib/Headers/lsxintrin.h +new file mode 100644 +index 000000000..48344c209 +--- /dev/null ++++ b/clang/lib/Headers/lsxintrin.h +@@ -0,0 +1,5162 @@ ++//===----------- lsxintrin.h - LoongArch LSX intrinsics ------------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the LoongArch LSX intrinsics. 
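++//
++// Illustrative usage sketch (an assumption for documentation purposes, not
++// one of the generated wrappers below): on a target where __loongarch_sx is
++// defined, the wrappers can be called like ordinary functions, e.g.
++//
++//   #include <lsxintrin.h>
++//   // Hypothetical helper: shift each 32-bit lane of 'a' left by the
++//   // per-lane shift amounts held in 'b', using the __lsx_vsll_w wrapper
++//   // defined in this header.
++//   static inline __m128i shift_lanes_left(__m128i a, __m128i b) {
++//     return __lsx_vsll_w(a, b);
++//   }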
++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef _GCC_LOONGSON_SXINTRIN_H ++#define _GCC_LOONGSON_SXINTRIN_H 1 ++ ++#if defined(__loongarch_sx) ++typedef signed char v16i8 __attribute__((vector_size(16), aligned(16))); ++typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1))); ++typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1))); ++typedef short v8i16 __attribute__((vector_size(16), aligned(16))); ++typedef short v8i16_h __attribute__((vector_size(16), aligned(2))); ++typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2))); ++typedef int v4i32 __attribute__((vector_size(16), aligned(16))); ++typedef int v4i32_w __attribute__((vector_size(16), aligned(4))); ++typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4))); ++typedef long long v2i64 __attribute__((vector_size(16), aligned(16))); ++typedef long long v2i64_d __attribute__((vector_size(16), aligned(8))); ++typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8))); ++typedef float v4f32 __attribute__((vector_size(16), aligned(16))); ++typedef float v4f32_w __attribute__((vector_size(16), aligned(4))); ++typedef double v2f64 __attribute__((vector_size(16), aligned(16))); ++typedef double v2f64_d __attribute__((vector_size(16), aligned(8))); ++ ++typedef long long __m128i __attribute__((__vector_size__(16), __may_alias__)); ++typedef float __m128 __attribute__((__vector_size__(16), __may_alias__)); ++typedef double __m128d __attribute__((__vector_size__(16), __may_alias__)); ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsll_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsll_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsll_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsll_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsll_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsll_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsll_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsll_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ ++#define __lsx_vslli_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vslli_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. 
*/ ++#define __lsx_vslli_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vslli_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ ++#define __lsx_vslli_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslli_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ ++#define __lsx_vslli_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vslli_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsra_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsra_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsra_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsra_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsra_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsra_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsra_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsra_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ ++#define __lsx_vsrai_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrai_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ ++#define __lsx_vsrai_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrai_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ ++#define __lsx_vsrai_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrai_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ ++#define __lsx_vsrai_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrai_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrar_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrar_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrar_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrar_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrar_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrar_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrar_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrar_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ ++#define __lsx_vsrari_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrari_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ ++#define __lsx_vsrari_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrari_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ ++#define __lsx_vsrari_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrari_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ ++#define __lsx_vsrari_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrari_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrl_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrl_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrl_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrl_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrl_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrl_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrl_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrl_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ ++#define __lsx_vsrli_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrli_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ ++#define __lsx_vsrli_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrli_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ ++#define __lsx_vsrli_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrli_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. 
*/ ++#define __lsx_vsrli_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrli_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlr_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlr_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlr_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlr_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlr_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlr_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlr_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlr_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ ++#define __lsx_vsrlri_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrlri_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ ++#define __lsx_vsrlri_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrlri_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ ++#define __lsx_vsrlri_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrlri_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ ++#define __lsx_vsrlri_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrlri_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitclr_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitclr_b((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitclr_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitclr_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitclr_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitclr_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitclr_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitclr_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ ++#define __lsx_vbitclri_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitclri_b((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ ++#define __lsx_vbitclri_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitclri_h((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI. */ ++#define __lsx_vbitclri_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitclri_w((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ ++#define __lsx_vbitclri_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitclri_d((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitset_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitset_b((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitset_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitset_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitset_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitset_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitset_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitset_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ ++#define __lsx_vbitseti_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitseti_b((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ ++#define __lsx_vbitseti_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitseti_h((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI. */ ++#define __lsx_vbitseti_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitseti_w((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ ++#define __lsx_vbitseti_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitseti_d((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitrev_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitrev_b((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitrev_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitrev_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitrev_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitrev_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitrev_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitrev_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ ++#define __lsx_vbitrevi_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitrevi_b((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ ++#define __lsx_vbitrevi_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitrevi_h((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI. */ ++#define __lsx_vbitrevi_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitrevi_w((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ ++#define __lsx_vbitrevi_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitrevi_d((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. 
*/ ++#define __lsx_vaddi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vaddi_bu((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ ++#define __lsx_vaddi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vaddi_hu((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ ++#define __lsx_vaddi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vaddi_wu((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ ++#define __lsx_vaddi_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vaddi_du((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ ++#define __lsx_vsubi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsubi_bu((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ ++#define __lsx_vsubi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsubi_hu((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ ++#define __lsx_vsubi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsubi_wu((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ ++#define __lsx_vsubi_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsubi_du((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V16QI, V16QI, QI. */ ++#define __lsx_vmaxi_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V8HI, V8HI, QI. */ ++#define __lsx_vmaxi_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V4SI, V4SI, QI. */ ++#define __lsx_vmaxi_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V2DI, V2DI, QI. */ ++#define __lsx_vmaxi_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ ++#define __lsx_vmaxi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_bu((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ ++#define __lsx_vmaxi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_hu((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. 
*/ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI. */ ++#define __lsx_vmaxi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_wu((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ ++#define __lsx_vmaxi_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_du((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V16QI, V16QI, QI. */ ++#define __lsx_vmini_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V8HI, V8HI, QI. */ ++#define __lsx_vmini_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V4SI, V4SI, QI. */ ++#define __lsx_vmini_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V2DI, V2DI, QI. */ ++#define __lsx_vmini_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ ++#define __lsx_vmini_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_bu((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ ++#define __lsx_vmini_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_hu((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI. */ ++#define __lsx_vmini_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_wu((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ ++#define __lsx_vmini_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_du((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vseq_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vseq_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vseq_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vseq_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vseq_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vseq_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vseq_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vseq_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V16QI, V16QI, QI. */ ++#define __lsx_vseqi_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vseqi_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V8HI, V8HI, QI. */ ++#define __lsx_vseqi_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vseqi_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V4SI, V4SI, QI. */ ++#define __lsx_vseqi_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vseqi_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V2DI, V2DI, QI. 
*/ ++#define __lsx_vseqi_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vseqi_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V16QI, V16QI, QI. */ ++#define __lsx_vslti_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V8HI, V8HI, QI. */ ++#define __lsx_vslti_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V4SI, V4SI, QI. */ ++#define __lsx_vslti_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V2DI, V2DI, QI. */ ++#define __lsx_vslti_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V16QI, UV16QI, UQI. */ ++#define __lsx_vslti_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_bu((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, UV8HI, UQI. */ ++#define __lsx_vslti_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_hu((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, UV4SI, UQI. */ ++#define __lsx_vslti_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_wu((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V2DI, UV2DI, UQI. */ ++#define __lsx_vslti_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_du((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V16QI, V16QI, QI. */ ++#define __lsx_vslei_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V8HI, V8HI, QI. */ ++#define __lsx_vslei_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V4SI, V4SI, QI. */ ++#define __lsx_vslei_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V2DI, V2DI, QI. */ ++#define __lsx_vslei_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, UV16QI, UV16QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V16QI, UV16QI, UQI. */ ++#define __lsx_vslei_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_bu((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, UV8HI, UQI. */ ++#define __lsx_vslei_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_hu((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, UV4SI, UQI. */ ++#define __lsx_vslei_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_wu((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V2DI, UV2DI, UQI. */ ++#define __lsx_vslei_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_du((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ ++#define __lsx_vsat_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ ++#define __lsx_vsat_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ ++#define __lsx_vsat_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ ++#define __lsx_vsat_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ ++#define __lsx_vsat_bu(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_bu((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ ++#define __lsx_vsat_hu(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_hu((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI. 
*/ ++#define __lsx_vsat_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_wu((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ ++#define __lsx_vsat_du(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_du((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadda_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadda_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadda_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadda_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadda_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadda_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadda_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadda_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. 
*/ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. 
*/ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmul_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmul_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmul_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmul_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmul_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmul_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmul_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmul_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmadd_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmadd_b((v16i8)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmadd_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmadd_h((v8i16)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmadd_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmadd_w((v4i32)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmadd_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmadd_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsub_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmsub_b((v16i8)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsub_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmsub_h((v8i16)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsub_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmsub_w((v4i32)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsub_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmsub_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_hu_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_hu_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_wu_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_wu_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_du_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_du_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_hu_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_hu_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_wu_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_wu_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_du_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_du_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. 
*/ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, rk. */ ++/* Data types in instruction templates: V16QI, V16QI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplve_b(__m128i _1, int _2) { ++ return (__m128i)__builtin_lsx_vreplve_b((v16i8)_1, (int)_2); ++} ++ ++/* Assembly instruction format: vd, vj, rk. */ ++/* Data types in instruction templates: V8HI, V8HI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplve_h(__m128i _1, int _2) { ++ return (__m128i)__builtin_lsx_vreplve_h((v8i16)_1, (int)_2); ++} ++ ++/* Assembly instruction format: vd, vj, rk. */ ++/* Data types in instruction templates: V4SI, V4SI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplve_w(__m128i _1, int _2) { ++ return (__m128i)__builtin_lsx_vreplve_w((v4i32)_1, (int)_2); ++} ++ ++/* Assembly instruction format: vd, vj, rk. */ ++/* Data types in instruction templates: V2DI, V2DI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplve_d(__m128i _1, int _2) { ++ return (__m128i)__builtin_lsx_vreplve_d((v2i64)_1, (int)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ ++#define __lsx_vreplvei_b(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vreplvei_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ ++#define __lsx_vreplvei_h(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vreplvei_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui2. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ ++#define __lsx_vreplvei_w(/*__m128i*/ _1, /*ui2*/ _2) \ ++ ((__m128i)__builtin_lsx_vreplvei_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui1. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ ++#define __lsx_vreplvei_d(/*__m128i*/ _1, /*ui1*/ _2) \ ++ ((__m128i)__builtin_lsx_vreplvei_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickev_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickev_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickev_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickev_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. 
*/ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickev_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickev_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickev_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickev_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickod_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickod_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickod_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickod_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickod_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickod_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickod_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickod_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvh_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvh_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvh_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvh_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvh_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvh_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvh_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvh_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvl_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvl_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvl_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvl_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvl_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvl_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvl_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvl_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackev_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackev_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackev_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackev_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackev_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackev_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackev_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackev_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackod_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackod_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackod_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackod_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackod_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackod_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. 
*/ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackod_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackod_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vshuf_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vshuf_h((v8i16)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vshuf_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vshuf_w((v4i32)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vshuf_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vshuf_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vand_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vand_v((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ ++#define __lsx_vandi_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vandi_b((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vor_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vor_v((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ ++#define __lsx_vori_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vori_b((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vnor_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vnor_v((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ ++#define __lsx_vnori_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vnori_b((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vxor_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vxor_v((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. 
*/ ++#define __lsx_vxori_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vxori_b((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitsel_v(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vbitsel_v((v16u8)_1, (v16u8)_2, (v16u8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI, USI. */ ++#define __lsx_vbitseli_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vbitseli_b((v16u8)(_1), (v16u8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V16QI, V16QI, USI. */ ++#define __lsx_vshuf4i_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vshuf4i_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V8HI, V8HI, USI. */ ++#define __lsx_vshuf4i_h(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vshuf4i_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V4SI, V4SI, USI. */ ++#define __lsx_vshuf4i_w(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vshuf4i_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, rj. */ ++/* Data types in instruction templates: V16QI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplgr2vr_b(int _1) { ++ return (__m128i)__builtin_lsx_vreplgr2vr_b((int)_1); ++} ++ ++/* Assembly instruction format: vd, rj. */ ++/* Data types in instruction templates: V8HI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplgr2vr_h(int _1) { ++ return (__m128i)__builtin_lsx_vreplgr2vr_h((int)_1); ++} ++ ++/* Assembly instruction format: vd, rj. */ ++/* Data types in instruction templates: V4SI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplgr2vr_w(int _1) { ++ return (__m128i)__builtin_lsx_vreplgr2vr_w((int)_1); ++} ++ ++/* Assembly instruction format: vd, rj. */ ++/* Data types in instruction templates: V2DI, DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplgr2vr_d(long int _1) { ++ return (__m128i)__builtin_lsx_vreplgr2vr_d((long int)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpcnt_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vpcnt_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpcnt_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vpcnt_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpcnt_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vpcnt_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. 
*/ ++/* Data types in instruction templates: V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpcnt_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vpcnt_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclo_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclo_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclo_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclo_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclo_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclo_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclo_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclo_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclz_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclz_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclz_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclz_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclz_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclz_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclz_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclz_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: rd, vj, ui4. */ ++/* Data types in instruction templates: SI, V16QI, UQI. */ ++#define __lsx_vpickve2gr_b(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((int)__builtin_lsx_vpickve2gr_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui3. */ ++/* Data types in instruction templates: SI, V8HI, UQI. */ ++#define __lsx_vpickve2gr_h(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((int)__builtin_lsx_vpickve2gr_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui2. */ ++/* Data types in instruction templates: SI, V4SI, UQI. */ ++#define __lsx_vpickve2gr_w(/*__m128i*/ _1, /*ui2*/ _2) \ ++ ((int)__builtin_lsx_vpickve2gr_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui1. */ ++/* Data types in instruction templates: DI, V2DI, UQI. */ ++#define __lsx_vpickve2gr_d(/*__m128i*/ _1, /*ui1*/ _2) \ ++ ((long int)__builtin_lsx_vpickve2gr_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui4. 
*/ ++/* Data types in instruction templates: USI, V16QI, UQI. */ ++#define __lsx_vpickve2gr_bu(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((unsigned int)__builtin_lsx_vpickve2gr_bu((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui3. */ ++/* Data types in instruction templates: USI, V8HI, UQI. */ ++#define __lsx_vpickve2gr_hu(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((unsigned int)__builtin_lsx_vpickve2gr_hu((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui2. */ ++/* Data types in instruction templates: USI, V4SI, UQI. */ ++#define __lsx_vpickve2gr_wu(/*__m128i*/ _1, /*ui2*/ _2) \ ++ ((unsigned int)__builtin_lsx_vpickve2gr_wu((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui1. */ ++/* Data types in instruction templates: UDI, V2DI, UQI. */ ++#define __lsx_vpickve2gr_du(/*__m128i*/ _1, /*ui1*/ _2) \ ++ ((unsigned long int)__builtin_lsx_vpickve2gr_du((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, rj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, SI, UQI. */ ++#define __lsx_vinsgr2vr_b(/*__m128i*/ _1, /*int*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vinsgr2vr_b((v16i8)(_1), (int)(_2), (_3))) ++ ++/* Assembly instruction format: vd, rj, ui3. */ ++/* Data types in instruction templates: V8HI, V8HI, SI, UQI. */ ++#define __lsx_vinsgr2vr_h(/*__m128i*/ _1, /*int*/ _2, /*ui3*/ _3) \ ++ ((__m128i)__builtin_lsx_vinsgr2vr_h((v8i16)(_1), (int)(_2), (_3))) ++ ++/* Assembly instruction format: vd, rj, ui2. */ ++/* Data types in instruction templates: V4SI, V4SI, SI, UQI. */ ++#define __lsx_vinsgr2vr_w(/*__m128i*/ _1, /*int*/ _2, /*ui2*/ _3) \ ++ ((__m128i)__builtin_lsx_vinsgr2vr_w((v4i32)(_1), (int)(_2), (_3))) ++ ++/* Assembly instruction format: vd, rj, ui1. */ ++/* Data types in instruction templates: V2DI, V2DI, DI, UQI. */ ++#define __lsx_vinsgr2vr_d(/*__m128i*/ _1, /*long int*/ _2, /*ui1*/ _3) \ ++ ((__m128i)__builtin_lsx_vinsgr2vr_d((v2i64)(_1), (long int)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfadd_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfadd_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfadd_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfadd_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfsub_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfsub_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfsub_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfsub_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmul_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmul_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmul_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmul_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfdiv_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfdiv_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfdiv_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfdiv_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcvt_h_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcvt_h_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfcvt_s_d(__m128d _1, __m128d _2) { ++ return (__m128)__builtin_lsx_vfcvt_s_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmin_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmin_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmin_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmin_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmina_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmina_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmina_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmina_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmax_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmax_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmax_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmax_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmaxa_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmaxa_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmaxa_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmaxa_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfclass_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vfclass_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfclass_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vfclass_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfsqrt_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfsqrt_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfsqrt_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfsqrt_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrecip_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrecip_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrecip_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrecip_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrint_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrint_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrint_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrint_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrsqrt_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrsqrt_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. 
*/ ++/* Data types in instruction templates: V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrsqrt_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrsqrt_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vflogb_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vflogb_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vflogb_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vflogb_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfcvth_s_h(__m128i _1) { ++ return (__m128)__builtin_lsx_vfcvth_s_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfcvth_d_s(__m128 _1) { ++ return (__m128d)__builtin_lsx_vfcvth_d_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfcvtl_s_h(__m128i _1) { ++ return (__m128)__builtin_lsx_vfcvtl_s_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfcvtl_d_s(__m128 _1) { ++ return (__m128d)__builtin_lsx_vfcvtl_d_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftint_w_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftint_l_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV4SI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_wu_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftint_wu_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV2DI, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_lu_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftint_lu_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrz_w_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. 
*/ ++/* Data types in instruction templates: V2DI, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrz_l_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV4SI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_wu_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrz_wu_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV2DI, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_lu_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrz_lu_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vffint_s_w(__m128i _1) { ++ return (__m128)__builtin_lsx_vffint_s_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vffint_d_l(__m128i _1) { ++ return (__m128d)__builtin_lsx_vffint_d_l((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vffint_s_wu(__m128i _1) { ++ return (__m128)__builtin_lsx_vffint_s_wu((v4u32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vffint_d_lu(__m128i _1) { ++ return (__m128d)__builtin_lsx_vffint_d_lu((v2u64)_1); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vandn_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vandn_v((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vneg_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vneg_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vneg_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vneg_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vneg_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vneg_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vneg_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vneg_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V8HI, V16QI, UQI. */ ++#define __lsx_vsllwil_h_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_h_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V4SI, V8HI, UQI. */ ++#define __lsx_vsllwil_w_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_w_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V2DI, V4SI, UQI. */ ++#define __lsx_vsllwil_d_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_d_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui3. 
*/ ++/* Data types in instruction templates: UV8HI, UV16QI, UQI. */ ++#define __lsx_vsllwil_hu_bu(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_hu_bu((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: UV4SI, UV8HI, UQI. */ ++#define __lsx_vsllwil_wu_hu(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_wu_hu((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV2DI, UV4SI, UQI. */ ++#define __lsx_vsllwil_du_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_du_wu((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsran_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsran_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsran_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsran_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsran_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsran_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_bu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_bu_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_hu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_hu_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV2DI, UV2DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_wu_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_wu_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrarn_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrarn_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrarn_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrarn_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrarn_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrarn_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_bu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_bu_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_hu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_hu_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_wu_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_wu_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrln_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrln_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrln_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrln_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrln_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrln_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_bu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_bu_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_hu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_hu_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_wu_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_wu_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlrn_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlrn_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlrn_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlrn_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlrn_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlrn_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_bu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_bu_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV4SI, UV4SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_hu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_hu_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_wu_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_wu_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, UQI. */ ++#define __lsx_vfrstpi_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vfrstpi_b((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, UQI. */ ++#define __lsx_vfrstpi_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vfrstpi_h((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfrstp_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vfrstp_b((v16i8)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfrstp_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vfrstp_h((v8i16)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ ++#define __lsx_vshuf4i_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vshuf4i_d((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ ++#define __lsx_vbsrl_v(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbsrl_v((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ ++#define __lsx_vbsll_v(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbsll_v((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ ++#define __lsx_vextrins_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vextrins_b((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ ++#define __lsx_vextrins_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vextrins_h((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ ++#define __lsx_vextrins_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vextrins_w((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. 
*/ ++#define __lsx_vextrins_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vextrins_d((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskltz_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskltz_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskltz_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskltz_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskltz_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskltz_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskltz_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskltz_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsigncov_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsigncov_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsigncov_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsigncov_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsigncov_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsigncov_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsigncov_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsigncov_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmadd_s(__m128 _1, __m128 _2, __m128 _3) { ++ return (__m128)__builtin_lsx_vfmadd_s((v4f32)_1, (v4f32)_2, (v4f32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmadd_d(__m128d _1, __m128d _2, __m128d _3) { ++ return (__m128d)__builtin_lsx_vfmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF, V4SF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmsub_s(__m128 _1, __m128 _2, __m128 _3) { ++ return (__m128)__builtin_lsx_vfmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmsub_d(__m128d _1, __m128d _2, __m128d _3) { ++ return (__m128d)__builtin_lsx_vfmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfnmadd_s(__m128 _1, __m128 _2, __m128 _3) { ++ return (__m128)__builtin_lsx_vfnmadd_s((v4f32)_1, (v4f32)_2, (v4f32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfnmadd_d(__m128d _1, __m128d _2, __m128d _3) { ++ return (__m128d)__builtin_lsx_vfnmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfnmsub_s(__m128 _1, __m128 _2, __m128 _3) { ++ return (__m128)__builtin_lsx_vfnmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfnmsub_d(__m128d _1, __m128d _2, __m128d _3) { ++ return (__m128d)__builtin_lsx_vfnmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrne_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrne_w_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrne_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrne_l_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrp_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrp_w_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrp_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrp_l_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrm_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrm_w_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. 
*/ ++/* Data types in instruction templates: V2DI, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrm_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrm_l_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftint_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vffint_s_l(__m128i _1, __m128i _2) { ++ return (__m128)__builtin_lsx_vffint_s_l((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftintrz_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrp_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftintrp_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrm_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftintrm_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrne_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftintrne_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintl_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintl_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftinth_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftinth_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vffinth_d_w(__m128i _1) { ++ return (__m128d)__builtin_lsx_vffinth_d_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vffintl_d_w(__m128i _1) { ++ return (__m128d)__builtin_lsx_vffintl_d_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrzl_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrzl_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrzh_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrzh_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrpl_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrpl_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrph_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrph_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrml_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrml_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrmh_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrmh_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrnel_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrnel_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrneh_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrneh_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrintrne_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrintrne_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrintrne_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrintrne_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrintrz_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrintrz_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrintrz_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrintrz_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. 
*/ ++/* Data types in instruction templates: V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrintrp_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrintrp_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrintrp_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrintrp_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrintrm_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrintrm_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrintrm_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrintrm_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, rj, si8, idx. */ ++/* Data types in instruction templates: VOID, V16QI, CVPOINTER, SI, UQI. */ ++#define __lsx_vstelm_b(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ++ ((void)__builtin_lsx_vstelm_b((v16i8)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: vd, rj, si8, idx. */ ++/* Data types in instruction templates: VOID, V8HI, CVPOINTER, SI, UQI. */ ++#define __lsx_vstelm_h(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ++ ((void)__builtin_lsx_vstelm_h((v8i16)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: vd, rj, si8, idx. */ ++/* Data types in instruction templates: VOID, V4SI, CVPOINTER, SI, UQI. */ ++#define __lsx_vstelm_w(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ++ ((void)__builtin_lsx_vstelm_w((v4i32)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: vd, rj, si8, idx. */ ++/* Data types in instruction templates: VOID, V2DI, CVPOINTER, SI, UQI. */ ++#define __lsx_vstelm_d(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ++ ((void)__builtin_lsx_vstelm_d((v2i64)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_d_wu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_d_wu_w((v4u32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, V8HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_w_hu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_w_hu_h((v8u16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_h_bu_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_h_bu_b((v16u8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_d_wu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_d_wu_w((v4u32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_w_hu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_w_hu_h((v8u16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_h_bu_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_h_bu_b((v16u8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_q_du_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_q_du_d((v2u64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_q_du_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_q_du_d((v2u64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_d_wu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_d_wu_w((v4u32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, V8HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_w_hu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_w_hu_h((v8u16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_h_bu_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_h_bu_b((v16u8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_d_wu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_d_wu_w((v4u32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_w_hu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_w_hu_h((v8u16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_h_bu_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_h_bu_b((v16u8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_q_du_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_q_du_d((v2u64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, V2DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_q_du_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_q_du_d((v2u64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_qu_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_qu_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_qu_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_qu_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_d_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_w_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_h_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_d_wu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_w_hu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV16QI, UV16QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_h_bu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_d_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_w_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_h_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_d_wu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_w_hu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_h_bu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, UV4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_d_wu_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_d_wu_w((v2i64)_1, (v4u32)_2, ++ (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, UV8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_w_hu_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_w_hu_h((v4i32)_1, (v8u16)_2, ++ (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, UV16QI, V16QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_h_bu_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_h_bu_b((v8i16)_1, (v16u8)_2, ++ (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, UV4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_d_wu_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_d_wu_w((v2i64)_1, (v4u32)_2, ++ (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, UV8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_w_hu_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_w_hu_h((v4i32)_1, (v8u16)_2, ++ (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, UV16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_h_bu_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_h_bu_b((v8i16)_1, (v16u8)_2, ++ (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_q_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_q_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_q_du(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_q_du(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, UV2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_q_du_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_q_du_d((v2i64)_1, (v2u64)_2, ++ (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, UV2DI, V2DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_q_du_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_q_du_d((v2i64)_1, (v2u64)_2, ++ (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vrotr_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vrotr_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vrotr_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vrotr_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vrotr_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vrotr_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vrotr_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vrotr_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_q(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_q((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_q(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_q((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, rj, si12. */ ++/* Data types in instruction templates: V16QI, CVPOINTER, SI. */ ++#define __lsx_vldrepl_b(/*void **/ _1, /*si12*/ _2) \ ++ ((__m128i)__builtin_lsx_vldrepl_b((void const *)(_1), (_2))) ++ ++/* Assembly instruction format: vd, rj, si11. */ ++/* Data types in instruction templates: V8HI, CVPOINTER, SI. */ ++#define __lsx_vldrepl_h(/*void **/ _1, /*si11*/ _2) \ ++ ((__m128i)__builtin_lsx_vldrepl_h((void const *)(_1), (_2))) ++ ++/* Assembly instruction format: vd, rj, si10. */ ++/* Data types in instruction templates: V4SI, CVPOINTER, SI. */ ++#define __lsx_vldrepl_w(/*void **/ _1, /*si10*/ _2) \ ++ ((__m128i)__builtin_lsx_vldrepl_w((void const *)(_1), (_2))) ++ ++/* Assembly instruction format: vd, rj, si9. */ ++/* Data types in instruction templates: V2DI, CVPOINTER, SI. */ ++#define __lsx_vldrepl_d(/*void **/ _1, /*si9*/ _2) \ ++ ((__m128i)__builtin_lsx_vldrepl_d((void const *)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskgez_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskgez_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V16QI, V16QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsknz_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmsknz_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V8HI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_h_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_h_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_w_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_w_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_d_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_d_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_q_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_q_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV8HI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_hu_bu(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_hu_bu((v16u8)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV4SI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_wu_hu(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_wu_hu((v8u16)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV2DI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_du_wu(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_du_wu((v4u32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_qu_du(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_qu_du((v2u64)_1); ++} ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ ++#define __lsx_vrotri_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vrotri_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ ++#define __lsx_vrotri_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vrotri_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ ++#define __lsx_vrotri_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vrotri_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ ++#define __lsx_vrotri_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vrotri_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj. 
*/ ++/* Data types in instruction templates: V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vextl_q_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vextl_q_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ ++#define __lsx_vsrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ ++#define __lsx_vsrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ ++#define __lsx_vsrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ ++#define __lsx_vsrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ ++#define __lsx_vsrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlrni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ ++#define __lsx_vsrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlrni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ ++#define __lsx_vsrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlrni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ ++#define __lsx_vsrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlrni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ ++#define __lsx_vssrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ ++#define __lsx_vssrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ ++#define __lsx_vssrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ ++#define __lsx_vssrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4. 
*/ ++/* Data types in instruction templates: UV16QI, UV16QI, V16QI, USI. */ ++#define __lsx_vssrlni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV8HI, UV8HI, V8HI, USI. */ ++#define __lsx_vssrlni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: UV4SI, UV4SI, V4SI, USI. */ ++#define __lsx_vssrlni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: UV2DI, UV2DI, V2DI, USI. */ ++#define __lsx_vssrlni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ ++#define __lsx_vssrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ ++#define __lsx_vssrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ ++#define __lsx_vssrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ ++#define __lsx_vssrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: UV16QI, UV16QI, V16QI, USI. */ ++#define __lsx_vssrlrni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV8HI, UV8HI, V8HI, USI. */ ++#define __lsx_vssrlrni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: UV4SI, UV4SI, V4SI, USI. */ ++#define __lsx_vssrlrni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: UV2DI, UV2DI, V2DI, USI. */ ++#define __lsx_vssrlrni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ ++#define __lsx_vsrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrani_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. 
*/ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ ++#define __lsx_vsrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrani_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ ++#define __lsx_vsrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrani_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ ++#define __lsx_vsrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrani_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ ++#define __lsx_vsrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ ++#define __lsx_vsrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ ++#define __lsx_vsrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ ++#define __lsx_vsrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ ++#define __lsx_vssrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ ++#define __lsx_vssrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ ++#define __lsx_vssrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ ++#define __lsx_vssrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: UV16QI, UV16QI, V16QI, USI. */ ++#define __lsx_vssrani_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV8HI, UV8HI, V8HI, USI. */ ++#define __lsx_vssrani_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: UV4SI, UV4SI, V4SI, USI. 
*/ ++#define __lsx_vssrani_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: UV2DI, UV2DI, V2DI, USI. */ ++#define __lsx_vssrani_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_du_q((v2u64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ ++#define __lsx_vssrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ ++#define __lsx_vssrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ ++#define __lsx_vssrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ ++#define __lsx_vssrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: UV16QI, UV16QI, V16QI, USI. */ ++#define __lsx_vssrarni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV8HI, UV8HI, V8HI, USI. */ ++#define __lsx_vssrarni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: UV4SI, UV4SI, V4SI, USI. */ ++#define __lsx_vssrarni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: UV2DI, UV2DI, V2DI, USI. */ ++#define __lsx_vssrarni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ ++#define __lsx_vpermi_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vpermi_w((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, rj, si12. */ ++/* Data types in instruction templates: V16QI, CVPOINTER, SI. */ ++#define __lsx_vld(/*void **/ _1, /*si12*/ _2) \ ++ ((__m128i)__builtin_lsx_vld((void const *)(_1), (_2))) ++ ++/* Assembly instruction format: vd, rj, si12. */ ++/* Data types in instruction templates: VOID, V16QI, CVPOINTER, SI. */ ++#define __lsx_vst(/*__m128i*/ _1, /*void **/ _2, /*si12*/ _3) \ ++ ((void)__builtin_lsx_vst((v16i8)(_1), (void *)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vorn_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vorn_v((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, i13. */ ++/* Data types in instruction templates: V2DI, HI. */ ++#define __lsx_vldi(/*i13*/ _1) ((__m128i)__builtin_lsx_vldi((_1))) ++ ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vshuf_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vshuf_b((v16i8)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, rj, rk. */ ++/* Data types in instruction templates: V16QI, CVPOINTER, DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vldx(void const *_1, long int _2) { ++ return (__m128i)__builtin_lsx_vldx((void const *)_1, (long int)_2); ++} ++ ++/* Assembly instruction format: vd, rj, rk. */ ++/* Data types in instruction templates: VOID, V16QI, CVPOINTER, DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void ++ __lsx_vstx(__m128i _1, void *_2, long int _3) { ++ return (void)__builtin_lsx_vstx((v16i8)_1, (void *)_2, (long int)_3); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV2DI, UV2DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vextl_qu_du(__m128i _1) { ++ return (__m128i)__builtin_lsx_vextl_qu_du((v2u64)_1); ++} ++ ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV16QI. */ ++#define __lsx_bnz_b(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_b((v16u8)(_1))) ++ ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV2DI. */ ++#define __lsx_bnz_d(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_d((v2u64)(_1))) ++ ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV8HI. */ ++#define __lsx_bnz_h(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_h((v8u16)(_1))) ++ ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV16QI. */ ++#define __lsx_bnz_v(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_v((v16u8)(_1))) ++ ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV4SI. */ ++#define __lsx_bnz_w(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_w((v4u32)(_1))) ++ ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV16QI. */ ++#define __lsx_bz_b(/*__m128i*/ _1) ((int)__builtin_lsx_bz_b((v16u8)(_1))) ++ ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV2DI. */ ++#define __lsx_bz_d(/*__m128i*/ _1) ((int)__builtin_lsx_bz_d((v2u64)(_1))) ++ ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV8HI. */ ++#define __lsx_bz_h(/*__m128i*/ _1) ((int)__builtin_lsx_bz_h((v8u16)(_1))) ++ ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV16QI. */ ++#define __lsx_bz_v(/*__m128i*/ _1) ((int)__builtin_lsx_bz_v((v16u8)(_1))) ++ ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV4SI. */ ++#define __lsx_bz_w(/*__m128i*/ _1) ((int)__builtin_lsx_bz_w((v4u32)(_1))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_caf_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_caf_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_caf_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_caf_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_ceq_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_ceq_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_ceq_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_ceq_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cle_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cle_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cle_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cle_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_clt_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_clt_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_clt_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_clt_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cne_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cne_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cne_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cne_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cor_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cor_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cor_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cor_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cueq_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cueq_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cueq_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cueq_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cule_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cule_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cule_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cule_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cult_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cult_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cult_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cult_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cun_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cun_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cune_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cune_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cune_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cune_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cun_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cun_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_saf_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_saf_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_saf_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_saf_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_seq_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_seq_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_seq_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_seq_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sle_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sle_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sle_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sle_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_slt_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_slt_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_slt_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_slt_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sne_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sne_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sne_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sne_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sor_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sor_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sor_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sor_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sueq_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sueq_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sueq_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sueq_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sule_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sule_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sule_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sule_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sult_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sult_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sult_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sult_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sun_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sun_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sune_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sune_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sune_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sune_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sun_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sun_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, si10. */ ++/* Data types in instruction templates: V16QI, HI. */ ++#define __lsx_vrepli_b(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_b((_1))) ++ ++/* Assembly instruction format: vd, si10. */ ++/* Data types in instruction templates: V2DI, HI. 
*/ ++#define __lsx_vrepli_d(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_d((_1))) ++ ++/* Assembly instruction format: vd, si10. */ ++/* Data types in instruction templates: V8HI, HI. */ ++#define __lsx_vrepli_h(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_h((_1))) ++ ++/* Assembly instruction format: vd, si10. */ ++/* Data types in instruction templates: V4SI, HI. */ ++#define __lsx_vrepli_w(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_w((_1))) ++ ++#endif /* defined(__loongarch_sx) */ ++#endif /* _GCC_LOONGSON_SXINTRIN_H */ +diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp +index dae51d069..5504f9937 100644 +--- a/clang/lib/Sema/SemaChecking.cpp ++++ b/clang/lib/Sema/SemaChecking.cpp +@@ -1981,6 +1981,9 @@ bool Sema::CheckTSBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, + case llvm::Triple::riscv32: + case llvm::Triple::riscv64: + return CheckRISCVBuiltinFunctionCall(TI, BuiltinID, TheCall); ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: ++ return CheckLoongArchBuiltinFunctionCall(TI, BuiltinID, TheCall); + } + } + +@@ -4445,6 +4448,559 @@ bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, + return false; + } + ++// CheckLoongArchBuiltinFunctionCall - Checks the constant value passed to the ++// intrinsic is correct. ++// ++// FIXME: The size tests here should instead be tablegen'd along with the ++// definitions from include/clang/Basic/BuiltinsLoongArch.def. ++// FIXME: GCC is strict on signedness for some of these intrinsics, we should ++// be too. ++bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, ++ unsigned BuiltinID, ++ CallExpr *TheCall) { ++ unsigned i = 0, l = 0, u = 0, m = 0; ++ switch (BuiltinID) { ++ default: return false; ++ // LSX/LASX intrinsics. ++ // These intrinsics take an unsigned 3 bit immediate. ++ case LoongArch::BI__builtin_lsx_vbitclri_b: ++ case LoongArch::BI__builtin_lasx_xvbitclri_b: ++ case LoongArch::BI__builtin_lsx_vbitrevi_b: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_b: ++ case LoongArch::BI__builtin_lsx_vbitseti_b: ++ case LoongArch::BI__builtin_lasx_xvbitseti_b: ++ case LoongArch::BI__builtin_lsx_vsat_b: ++ case LoongArch::BI__builtin_lsx_vsat_bu: ++ case LoongArch::BI__builtin_lasx_xvsat_b: ++ case LoongArch::BI__builtin_lasx_xvsat_bu: ++ case LoongArch::BI__builtin_lsx_vslli_b: ++ case LoongArch::BI__builtin_lasx_xvslli_b: ++ case LoongArch::BI__builtin_lsx_vsrai_b: ++ case LoongArch::BI__builtin_lasx_xvsrai_b: ++ case LoongArch::BI__builtin_lsx_vsrari_b: ++ case LoongArch::BI__builtin_lasx_xvsrari_b: ++ case LoongArch::BI__builtin_lsx_vsrli_b: ++ case LoongArch::BI__builtin_lasx_xvsrli_b: ++ case LoongArch::BI__builtin_lsx_vsllwil_h_b: ++ case LoongArch::BI__builtin_lsx_vsllwil_hu_bu: ++ case LoongArch::BI__builtin_lasx_xvsllwil_h_b: ++ case LoongArch::BI__builtin_lasx_xvsllwil_hu_bu: ++ case LoongArch::BI__builtin_lsx_vrotri_b: ++ case LoongArch::BI__builtin_lasx_xvrotri_b: ++ case LoongArch::BI__builtin_lasx_xvsrlri_b: ++ case LoongArch::BI__builtin_lsx_vsrlri_b: ++ i = 1; ++ l = 0; ++ u = 7; ++ break; ++ // These intrinsics take an unsigned 4 bit immediate. 
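Illustration only, not part of the patch: throughout this function, `i` selects which call argument carries the immediate and `[l, u]` its allowed range; the switch presumably falls through to a single SemaBuiltinConstantArgRange(TheCall, i, l, u) call at the end of the function (not shown in this hunk). A minimal usage sketch of one ui4 intrinsic from the group that follows, assuming the header from this patch installs as lsxintrin.h and the file is built for LoongArch with -mlsx:

#include <lsxintrin.h>   /* assumed install name; the include guard above is _GCC_LOONGSON_SXINTRIN_H */

/* vsat.h takes a ui4 immediate, i.e. a compile-time constant in [0, 15];
   the case labels below select i = 1, l = 0, u = 15 for it, so a call such
   as __lsx_vsat_h(v, 16) is expected to be rejected with a constant-range
   diagnostic rather than silently truncated. */
__m128i clamp_halfwords(__m128i v) {
  return __lsx_vsat_h(v, 7);
}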
++ case LoongArch::BI__builtin_lsx_vbitclri_h: ++ case LoongArch::BI__builtin_lasx_xvbitclri_h: ++ case LoongArch::BI__builtin_lsx_vbitrevi_h: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_h: ++ case LoongArch::BI__builtin_lsx_vbitseti_h: ++ case LoongArch::BI__builtin_lasx_xvbitseti_h: ++ case LoongArch::BI__builtin_lsx_vsat_h: ++ case LoongArch::BI__builtin_lsx_vsat_hu: ++ case LoongArch::BI__builtin_lasx_xvsat_h: ++ case LoongArch::BI__builtin_lasx_xvsat_hu: ++ case LoongArch::BI__builtin_lsx_vslli_h: ++ case LoongArch::BI__builtin_lasx_xvslli_h: ++ case LoongArch::BI__builtin_lsx_vsrai_h: ++ case LoongArch::BI__builtin_lasx_xvsrai_h: ++ case LoongArch::BI__builtin_lsx_vsrari_h: ++ case LoongArch::BI__builtin_lasx_xvsrari_h: ++ case LoongArch::BI__builtin_lsx_vsrli_h: ++ case LoongArch::BI__builtin_lasx_xvsrli_h: ++ case LoongArch::BI__builtin_lsx_vsllwil_w_h: ++ case LoongArch::BI__builtin_lsx_vsllwil_wu_hu: ++ case LoongArch::BI__builtin_lasx_xvsllwil_w_h: ++ case LoongArch::BI__builtin_lasx_xvsllwil_wu_hu: ++ case LoongArch::BI__builtin_lsx_vrotri_h: ++ case LoongArch::BI__builtin_lasx_xvrotri_h: ++ case LoongArch::BI__builtin_lasx_xvsrlri_h: ++ case LoongArch::BI__builtin_lsx_vsrlri_h: ++ i = 1; ++ l = 0; ++ u = 15; ++ break; ++ case LoongArch::BI__builtin_lsx_vssrarni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrarni_bu_h: ++ case LoongArch::BI__builtin_lasx_xvssrarni_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrarni_bu_h: ++ case LoongArch::BI__builtin_lsx_vssrani_b_h: ++ case LoongArch::BI__builtin_lsx_vssrani_bu_h: ++ case LoongArch::BI__builtin_lasx_xvssrani_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrani_bu_h: ++ case LoongArch::BI__builtin_lsx_vsrarni_b_h: ++ case LoongArch::BI__builtin_lasx_xvsrarni_b_h: ++ case LoongArch::BI__builtin_lsx_vsrlni_b_h: ++ case LoongArch::BI__builtin_lasx_xvsrlni_b_h: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrlni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrlni_bu_h: ++ case LoongArch::BI__builtin_lasx_xvssrlni_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrlni_bu_h: ++ case LoongArch::BI__builtin_lsx_vssrlrni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrlrni_bu_h: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_bu_h: ++ case LoongArch::BI__builtin_lsx_vsrani_b_h: ++ case LoongArch::BI__builtin_lasx_xvsrani_b_h: ++ i = 2; ++ l = 0; ++ u = 15; ++ break; ++ // These intrinsics take an unsigned 5 bit immediate. ++ // The first block of intrinsics actually have an unsigned 5 bit field, ++ // not a df/n field. 
++ case LoongArch::BI__builtin_lsx_vslei_bu: ++ case LoongArch::BI__builtin_lsx_vslei_hu: ++ case LoongArch::BI__builtin_lsx_vslei_wu: ++ case LoongArch::BI__builtin_lsx_vslei_du: ++ case LoongArch::BI__builtin_lasx_xvslei_bu: ++ case LoongArch::BI__builtin_lasx_xvslei_hu: ++ case LoongArch::BI__builtin_lasx_xvslei_wu: ++ case LoongArch::BI__builtin_lasx_xvslei_du: ++ case LoongArch::BI__builtin_lsx_vslti_bu: ++ case LoongArch::BI__builtin_lsx_vslti_hu: ++ case LoongArch::BI__builtin_lsx_vslti_wu: ++ case LoongArch::BI__builtin_lsx_vslti_du: ++ case LoongArch::BI__builtin_lasx_xvslti_bu: ++ case LoongArch::BI__builtin_lasx_xvslti_hu: ++ case LoongArch::BI__builtin_lasx_xvslti_wu: ++ case LoongArch::BI__builtin_lasx_xvslti_du: ++ case LoongArch::BI__builtin_lsx_vmaxi_bu: ++ case LoongArch::BI__builtin_lsx_vmaxi_hu: ++ case LoongArch::BI__builtin_lsx_vmaxi_wu: ++ case LoongArch::BI__builtin_lsx_vmaxi_du: ++ case LoongArch::BI__builtin_lasx_xvmaxi_bu: ++ case LoongArch::BI__builtin_lasx_xvmaxi_hu: ++ case LoongArch::BI__builtin_lasx_xvmaxi_wu: ++ case LoongArch::BI__builtin_lasx_xvmaxi_du: ++ case LoongArch::BI__builtin_lsx_vmini_bu: ++ case LoongArch::BI__builtin_lsx_vmini_hu: ++ case LoongArch::BI__builtin_lsx_vmini_wu: ++ case LoongArch::BI__builtin_lsx_vmini_du: ++ case LoongArch::BI__builtin_lasx_xvmini_bu: ++ case LoongArch::BI__builtin_lasx_xvmini_hu: ++ case LoongArch::BI__builtin_lasx_xvmini_wu: ++ case LoongArch::BI__builtin_lasx_xvmini_du: ++ case LoongArch::BI__builtin_lsx_vaddi_bu: ++ case LoongArch::BI__builtin_lsx_vaddi_hu: ++ case LoongArch::BI__builtin_lsx_vaddi_wu: ++ case LoongArch::BI__builtin_lsx_vaddi_du: ++ case LoongArch::BI__builtin_lasx_xvaddi_bu: ++ case LoongArch::BI__builtin_lasx_xvaddi_hu: ++ case LoongArch::BI__builtin_lasx_xvaddi_wu: ++ case LoongArch::BI__builtin_lasx_xvaddi_du: ++ case LoongArch::BI__builtin_lsx_vbitclri_w: ++ case LoongArch::BI__builtin_lasx_xvbitclri_w: ++ case LoongArch::BI__builtin_lsx_vbitrevi_w: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_w: ++ case LoongArch::BI__builtin_lsx_vbitseti_w: ++ case LoongArch::BI__builtin_lasx_xvbitseti_w: ++ case LoongArch::BI__builtin_lsx_vsat_w: ++ case LoongArch::BI__builtin_lsx_vsat_wu: ++ case LoongArch::BI__builtin_lasx_xvsat_w: ++ case LoongArch::BI__builtin_lasx_xvsat_wu: ++ case LoongArch::BI__builtin_lsx_vslli_w: ++ case LoongArch::BI__builtin_lasx_xvslli_w: ++ case LoongArch::BI__builtin_lsx_vsrai_w: ++ case LoongArch::BI__builtin_lasx_xvsrai_w: ++ case LoongArch::BI__builtin_lsx_vsrari_w: ++ case LoongArch::BI__builtin_lasx_xvsrari_w: ++ case LoongArch::BI__builtin_lsx_vsrli_w: ++ case LoongArch::BI__builtin_lasx_xvsrli_w: ++ case LoongArch::BI__builtin_lsx_vsllwil_d_w: ++ case LoongArch::BI__builtin_lsx_vsllwil_du_wu: ++ case LoongArch::BI__builtin_lasx_xvsllwil_d_w: ++ case LoongArch::BI__builtin_lasx_xvsllwil_du_wu: ++ case LoongArch::BI__builtin_lsx_vsrlri_w: ++ case LoongArch::BI__builtin_lasx_xvsrlri_w: ++ case LoongArch::BI__builtin_lsx_vrotri_w: ++ case LoongArch::BI__builtin_lasx_xvrotri_w: ++ case LoongArch::BI__builtin_lsx_vsubi_bu: ++ case LoongArch::BI__builtin_lsx_vsubi_hu: ++ case LoongArch::BI__builtin_lasx_xvsubi_bu: ++ case LoongArch::BI__builtin_lasx_xvsubi_hu: ++ case LoongArch::BI__builtin_lasx_xvsubi_wu: ++ case LoongArch::BI__builtin_lasx_xvsubi_du: ++ case LoongArch::BI__builtin_lsx_vbsrl_v: ++ case LoongArch::BI__builtin_lsx_vbsll_v: ++ case LoongArch::BI__builtin_lasx_xvbsrl_v: ++ case LoongArch::BI__builtin_lasx_xvbsll_v: ++ case LoongArch::BI__builtin_lsx_vsubi_wu: 
++ case LoongArch::BI__builtin_lsx_vsubi_du: ++ i = 1; ++ l = 0; ++ u = 31; ++ break; ++ case LoongArch::BI__builtin_lsx_vssrarni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrarni_hu_w: ++ case LoongArch::BI__builtin_lasx_xvssrarni_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrarni_hu_w: ++ case LoongArch::BI__builtin_lsx_vssrani_h_w: ++ case LoongArch::BI__builtin_lsx_vssrani_hu_w: ++ case LoongArch::BI__builtin_lasx_xvssrani_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrani_hu_w: ++ case LoongArch::BI__builtin_lsx_vsrarni_h_w: ++ case LoongArch::BI__builtin_lasx_xvsrarni_h_w: ++ case LoongArch::BI__builtin_lsx_vsrani_h_w: ++ case LoongArch::BI__builtin_lasx_xvsrani_h_w: ++ case LoongArch::BI__builtin_lsx_vfrstpi_b: ++ case LoongArch::BI__builtin_lsx_vfrstpi_h: ++ case LoongArch::BI__builtin_lasx_xvfrstpi_b: ++ case LoongArch::BI__builtin_lasx_xvfrstpi_h: ++ case LoongArch::BI__builtin_lsx_vsrlni_h_w: ++ case LoongArch::BI__builtin_lasx_xvsrlni_h_w: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrlni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrlni_hu_w: ++ case LoongArch::BI__builtin_lasx_xvssrlni_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrlni_hu_w: ++ case LoongArch::BI__builtin_lsx_vssrlrni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrlrni_hu_w: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_hu_w: ++ i = 2; ++ l = 0; ++ u = 31; ++ break; ++ case LoongArch::BI__builtin_lasx_xvstelm_b: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 31); ++ // These intrinsics take an unsigned 6 bit immediate. ++ case LoongArch::BI__builtin_lsx_vbitclri_d: ++ case LoongArch::BI__builtin_lasx_xvbitclri_d: ++ case LoongArch::BI__builtin_lsx_vbitrevi_d: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_d: ++ case LoongArch::BI__builtin_lsx_vbitseti_d: ++ case LoongArch::BI__builtin_lasx_xvbitseti_d: ++ case LoongArch::BI__builtin_lsx_vsat_d: ++ case LoongArch::BI__builtin_lsx_vsat_du: ++ case LoongArch::BI__builtin_lasx_xvsat_d: ++ case LoongArch::BI__builtin_lasx_xvsat_du: ++ case LoongArch::BI__builtin_lsx_vslli_d: ++ case LoongArch::BI__builtin_lasx_xvslli_d: ++ case LoongArch::BI__builtin_lsx_vsrai_d: ++ case LoongArch::BI__builtin_lasx_xvsrai_d: ++ case LoongArch::BI__builtin_lsx_vsrli_d: ++ case LoongArch::BI__builtin_lasx_xvsrli_d: ++ case LoongArch::BI__builtin_lsx_vsrari_d: ++ case LoongArch::BI__builtin_lasx_xvsrari_d: ++ case LoongArch::BI__builtin_lsx_vrotri_d: ++ case LoongArch::BI__builtin_lasx_xvrotri_d: ++ case LoongArch::BI__builtin_lasx_xvsrlri_d: ++ case LoongArch::BI__builtin_lsx_vsrlri_d: ++ i = 1; ++ l = 0; ++ u = 63; ++ break; ++ case LoongArch::BI__builtin_lsx_vssrarni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrarni_wu_d: ++ case LoongArch::BI__builtin_lasx_xvssrarni_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrarni_wu_d: ++ case LoongArch::BI__builtin_lsx_vssrani_w_d: ++ case LoongArch::BI__builtin_lsx_vssrani_wu_d: ++ case LoongArch::BI__builtin_lasx_xvssrani_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrani_wu_d: ++ case LoongArch::BI__builtin_lsx_vsrarni_w_d: ++ case LoongArch::BI__builtin_lasx_xvsrarni_w_d: ++ case LoongArch::BI__builtin_lsx_vsrlni_w_d: ++ case LoongArch::BI__builtin_lasx_xvsrlni_w_d: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrlni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrlni_wu_d: ++ case LoongArch::BI__builtin_lasx_xvssrlni_w_d: ++ case 
LoongArch::BI__builtin_lasx_xvssrlni_wu_d: ++ case LoongArch::BI__builtin_lsx_vssrlrni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrlrni_wu_d: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_wu_d: ++ case LoongArch::BI__builtin_lsx_vsrani_w_d: ++ case LoongArch::BI__builtin_lasx_xvsrani_w_d: ++ i = 2; ++ l = 0; ++ u = 63; ++ break; ++ // These intrinsics take an unsigned 7 bit immediate. ++ case LoongArch::BI__builtin_lsx_vssrarni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrarni_du_q: ++ case LoongArch::BI__builtin_lasx_xvssrarni_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrarni_du_q: ++ case LoongArch::BI__builtin_lsx_vssrani_d_q: ++ case LoongArch::BI__builtin_lsx_vssrani_du_q: ++ case LoongArch::BI__builtin_lasx_xvssrani_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrani_du_q: ++ case LoongArch::BI__builtin_lsx_vsrarni_d_q: ++ case LoongArch::BI__builtin_lasx_xvsrarni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrlni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrlni_du_q: ++ case LoongArch::BI__builtin_lasx_xvssrlni_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrlni_du_q: ++ case LoongArch::BI__builtin_lsx_vssrlrni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrlrni_du_q: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_du_q: ++ case LoongArch::BI__builtin_lsx_vsrani_d_q: ++ case LoongArch::BI__builtin_lasx_xvsrani_d_q: ++ case LoongArch::BI__builtin_lasx_xvsrlni_d_q: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_d_q: ++ case LoongArch::BI__builtin_lsx_vsrlni_d_q: ++ i = 2; ++ l = 0; ++ u = 127; ++ break; ++ // These intrinsics take a signed 5 bit immediate. ++ case LoongArch::BI__builtin_lsx_vseqi_b: ++ case LoongArch::BI__builtin_lsx_vseqi_h: ++ case LoongArch::BI__builtin_lsx_vseqi_w: ++ case LoongArch::BI__builtin_lsx_vseqi_d: ++ case LoongArch::BI__builtin_lasx_xvseqi_b: ++ case LoongArch::BI__builtin_lasx_xvseqi_h: ++ case LoongArch::BI__builtin_lasx_xvseqi_w: ++ case LoongArch::BI__builtin_lasx_xvseqi_d: ++ case LoongArch::BI__builtin_lsx_vslti_b: ++ case LoongArch::BI__builtin_lsx_vslti_h: ++ case LoongArch::BI__builtin_lsx_vslti_w: ++ case LoongArch::BI__builtin_lsx_vslti_d: ++ case LoongArch::BI__builtin_lasx_xvslti_b: ++ case LoongArch::BI__builtin_lasx_xvslti_h: ++ case LoongArch::BI__builtin_lasx_xvslti_w: ++ case LoongArch::BI__builtin_lasx_xvslti_d: ++ case LoongArch::BI__builtin_lsx_vslei_b: ++ case LoongArch::BI__builtin_lsx_vslei_h: ++ case LoongArch::BI__builtin_lsx_vslei_w: ++ case LoongArch::BI__builtin_lsx_vslei_d: ++ case LoongArch::BI__builtin_lasx_xvslei_b: ++ case LoongArch::BI__builtin_lasx_xvslei_h: ++ case LoongArch::BI__builtin_lasx_xvslei_w: ++ case LoongArch::BI__builtin_lasx_xvslei_d: ++ case LoongArch::BI__builtin_lsx_vmaxi_b: ++ case LoongArch::BI__builtin_lsx_vmaxi_h: ++ case LoongArch::BI__builtin_lsx_vmaxi_w: ++ case LoongArch::BI__builtin_lsx_vmaxi_d: ++ case LoongArch::BI__builtin_lasx_xvmaxi_b: ++ case LoongArch::BI__builtin_lasx_xvmaxi_h: ++ case LoongArch::BI__builtin_lasx_xvmaxi_w: ++ case LoongArch::BI__builtin_lasx_xvmaxi_d: ++ case LoongArch::BI__builtin_lsx_vmini_b: ++ case LoongArch::BI__builtin_lsx_vmini_h: ++ case LoongArch::BI__builtin_lsx_vmini_w: ++ case LoongArch::BI__builtin_lasx_xvmini_b: ++ case LoongArch::BI__builtin_lasx_xvmini_h: ++ case LoongArch::BI__builtin_lasx_xvmini_w: ++ case LoongArch::BI__builtin_lasx_xvmini_d: ++ case LoongArch::BI__builtin_lsx_vmini_d: ++ i = 1; ++ l = -16; ++ u = 15; ++ break; ++ // These intrinsics take a signed 
9 bit immediate. ++ case LoongArch::BI__builtin_lasx_xvldrepl_d: ++ case LoongArch::BI__builtin_lsx_vldrepl_d: ++ i = 1; ++ l = -256; ++ u = 255; ++ break; ++ // These intrinsics take an unsigned 8 bit immediate. ++ case LoongArch::BI__builtin_lsx_vandi_b: ++ case LoongArch::BI__builtin_lasx_xvandi_b: ++ case LoongArch::BI__builtin_lsx_vnori_b: ++ case LoongArch::BI__builtin_lasx_xvnori_b: ++ case LoongArch::BI__builtin_lsx_vori_b: ++ case LoongArch::BI__builtin_lasx_xvori_b: ++ case LoongArch::BI__builtin_lsx_vshuf4i_b: ++ case LoongArch::BI__builtin_lsx_vshuf4i_h: ++ case LoongArch::BI__builtin_lsx_vshuf4i_w: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_b: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_h: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_w: ++ case LoongArch::BI__builtin_lasx_xvxori_b: ++ case LoongArch::BI__builtin_lasx_xvpermi_d: ++ case LoongArch::BI__builtin_lsx_vxori_b: ++ i = 1; ++ l = 0; ++ u = 255; ++ break; ++ case LoongArch::BI__builtin_lsx_vbitseli_b: ++ case LoongArch::BI__builtin_lasx_xvbitseli_b: ++ case LoongArch::BI__builtin_lsx_vshuf4i_d: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_d: ++ case LoongArch::BI__builtin_lsx_vextrins_b: ++ case LoongArch::BI__builtin_lsx_vextrins_h: ++ case LoongArch::BI__builtin_lsx_vextrins_w: ++ case LoongArch::BI__builtin_lsx_vextrins_d: ++ case LoongArch::BI__builtin_lasx_xvextrins_b: ++ case LoongArch::BI__builtin_lasx_xvextrins_h: ++ case LoongArch::BI__builtin_lasx_xvextrins_w: ++ case LoongArch::BI__builtin_lasx_xvextrins_d: ++ case LoongArch::BI__builtin_lasx_xvpermi_q: ++ case LoongArch::BI__builtin_lsx_vpermi_w: ++ case LoongArch::BI__builtin_lasx_xvpermi_w: ++ i = 2; ++ l = 0; ++ u = 255; ++ break; ++ // df/n format ++ // These intrinsics take an unsigned 4 bit immediate. ++ case LoongArch::BI__builtin_lsx_vpickve2gr_b: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_bu: ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_b: ++ case LoongArch::BI__builtin_lsx_vreplvei_b: ++ i = 1; ++ l = 0; ++ u = 15; ++ break; ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_b: ++ i = 2; ++ l = 0; ++ u = 15; ++ break; ++ case LoongArch::BI__builtin_lasx_xvstelm_h: ++ case LoongArch::BI__builtin_lsx_vstelm_b: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 15); ++ // These intrinsics take an unsigned 3 bit immediate. ++ case LoongArch::BI__builtin_lsx_vpickve2gr_h: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_hu: ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_h: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_w: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_wu: ++ case LoongArch::BI__builtin_lasx_xvpickve_w: ++ case LoongArch::BI__builtin_lsx_vreplvei_h: ++ i = 1; ++ l = 0; ++ u = 7; ++ break; ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_h: ++ case LoongArch::BI__builtin_lasx_xvinsgr2vr_w: ++ case LoongArch::BI__builtin_lasx_xvinsve0_w: ++ i = 2; ++ l = 0; ++ u = 7; ++ break; ++ case LoongArch::BI__builtin_lasx_xvstelm_w: ++ case LoongArch::BI__builtin_lsx_vstelm_h: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 7); ++ // These intrinsics take an unsigned 2 bit immediate. 
++ case LoongArch::BI__builtin_lsx_vpickve2gr_w: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_wu: ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_w: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_d: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_du: ++ case LoongArch::BI__builtin_lasx_xvpickve_d: ++ case LoongArch::BI__builtin_lsx_vreplvei_w: ++ i = 1; ++ l = 0; ++ u = 3; ++ break; ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_w: ++ case LoongArch::BI__builtin_lasx_xvinsve0_d: ++ case LoongArch::BI__builtin_lasx_xvinsgr2vr_d: ++ i = 2; ++ l = 0; ++ u = 3; ++ break; ++ case LoongArch::BI__builtin_lasx_xvstelm_d: ++ case LoongArch::BI__builtin_lsx_vstelm_w: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 3); ++ // These intrinsics take an unsigned 1 bit immediate. ++ case LoongArch::BI__builtin_lsx_vpickve2gr_d: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_du: ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_d: ++ case LoongArch::BI__builtin_lsx_vreplvei_d: ++ i = 1; ++ l = 0; ++ u = 1; ++ break; ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_d: ++ i = 2; ++ l = 0; ++ u = 1; ++ break; ++ case LoongArch::BI__builtin_lsx_vstelm_d: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 1); ++ // Memory offsets and immediate loads. ++ // These intrinsics take a signed 10 bit immediate. ++ case LoongArch::BI__builtin_lasx_xvldrepl_w: ++ case LoongArch::BI__builtin_lsx_vldrepl_w: ++ i = 1; ++ l = -512; ++ u = 511; ++ break; ++ case LoongArch::BI__builtin_lasx_xvldrepl_h: ++ case LoongArch::BI__builtin_lsx_vldrepl_h: ++ i = 1; ++ l = -1024; ++ u = 1023; ++ break; ++ case LoongArch::BI__builtin_lasx_xvldrepl_b: ++ case LoongArch::BI__builtin_lsx_vldrepl_b: ++ i = 1; ++ l = -2048; ++ u = 2047; ++ break; ++ case LoongArch::BI__builtin_lasx_xvld: ++ case LoongArch::BI__builtin_lsx_vld: ++ i = 1; ++ l = -2048; ++ u = 2047; ++ break; ++ case LoongArch::BI__builtin_lsx_vst: ++ case LoongArch::BI__builtin_lasx_xvst: ++ i = 2; ++ l = -2048; ++ u = 2047; ++ break; ++ case LoongArch::BI__builtin_lasx_xvldi: ++ case LoongArch::BI__builtin_lsx_vldi: ++ i = 0; ++ l = -4096; ++ u = 4095; ++ break; ++ // These intrinsics take an unsigned 5 bit immediate and a signed 12 bit immediate. ++ case LoongArch::BI__builtin_loongarch_cacop_w: ++ case LoongArch::BI__builtin_loongarch_cacop_d: ++ return SemaBuiltinConstantArgRange(TheCall, 0, 0, 31) || ++ SemaBuiltinConstantArgRange(TheCall, 2, -2048, 2047); ++ // These intrinsics take an unsigned 14 bit immediate. ++ case LoongArch::BI__builtin_loongarch_csrrd_w: ++ case LoongArch::BI__builtin_loongarch_csrrd_d: ++ i = 0; ++ l = 0; ++ u = 16383; ++ break; ++ case LoongArch::BI__builtin_loongarch_csrwr_w: ++ case LoongArch::BI__builtin_loongarch_csrwr_d: ++ i = 1; ++ l = 0; ++ u = 16383; ++ break; ++ case LoongArch::BI__builtin_loongarch_csrxchg_w: ++ case LoongArch::BI__builtin_loongarch_csrxchg_d: ++ i = 2; ++ l = 0; ++ u = 16383; ++ break; ++ // These intrinsics take an unsigned 15 bit immediate. 
++ case LoongArch::BI__builtin_loongarch_dbar: ++ case LoongArch::BI__builtin_loongarch_ibar: ++ case LoongArch::BI__builtin_loongarch_syscall: ++ case LoongArch::BI__builtin_loongarch_break: i = 0; l = 0; u = 32767; break; ++ } ++ ++ if (!m) ++ return SemaBuiltinConstantArgRange(TheCall, i, l, u); ++ ++ return SemaBuiltinConstantArgRange(TheCall, i, l, u) || ++ SemaBuiltinConstantArgMultiple(TheCall, i, m); ++} ++ + bool Sema::CheckSystemZBuiltinFunctionCall(unsigned BuiltinID, + CallExpr *TheCall) { + if (BuiltinID == SystemZ::BI__builtin_tabort) { +diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d.c b/clang/test/CodeGen/LoongArch/abi-lp64d.c +new file mode 100644 +index 000000000..aa8f63094 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/abi-lp64d.c +@@ -0,0 +1,471 @@ ++// RUN: %clang_cc1 -triple loongarch64 -target-abi lp64 \ ++ // RUN: -emit-llvm %s -o - | FileCheck %s ++ ++/// This test checks the calling convention of the lp64d ABI. ++ ++#include ++#include ++ ++/// Part 0: C Data Types and Alignment. ++ ++/// `char` datatype is signed by default. ++/// In most cases, the unsigned integer data types are zero-extended when stored ++/// in general-purpose register, and the signed integer data types are ++/// sign-extended. However, in the LP64D ABI, unsigned 32-bit types, such as ++/// unsigned int, are stored in general-purpose registers as proper sign ++/// extensions of their 32-bit values. ++ ++// CHECK-LABEL: define{{.*}} zeroext i1 @check_bool() ++_Bool check_bool() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} signext i8 @check_char() ++char check_char() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} signext i16 @check_short() ++short check_short() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} signext i32 @check_int() ++int check_int() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} i64 @check_long() ++long check_long() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} i64 @check_longlong() ++long long check_longlong() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} zeroext i8 @check_uchar() ++unsigned char check_uchar() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} zeroext i16 @check_ushort() ++unsigned short check_ushort() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} signext i32 @check_uint() ++unsigned int check_uint() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} i64 @check_ulong() ++unsigned long check_ulong() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} i64 @check_ulonglong() ++unsigned long long check_ulonglong() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} float @check_float() ++float check_float() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} double @check_double() ++double check_double() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} fp128 @check_longdouble() ++long double check_longdouble() { return 0; } ++ ++/// Part 1: Scalar arguments and return value. ++ ++/// The lp64d abi says: ++/// 1. 1 < WOA <= GRLEN ++/// a. Argument is passed in a single argument register, or on the stack by ++/// value if none is available. ++/// i. If the argument is floating-point type, the argument is passed in FAR. if ++/// no FAR is available, it’s passed in GAR. If no GAR is available, it’s ++/// passed on the stack. When passed in registers or on the stack, ++/// floating-point types narrower than GRLEN bits are widened to GRLEN bits, ++/// with the upper bits undefined. ++/// ii. If the argument is integer or pointer type, the argument is passed in ++/// GAR. If no GAR is available, it’s passed on the stack. 
When passed in ++/// registers or on the stack, the unsigned integer scalars narrower than GRLEN ++/// bits are zero-extended to GRLEN bits, and the signed integer scalars are ++/// sign-extended. ++/// 2. GRLEN < WOA ≤ 2 × GRLEN ++/// a. The argument is passed in a pair of GAR, with the low-order GRLEN bits in ++/// the lower-numbered register and the high-order GRLEN bits in the ++/// higher-numbered register. If exactly one register is available, the ++/// low-order GRLEN bits are passed in the register and the high-order GRLEN ++/// bits are passed on the stack. If no GAR is available, it’s passed on the ++/// stack. ++ ++/// Note that most of these conventions are handled at the llvm side, so here we ++/// only check the correctness of argument (or return value)'s sign/zero ++/// extension attribute. ++ ++// CHECK-LABEL: define{{.*}} signext i32 @f_scalar(i1{{.*}} zeroext %a, i8{{.*}} signext %b, i8{{.*}} zeroext %c, i16{{.*}} signext %d, i16{{.*}} zeroext %e, i32{{.*}} signext %f, i32{{.*}} signext %g, i64{{.*}} %h, i1{{.*}} zeroext %i, i8{{.*}} signext %j, i8{{.*}} zeroext %k, i16{{.*}} signext %l, i16{{.*}} zeroext %m, i32{{.*}} signext %n, i32{{.*}} signext %o, i64{{.*}} %p) ++int f_scalar(_Bool a, int8_t b, uint8_t c, int16_t d, uint16_t e, int32_t f, ++ uint32_t g, int64_t h, /* begin of stack passing -> */ _Bool i, ++ int8_t j, uint8_t k, int16_t l, uint16_t m, int32_t n, ++ uint32_t o, int64_t p) { ++ return 0; ++} ++ ++/// Part 2: Structure arguments and return value. ++ ++/// The lp64d abi says: ++/// Empty structures are ignored by C compilers which support them as a ++/// non-standard extension(same as union arguments and return values). Bits ++/// unused due to padding, and bits past the end of a structure whose size in ++/// bits is not divisible by GRLEN, are undefined. And the layout of the ++/// structure on the stack is consistent with that in memory. ++ ++/// Check empty structs are ignored. ++ ++struct empty_s {}; ++ ++// CHECK-LABEL: define{{.*}} void @f_empty_s() ++struct empty_s f_empty_s(struct empty_s x) { ++ return x; ++} ++ ++/// 1. 0 < WOA ≤ GRLEN ++/// a. The structure has only fixed-point members. If there is an available GAR, ++/// the structure is passed through the GAR by value passing; If no GAR is ++/// available, it’s passed on the stack. ++ ++struct i16x4_s { ++ int16_t a, b, c, d; ++}; ++ ++// CHECK-LABEL: define{{.*}} i64 @f_i16x4_s(i64 %x.coerce) ++struct i16x4_s f_i16x4_s(struct i16x4_s x) { ++ return x; ++} ++ ++/// b. The structure has only floating-point members: ++/// i. One floating-point member. The argument is passed in a FAR; If no FAR is ++/// available, the value is passed in a GAR; if no GAR is available, the value ++/// is passed on the stack. ++ ++struct f32x1_s { ++ float a; ++}; ++ ++struct f64x1_s { ++ double a; ++}; ++ ++// CHECK-LABEL: define{{.*}} float @f_f32x1_s(float %0) ++struct f32x1_s f_f32x1_s(struct f32x1_s x) { ++ return x; ++} ++ ++// CHECK-LABEL: define{{.*}} double @f_f64x1_s(double %0) ++struct f64x1_s f_f64x1_s(struct f64x1_s x) { ++ return x; ++} ++ ++/// ii. Two floating-point members. The argument is passed in a pair of ++/// available FAR, with the low-order float member bits in the lower-numbered ++/// FAR and the high-order float member bits in the higher-numbered FAR. If the ++/// number of available FAR is less than 2, it’s passed in a GAR, and passed on ++/// the stack if no GAR is available. 
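/// As an informal illustration of the rules above (the $a0-$a7 and $fa0-$fa7
/// argument register names are assumed from the LoongArch psABI; this test
/// itself never names physical registers), direct calls such as
///
///   f_i16x4_s((struct i16x4_s){1, 2, 3, 4}); /* whole struct in one GAR ($a0) */
///   f_f32x1_s((struct f32x1_s){1.0f});       /* lone float member in $fa0 */
///
/// keep a small fixed-point aggregate in a single GAR and a lone
/// floating-point member in a single FAR, while the two-float structure
/// defined below is expected to occupy $fa0 and $fa1 when both are free. The
/// CHECK lines only verify the IR-level coercion; the final register
/// assignment is made by the LoongArch backend.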
++ ++struct f32x2_s { ++ float a, b; ++}; ++ ++// CHECK-LABEL: define{{.*}} { float, float } @f_f32x2_s(float %0, float %1) ++struct f32x2_s f_f32x2_s(struct f32x2_s x) { ++ return x; ++} ++ ++/// c. The structure has both fixed-point and floating-point members, i.e. the ++/// structure has one float member and... ++/// i. Multiple fixed-point members. If there are available GAR, the structure ++/// is passed in a GAR, and passed on the stack if no GAR is available. ++ ++struct f32x1_i16x2_s { ++ float a; ++ int16_t b, c; ++}; ++ ++// CHECK-LABEL: define{{.*}} i64 @f_f32x1_i16x2_s(i64 %x.coerce) ++struct f32x1_i16x2_s f_f32x1_i16x2_s(struct f32x1_i16x2_s x) { ++ return x; ++} ++ ++/// ii. Only one fixed-point member. If one FAR and one GAR are available, the ++/// floating-point member of the structure is passed in the FAR, and the integer ++/// member of the structure is passed in the GAR; If no floating-point register ++/// but one GAR is available, it’s passed in GAR; If no GAR is available, it’s ++/// passed on the stack. ++ ++struct f32x1_i32x1_s { ++ float a; ++ int32_t b; ++}; ++ ++// CHECK-LABEL: define{{.*}} { float, i32 } @f_f32x1_i32x1_s(float %0, i32 %1) ++struct f32x1_i32x1_s f_f32x1_i32x1_s(struct f32x1_i32x1_s x) { ++ return x; ++} ++ ++/// 2. GRLEN < WOA ≤ 2 × GRLEN ++/// a. Only fixed-point members. ++/// i. The argument is passed in a pair of available GAR, with the low-order ++/// bits in the lower-numbered GAR and the high-order bits in the ++/// higher-numbered GAR. If only one GAR is available, the low-order bits are in ++/// the GAR and the high-order bits are on the stack, and passed on the stack if ++/// no GAR is available. ++ ++struct i64x2_s { ++ int64_t a, b; ++}; ++ ++// CHECK-LABEL: define{{.*}} [2 x i64] @f_i64x2_s([2 x i64] %x.coerce) ++struct i64x2_s f_i64x2_s(struct i64x2_s x) { ++ return x; ++} ++ ++/// b. Only floating-point members. ++/// i. The structure has one long double member or one double member and two ++/// adjacent float members or 3-4 float members. The argument is passed in a ++/// pair of available GAR, with the low-order bits in the lower-numbered GAR and ++/// the high-order bits in the higher-numbered GAR. If only one GAR is ++/// available, the low-order bits are in the GAR and the high-order bits are on ++/// the stack, and passed on the stack if no GAR is available. ++ ++struct f128x1_s { ++ long double a; ++}; ++ ++// CHECK-LABEL: define{{.*}} i128 @f_f128x1_s(i128 %x.coerce) ++struct f128x1_s f_f128x1_s(struct f128x1_s x) { ++ return x; ++} ++ ++struct f64x1_f32x2_s { ++ double a; ++ float b, c; ++}; ++ ++// CHECK-LABEL: define{{.*}} [2 x i64] @f_f64x1_f32x2_s([2 x i64] %x.coerce) ++struct f64x1_f32x2_s f_f64x1_f32x2_s(struct f64x1_f32x2_s x) { ++ return x; ++} ++ ++struct f32x3_s { ++ float a, b, c; ++}; ++ ++// CHECK-LABEL: define{{.*}} [2 x i64] @f_f32x3_s([2 x i64] %x.coerce) ++struct f32x3_s f_f32x3_s(struct f32x3_s x) { ++ return x; ++} ++ ++struct f32x4_s { ++ float a, b, c, d; ++}; ++ ++// CHECK-LABEL: define{{.*}} [2 x i64] @f_f32x4_s([2 x i64] %x.coerce) ++struct f32x4_s f_f32x4_s(struct f32x4_s x) { ++ return x; ++} ++ ++/// ii. The structure with two double members is passed in a pair of available ++/// FARs. If no a pair of available FARs, it’s passed in GARs. A structure with ++/// one double member and one float member is same. 
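/// Informal note (a consequence of the rule above, not exercised by the
/// single-argument functions in this test): the FAR-pair rule only applies
/// while two FARs remain free. In a hypothetical declaration such as
///
///   void g(double, double, double, double,
///          double, double, double, double, struct f64x2_s);
///
/// (using the two-double structure defined just below), the eight leading
/// doubles already occupy $fa0-$fa7, so the trailing f64x2_s argument falls
/// back to GARs (or to the stack once the GARs are exhausted as well),
/// exactly as described above.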
++ ++struct f64x2_s { ++ double a, b; ++}; ++ ++// CHECK-LABEL: define{{.*}} { double, double } @f_f64x2_s(double %0, double %1) ++struct f64x2_s f_f64x2_s(struct f64x2_s x) { ++ return x; ++} ++ ++/// c. Both fixed-point and floating-point members. ++/// i. The structure has one double member and only one fixed-point member. ++/// A. If one FAR and one GAR are available, the floating-point member of the ++/// structure is passed in the FAR, and the integer member of the structure is ++/// passed in the GAR; If no floating-point registers but two GARs are ++/// available, it’s passed in the two GARs; If only one GAR is available, the ++/// low-order bits are in the GAR and the high-order bits are on the stack; And ++/// it’s passed on the stack if no GAR is available. ++ ++struct f64x1_i64x1_s { ++ double a; ++ int64_t b; ++}; ++ ++// CHECK-LABEL: define{{.*}} { double, i64 } @f_f64x1_i64x1_s(double %0, i64 %1) ++struct f64x1_i64x1_s f_f64x1_i64x1_s(struct f64x1_i64x1_s x) { ++ return x; ++} ++ ++/// ii. Others ++/// A. The argument is passed in a pair of available GAR, with the low-order ++/// bits in the lower-numbered GAR and the high-order bits in the ++/// higher-numbered GAR. If only one GAR is available, the low-order bits are in ++/// the GAR and the high-order bits are on the stack, and passed on the stack if ++/// no GAR is available. ++ ++struct f64x1_i32x2_s { ++ double a; ++ int32_t b, c; ++}; ++ ++// CHECK-LABEL: define{{.*}} [2 x i64] @f_f64x1_i32x2_s([2 x i64] %x.coerce) ++struct f64x1_i32x2_s f_f64x1_i32x2_s(struct f64x1_i32x2_s x) { ++ return x; ++} ++ ++struct f32x2_i32x2_s { ++ float a, b; ++ int32_t c, d; ++}; ++ ++// CHECK-LABEL: define{{.*}} [2 x i64] @f_f32x2_i32x2_s([2 x i64] %x.coerce) ++struct f32x2_i32x2_s f_f32x2_i32x2_s(struct f32x2_i32x2_s x) { ++ return x; ++} ++ ++/// 3. WOA > 2 × GRLEN ++/// a. It’s passed by reference and are replaced in the argument list with the ++/// address. If there is an available GAR, the reference is passed in the GAR, ++/// and passed on the stack if no GAR is available. ++ ++struct i64x4_s { ++ int64_t a, b, c, d; ++}; ++ ++// CHECK-LABEL: define{{.*}} void @f_i64x4_s(ptr{{.*}} sret(%struct.i64x4_s){{.*}} %agg.result, ptr{{.*}} %x) ++struct i64x4_s f_i64x4_s(struct i64x4_s x) { ++ return x; ++} ++ ++struct f64x4_s { ++ double a, b, c, d; ++}; ++ ++// CHECK-LABEL: define{{.*}} void @f_f64x4_s(ptr{{.*}} sret(%struct.f64x4_s){{.*}} %agg.result, ptr{{.*}} %x) ++struct f64x4_s f_f64x4_s(struct f64x4_s x) { ++ return x; ++} ++ ++/// Part 3: Union arguments and return value. ++ ++/// Check empty unions are ignored. ++ ++union empty_u {}; ++ ++// CHECK-LABEL: define{{.*}} void @f_empty_u() ++union empty_u f_empty_u(union empty_u x) { ++ return x; ++} ++ ++/// Union is passed in GAR or stack. ++/// 1. 0 < WOA ≤ GRLEN ++/// a. The argument is passed in a GAR, or on the stack by value if no GAR is ++/// available. ++ ++union i32_f32_u { ++ int32_t a; ++ float b; ++}; ++ ++// CHECK-LABEL: define{{.*}} i64 @f_i32_f32_u(i64 %x.coerce) ++union i32_f32_u f_i32_f32_u(union i32_f32_u x) { ++ return x; ++} ++ ++union i64_f64_u { ++ int64_t a; ++ double b; ++}; ++ ++// CHECK-LABEL: define{{.*}} i64 @f_i64_f64_u(i64 %x.coerce) ++union i64_f64_u f_i64_f64_u(union i64_f64_u x) { ++ return x; ++} ++ ++/// 2. GRLEN < WOA ≤ 2 × GRLEN ++/// a. The argument is passed in a pair of available GAR, with the low-order ++/// bits in the lower-numbered GAR and the high-order bits in the ++/// higher-numbered GAR. 
If only one GAR is available, the low-order bits are in ++/// the GAR and the high-order bits are on the stack. The arguments are passed ++/// on the stack when no GAR is available. ++ ++union i128_f128_u { ++ __int128_t a; ++ long double b; ++}; ++ ++// CHECK-LABEL: define{{.*}} i128 @f_i128_f128_u(i128 %x.coerce) ++union i128_f128_u f_i128_f128_u(union i128_f128_u x) { ++ return x; ++} ++ ++/// 3. WOA > 2 × GRLEN ++/// a. It’s passed by reference and is replaced in the argument list with the ++/// address. If there is an available GAR, the reference is passed in the GAR, ++/// and passed on the stack if no GAR is available. ++ ++union i64_arr3_u { ++ int64_t a[3]; ++}; ++ ++// CHECK-LABEL: define{{.*}} void @f_i64_arr3_u(ptr{{.*}} sret(%union.i64_arr3_u){{.*}} %agg.result, ptr{{.*}} %x) ++union i64_arr3_u f_i64_arr3_u(union i64_arr3_u x) { ++ return x; ++} ++ ++/// Part 4: Complex number arguments and return value. ++ ++/// A complex floating-point number, or a structure containing just one complex ++/// floating-point number, is passed as though it were a structure containing ++/// two floating-point reals. ++ ++// CHECK-LABEL: define{{.*}} { float, float } @f_floatcomplex(float{{.*}} %x.coerce0, float{{.*}} %x.coerce1) ++float __complex__ f_floatcomplex(float __complex__ x) { return x; } ++ ++// CHECK-LABEL: define{{.*}} { double, double } @f_doublecomplex(double{{.*}} %x.coerce0, double{{.*}} %x.coerce1) ++double __complex__ f_doublecomplex(double __complex__ x) { return x; } ++ ++struct floatcomplex_s { ++ float __complex__ c; ++}; ++// CHECK-LABEL: define{{.*}} { float, float } @f_floatcomplex_s(float %0, float %1) ++struct floatcomplex_s f_floatcomplex_s(struct floatcomplex_s x) { ++ return x; ++} ++ ++struct doublecomplex_s { ++ double __complex__ c; ++}; ++// CHECK-LABEL: define{{.*}} { double, double } @f_doublecomplex_s(double %0, double %1) ++struct doublecomplex_s f_doublecomplex_s(struct doublecomplex_s x) { ++ return x; ++} ++ ++/// Part 5: Variadic arguments. ++ ++/// Variadic arguments are passed in GARs in the same manner as named arguments. ++ ++int f_va_callee(int, ...); ++ ++// CHECK-LABEL: define{{.*}} void @f_va_caller() ++// CHECK: call signext i32 (i32, ...) @f_va_callee(i32{{.*}} signext 1, i32{{.*}} signext 2, i64{{.*}} 3, double{{.*}} 4.000000e+00, double{{.*}} 5.000000e+00, i64 {{.*}}, i64 {{.*}}, i64 {{.*}}) ++void f_va_caller(void) { ++ f_va_callee(1, 2, 3LL, 4.0f, 5.0, (struct i16x4_s){6, 7, 8, 9}, ++ (struct i64x2_s){10, 11}); ++} ++ ++// CHECK-LABEL: define signext i32 @f_va_int(ptr{{.*}} %fmt, ...) ++// CHECK: entry: ++// CHECK: %fmt.addr = alloca ptr, align 8 ++// CHECK: %va = alloca ptr, align 8 ++// CHECK: %v = alloca i32, align 4 ++// CHECK: store ptr %fmt, ptr %fmt.addr, align 8 ++// CHECK: call void @llvm.va_start(ptr %va) ++// CHECK: %argp.cur = load ptr, ptr %va, align 8 ++// CHECK: %argp.next = getelementptr inbounds i8, ptr %argp.cur, i64 8 ++// CHECK: store ptr %argp.next, ptr %va, align 8 ++// CHECK: %0 = load i32, ptr %argp.cur, align 8 ++// CHECK: store i32 %0, ptr %v, align 4 ++// CHECK: call void @llvm.va_end(ptr %va) ++// CHECK: %1 = load i32, ptr %v, align 4 ++// CHECK: ret i32 %1 ++// CHECK: } ++int f_va_int(char *fmt, ...)
{ ++ __builtin_va_list va; ++ __builtin_va_start(va, fmt); ++ int v = __builtin_va_arg(va, int); ++ __builtin_va_end(va); ++ return v; ++} +diff --git a/clang/test/CodeGen/LoongArch/inlineasm-float-double-in-gpr.c b/clang/test/CodeGen/LoongArch/inlineasm-float-double-in-gpr.c +new file mode 100644 +index 000000000..e4a03d782 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/inlineasm-float-double-in-gpr.c +@@ -0,0 +1,49 @@ ++// RUN: %clang_cc1 -triple loongarch64 -O2 -emit-llvm %s -o - \ ++// RUN: | FileCheck %s ++ ++float f; ++double d; ++ ++// CHECK-LABEL: @reg_float( ++// CHECK: [[FLT_ARG:%.*]] = load float, ptr @f ++// CHECK: call void asm sideeffect "", "r"(float [[FLT_ARG]]) ++// CHECK: ret void ++void reg_float() { ++ float a = f; ++ asm volatile("" ++ : ++ : "r"(a)); ++} ++ ++// CHECK-LABEL: @r4_float( ++// CHECK: [[FLT_ARG:%.*]] = load float, ptr @f ++// CHECK: call void asm sideeffect "", "{$r4}"(float [[FLT_ARG]]) ++// CHECK: ret void ++void r4_float() { ++ register float a asm("$r4") = f; ++ asm volatile("" ++ : ++ : "r"(a)); ++} ++ ++// CHECK-LABEL: @reg_double( ++// CHECK: [[DBL_ARG:%.*]] = load double, ptr @d ++// CHECK: call void asm sideeffect "", "r"(double [[DBL_ARG]]) ++// CHECK: ret void ++void reg_double() { ++ double a = d; ++ asm volatile("" ++ : ++ : "r"(a)); ++} ++ ++// CHECK-LABEL: @r4_double( ++// CHECK: [[DBL_ARG:%.*]] = load double, ptr @d ++// CHECK: call void asm sideeffect "", "{$r4}"(double [[DBL_ARG]]) ++// CHECK: ret void ++void r4_double() { ++ register double a asm("$r4") = d; ++ asm volatile("" ++ : ++ : "r"(a)); ++} +diff --git a/clang/test/CodeGen/builtins-loongarch-base.c b/clang/test/CodeGen/builtins-loongarch-base.c +new file mode 100644 +index 000000000..cdff582fa +--- /dev/null ++++ b/clang/test/CodeGen/builtins-loongarch-base.c +@@ -0,0 +1,409 @@ ++// REQUIRES: loongarch-registered-target ++// RUN: %clang_cc1 -triple loongarch64-linux-gnu -emit-llvm %s -o - | FileCheck %s ++ ++#include ++ ++typedef char i8; ++typedef unsigned char u8; ++typedef short i16; ++typedef unsigned short u16; ++typedef int i32; ++typedef unsigned int u32; ++ ++#if __LONG_MAX__ == __LONG_LONG_MAX__ ++typedef long int i64; ++typedef unsigned long int u64; ++#else ++typedef long long i64; ++typedef unsigned long long u64; ++#endif ++ ++__drdtime_t drdtime; ++__rdtime_t rdtime; ++ ++void cpucfg(){ ++ ++ u32 u32_r, u32_a; ++ // __cpucfg ++ // rd, rj ++ // unsigned int, unsigned int ++ u32_r= __builtin_loongarch_cpucfg(u32_a); // CHECK: call i32 @llvm.loongarch.cpucfg ++ ++} ++ ++void csrrd_w() { ++ ++ u32 u32_r; ++ // __csrrd_w ++ // rd, csr_num ++ // unsigned int, uimm14_32 ++ u32_r = __builtin_loongarch_csrrd_w(1); // CHECK: call i32 @llvm.loongarch.csrrd.w ++} ++ ++void csrrd_d() { ++ ++ u64 u64_r; ++ // __csrrd_d ++ // rd, csr_num ++ // unsigned long int, uimm14 ++ u64_r = __builtin_loongarch_csrrd_d(1); // CHECK: call i64 @llvm.loongarch.csrrd.d ++} ++ ++void csrwr_w() { ++ ++ u32 u32_r, u32_a; ++ // __csrwr_w ++ // rd, csr_num ++ // unsigned int, uimm14_32 ++ u32_r = __builtin_loongarch_csrwr_w(u32_a, 1); // CHECK: call i32 @llvm.loongarch.csrwr.w ++} ++ ++void csrwr_d() { ++ ++ u64 u64_r, u64_a; ++ // __csrwr_d ++ // rd, csr_num ++ // unsigned long int, uimm14 ++ u64_r = __builtin_loongarch_csrwr_d(u64_a, 1); // CHECK: call i64 @llvm.loongarch.csrwr.d ++} ++ ++void csrxchg_w() { ++ ++ u32 u32_r, u32_a, u32_b; ++ // __csrxchg_w ++ // rd, rj, csr_num ++ // unsigned int, unsigned int, uimm14_32 ++ u32_r = __builtin_loongarch_csrxchg_w(u32_a, u32_b, 1); // CHECK: call 
i32 @llvm.loongarch.csrxchg.w ++} ++ ++void csrxchg_d() { ++ ++ u64 u64_r, u64_a, u64_b; ++ // __csrxchg_d ++ // rd, rj, csr_num ++ // unsigned long int, unsigned long int, uimm14 ++ u64_r = __builtin_loongarch_csrxchg_d(u64_a, u64_b, 1); // CHECK: call i64 @llvm.loongarch.csrxchg.d ++} ++ ++void iocsrrd_b(){ ++ ++ u32 u32_a; ++ u8 u8_r; ++ // __iocsrrd_b ++ // rd, rj ++ // unsigned char, unsigned int ++ u8_r=__builtin_loongarch_iocsrrd_b(u32_a); // CHECK: call i32 @llvm.loongarch.iocsrrd.b ++ ++} ++ ++void iocsrrd_h(){ ++ ++ u32 u32_a; ++ u16 u16_r; ++ // __iocsrrd_h ++ // rd, rj ++ // unsigned short, unsigned int ++ u16_r=__builtin_loongarch_iocsrrd_h(u32_a); // CHECK: call i32 @llvm.loongarch.iocsrrd.h ++ ++} ++ ++void iocsrrd_w(){ ++ ++ u32 u32_r, u32_a; ++ // __iocsrrd_w ++ // rd, rj ++ // unsigned int, unsigned int ++ u32_r=__builtin_loongarch_iocsrrd_w(u32_a); // CHECK: call i32 @llvm.loongarch.iocsrrd.w ++ ++} ++ ++void iocsrrd_d(){ ++ ++ u32 u32_a; ++ u64 u64_r; ++ // __iocsrrd_d ++ // rd, rj ++ // unsigned long int, unsigned int ++ u64_r=__builtin_loongarch_iocsrrd_d(u32_a); // CHECK: call i64 @llvm.loongarch.iocsrrd.d ++ ++} ++ ++void iocsrwr_b(){ ++ ++ u32 u32_a; ++ u8 u8_a; ++ // __iocsrwr_b ++ // rd, rj ++ // unsigned char, unsigned int ++ __builtin_loongarch_iocsrwr_b(u8_a, u32_a); // CHECK: void @llvm.loongarch.iocsrwr.b ++ ++} ++ ++void iocsrwr_h(){ ++ ++ u32 u32_a; ++ u16 u16_a; ++ // __iocsrwr_h ++ // rd, rj ++ // unsigned short, unsigned int ++ __builtin_loongarch_iocsrwr_h(u16_a, u32_a); // CHECK: void @llvm.loongarch.iocsrwr.h ++ ++} ++ ++void iocsrwr_w(){ ++ ++ u32 u32_a, u32_b; ++ // __iocsrwr_w ++ // rd, rj ++ // unsigned int, unsigned int ++ __builtin_loongarch_iocsrwr_w(u32_a, u32_b); // CHECK: void @llvm.loongarch.iocsrwr.w ++ ++} ++ ++void iocsrwr_d(){ ++ ++ u32 u32_a; ++ u64 u64_a; ++ // __iocsrwr_d ++ // rd, rj ++ // unsigned long int, unsigned int ++ __builtin_loongarch_iocsrwr_d(u64_a, u32_a); // CHECK: void @llvm.loongarch.iocsrwr.d ++ ++} ++ ++void cacop_w() { ++ ++ i32 i32_a; ++ // __cacop_w ++ // op, rj, si12 ++ // uimm5, unsigned int, simm12 ++ __builtin_loongarch_cacop_w(1, i32_a, 2); // CHECK: void @llvm.loongarch.cacop.w ++} ++ ++void cacop_d() { ++ ++ i64 i64_a; ++ // __cacop_d ++ // op, rj, si12 ++ // uimm5, unsigned long int, simm12 ++ __builtin_loongarch_cacop_d(1, i64_a, 2); // CHECK: void @llvm.loongarch.cacop.d ++} ++ ++void rdtime_d(){ ++ ++ drdtime= __builtin_loongarch_rdtime_d(); // CHECK: call { i64, i64 } asm sideeffect "rdtime.d\09$0,$1\0A\09", "=&r,=&r"() ++ ++} ++ ++void rdtimeh_w(){ ++ ++ rdtime= __builtin_loongarch_rdtimeh_w(); // CHECK: call { i32, i32 } asm sideeffect "rdtimeh.w\09$0,$1\0A\09", "=&r,=&r"() ++ ++} ++ ++void rdtimel_w(){ ++ ++ rdtime= __builtin_loongarch_rdtimel_w(); // CHECK: call { i32, i32 } asm sideeffect "rdtimel.w\09$0,$1\0A\09", "=&r,=&r"() ++ ++} ++ ++void crc_w_b_w(){ ++ ++ i32 i32_r, i32_a; ++ i8 i8_a; ++ // __crc_w_b_w ++ // rd, rj, rk ++ // int, char, int ++ i32_r=__builtin_loongarch_crc_w_b_w(i8_a, i32_a); // CHECK: call i32 @llvm.loongarch.crc.w.b.w ++ ++} ++ ++void crc_w_h_w(){ ++ ++ i32 i32_r, i32_a; ++ i16 i16_a; ++ // __crc_w_h_w ++ // rd, rj, rk ++ // int, short, int ++ i32_r=__builtin_loongarch_crc_w_h_w(i16_a, i32_a); // CHECK: call i32 @llvm.loongarch.crc.w.h.w ++ ++} ++ ++void crc_w_w_w(){ ++ ++ i32 i32_r, i32_a, i32_b; ++ // __crc_w_w_w ++ // rd, rj, rk ++ // int, int, int ++ i32_r=__builtin_loongarch_crc_w_w_w(i32_a, i32_b); // CHECK: call i32 @llvm.loongarch.crc.w.w.w ++ ++} ++ ++void 
crc_w_d_w(){ ++ ++ i32 i32_r, i32_a; ++ i64 i64_a; ++ // __crc_w_d_w ++ // rd, rj, rk ++ // int, long int, int ++ i32_r=__builtin_loongarch_crc_w_d_w(i64_a, i32_a); // CHECK: call i32 @llvm.loongarch.crc.w.d.w ++ ++} ++ ++void crcc_w_b_w(){ ++ ++ i32 i32_r, i32_a; ++ i8 i8_a; ++ // __crcc_w_b_w ++ // rd, rj, rk ++ // int, char, int ++ i32_r=__builtin_loongarch_crcc_w_b_w(i8_a, i32_a); // CHECK: call i32 @llvm.loongarch.crcc.w.b.w ++ ++} ++ ++void crcc_w_h_w(){ ++ ++ i32 i32_r, i32_a; ++ i16 i16_a; ++ // __crcc_w_h_w ++ // rd, rj, rk ++ // int, short, int ++ i32_r=__builtin_loongarch_crcc_w_h_w(i16_a, i32_a); // CHECK: call i32 @llvm.loongarch.crcc.w.h.w ++ ++} ++ ++void crcc_w_w_w(){ ++ ++ i32 i32_r, i32_a, i32_b; ++ // __crcc_w_w_w ++ // rd, rj, rk ++ // int, int, int ++ i32_r=__builtin_loongarch_crcc_w_w_w(i32_a, i32_b); // CHECK: call i32 @llvm.loongarch.crcc.w.w.w ++ ++} ++ ++void crcc_w_d_w(){ ++ ++ i32 i32_r, i32_a; ++ i64 i64_a; ++ // __crcc_w_d_w ++ // rd, rj, rk ++ // int, long int, int ++ i32_r=__builtin_loongarch_crcc_w_d_w(i64_a, i32_a); // CHECK: call i32 @llvm.loongarch.crcc.w.d.w ++ ++} ++ ++void tlbclr(){ ++ ++ // __tlbclr ++ __builtin_loongarch_tlbclr(); // CHECK: call void @llvm.loongarch.tlbclr ++ ++} ++ ++void tlbflush(){ ++ ++ // __tlbflush ++ __builtin_loongarch_tlbflush(); // CHECK: call void @llvm.loongarch.tlbflush ++ ++} ++ ++void tlbfill(){ ++ ++ // __tlbfill ++ __builtin_loongarch_tlbfill(); // CHECK: call void @llvm.loongarch.tlbfill ++ ++} ++ ++void tlbrd(){ ++ ++ // __tlbrd ++ __builtin_loongarch_tlbrd(); // CHECK: call void @llvm.loongarch.tlbrd ++ ++} ++ ++void tlbwr(){ ++ ++ // __tlbwr ++ __builtin_loongarch_tlbwr(); // CHECK: call void @llvm.loongarch.tlbwr ++ ++} ++ ++void tlbsrch(){ ++ ++ // __tlbsrch ++ __builtin_loongarch_tlbsrch(); // CHECK: call void @llvm.loongarch.tlbsrch ++ ++} ++ ++void syscall(){ ++ ++ // __syscall ++ // Code ++ // uimm15 ++ __builtin_loongarch_syscall(1); // CHECK: call void @llvm.loongarch.syscall ++ ++} ++ ++void break_builtin(){ ++ ++ // __break ++ // Code ++ // uimm15 ++ __builtin_loongarch_break(1); // CHECK: call void @llvm.loongarch.break ++ ++} ++ ++void asrtle_d(){ ++ ++ i64 i64_a, i64_b; ++ // __asrtle_d ++ // rj, rk ++ // long int, long int ++ __builtin_loongarch_asrtle_d(i64_a, i64_b); // CHECK: call void @llvm.loongarch.asrtle.d ++ ++} ++ ++void asrtgt_d(){ ++ ++ i64 i64_a, i64_b; ++ // __asrtgt_d ++ // rj, rk ++ // long int, long int ++ __builtin_loongarch_asrtgt_d(i64_a, i64_b); // CHECK: call void @llvm.loongarch.asrtgt.d ++ ++} ++ ++void dbar(){ ++ ++ // __dbar ++ // hint ++ // uimm15 ++ __builtin_loongarch_dbar(0); // CHECK: call void @llvm.loongarch.dbar ++ ++} ++ ++void ibar(){ ++ ++ // __ibar ++ // hint ++ // uimm15 ++ __builtin_loongarch_ibar(0); // CHECK: call void @llvm.loongarch.ibar ++ ++} ++ ++void movfcsr2gr(){ ++ ++ u32 u32_r; ++ // __movfcsr2gr ++ u32_r=__movfcsr2gr(0); // CHECK: call i32 asm sideeffect "movfcsr2gr $0, $$fcsr0", "=&r"() ++ ++} ++ ++ ++void movgr2fcsr() { ++ ++ u32 u32_a; ++ // __movgr2fcsr ++ __movgr2fcsr(0, u32_a); // CHECK: call void asm sideeffect "movgr2fcsr $$fcsr0, $0", "r"(i32 %0) ++ ++} +diff --git a/clang/test/CodeGen/builtins-loongarch-lasx-error.c b/clang/test/CodeGen/builtins-loongarch-lasx-error.c +new file mode 100644 +index 000000000..99f2687e4 +--- /dev/null ++++ b/clang/test/CodeGen/builtins-loongarch-lasx-error.c +@@ -0,0 +1,266 @@ ++// REQUIRES: loongarch-registered-target ++// RUN: %clang_cc1 -triple loongarch64-unknown-linux-gnu -fsyntax-only %s \ ++// RUN: 
-target-feature +lasx \ ++// RUN: -verify -o - 2>&1 ++ ++#include ++ ++void test() { ++ v32i8 v32i8_a = (v32i8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; ++ v32i8 v32i8_b = (v32i8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ++ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; ++ v32i8 v32i8_c = (v32i8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, ++ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33}; ++ v32i8 v32i8_r; ++ ++ v16i16 v16i16_a = (v16i16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16i16 v16i16_b = (v16i16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16i16 v16i16_c = (v16i16){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16i16 v16i16_r; ++ ++ v8i32 v8i32_a = (v8i32){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8i32 v8i32_b = (v8i32){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8i32 v8i32_c = (v8i32){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8i32 v8i32_r; ++ ++ v4i64 v4i64_a = (v4i64){0, 1, 2, 3}; ++ v4i64 v4i64_b = (v4i64){1, 2, 3, 4}; ++ v4i64 v4i64_c = (v4i64){2, 3, 4, 5}; ++ v4i64 v4i64_r; ++ ++ v32u8 v32u8_a = (v32u8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; ++ v32u8 v32u8_b = (v32u8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ++ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; ++ v32u8 v32u8_c = (v32u8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, ++ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33}; ++ v32u8 v32u8_r; ++ ++ v16u16 v16u16_a = (v16u16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16u16 v16u16_b = (v16u16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16u16 v16u16_c = (v16u16){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16u16 v16u16_r; ++ ++ v8u32 v8u32_a = (v8u32){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8u32 v8u32_b = (v8u32){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8u32 v8u32_c = (v8u32){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8u32 v8u32_r; ++ ++ v4u64 v4u64_a = (v4u64){0, 1, 2, 3}; ++ v4u64 v4u64_b = (v4u64){1, 2, 3, 4}; ++ v4u64 v4u64_c = (v4u64){2, 3, 4, 5}; ++ v4u64 v4u64_r; ++ ++ v8f32 v8f32_a = (v8f32){0.5, 1, 2, 3, 4, 5, 6, 7}; ++ v8f32 v8f32_b = (v8f32){1.5, 2, 3, 4, 5, 6, 7, 8}; ++ v8f32 v8f32_c = (v8f32){2.5, 3, 4, 5, 6, 7, 8, 9}; ++ v8f32 v8f32_r; ++ v4f64 v4f64_a = (v4f64){0.5, 1, 2, 3}; ++ v4f64 v4f64_b = (v4f64){1.5, 2, 3, 4}; ++ v4f64 v4f64_c = (v4f64){2.5, 3, 4, 5}; ++ v4f64 v4f64_r; ++ ++ int i32_r; ++ int i32_a = 1; ++ int i32_b = 2; ++ unsigned int u32_r; ++ unsigned int u32_a = 1; ++ unsigned int u32_b = 2; ++ long long i64_r; ++ long long i64_a = 1; ++ long long i64_b = 2; ++ long long i64_c = 3; ++ unsigned long long u64_r; ++ unsigned long long u64_a = 1; ++ unsigned long long u64_b = 2; ++ unsigned long long u64_c = 3; ++ ++ v32i8_r = __lasx_xvslli_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvslli_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvslli_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvslli_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvsrai_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvsrai_h(v16i16_a, 16); // expected-error 
{{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvsrai_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsrai_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvsrari_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvsrari_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvsrari_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsrari_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvsrli_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvsrli_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvsrli_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsrli_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvsrlri_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvsrlri_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvsrlri_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsrlri_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32u8_r = __lasx_xvbitclri_b(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16u16_r = __lasx_xvbitclri_h(v16u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u32_r = __lasx_xvbitclri_w(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvbitclri_d(v4u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32u8_r = __lasx_xvbitseti_b(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16u16_r = __lasx_xvbitseti_h(v16u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u32_r = __lasx_xvbitseti_w(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvbitseti_d(v4u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32u8_r = __lasx_xvbitrevi_b(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16u16_r = __lasx_xvbitrevi_h(v16u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u32_r = __lasx_xvbitrevi_w(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvbitrevi_d(v4u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvaddi_bu(v32i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i16_r = __lasx_xvaddi_hu(v16i16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvaddi_wu(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = 
__lasx_xvaddi_du(v4i64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvsubi_bu(v32i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i16_r = __lasx_xvsubi_hu(v16i16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvsubi_wu(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsubi_du(v4i64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvmaxi_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i16_r = __lasx_xvmaxi_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i32_r = __lasx_xvmaxi_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i64_r = __lasx_xvmaxi_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v32u8_r = __lasx_xvmaxi_bu(v32u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16u16_r = __lasx_xvmaxi_hu(v16u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u32_r = __lasx_xvmaxi_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvmaxi_du(v4u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvmini_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i16_r = __lasx_xvmini_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i32_r = __lasx_xvmini_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i64_r = __lasx_xvmini_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v32u8_r = __lasx_xvmini_bu(v32u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16u16_r = __lasx_xvmini_hu(v16u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u32_r = __lasx_xvmini_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvmini_du(v4u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvseqi_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i16_r = __lasx_xvseqi_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i32_r = __lasx_xvseqi_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i64_r = __lasx_xvseqi_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v32i8_r = __lasx_xvslti_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i16_r = __lasx_xvslti_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i32_r = __lasx_xvslti_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i64_r = __lasx_xvslti_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v32i8_r = __lasx_xvslti_bu(v32u8_a, 32); // 
expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i16_r = __lasx_xvslti_hu(v16u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvslti_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvslti_du(v4u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvslei_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i16_r = __lasx_xvslei_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i32_r = __lasx_xvslei_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i64_r = __lasx_xvslei_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v32i8_r = __lasx_xvslei_bu(v32u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i16_r = __lasx_xvslei_hu(v16u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvslei_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvslei_du(v4u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvsat_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvsat_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvsat_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsat_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32u8_r = __lasx_xvsat_bu(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16u16_r = __lasx_xvsat_hu(v16u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u32_r = __lasx_xvsat_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvsat_du(v4u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvrepl128vei_b(v32i8_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvrepl128vei_h(v16i16_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i32_r = __lasx_xvrepl128vei_w(v8i32_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v4i64_r = __lasx_xvrepl128vei_d(v4i64_a, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ v32u8_r = __lasx_xvandi_b(v32u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32u8_r = __lasx_xvori_b(v32u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32u8_r = __lasx_xvnori_b(v32u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32u8_r = __lasx_xvxori_b(v32u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32u8_r = __lasx_xvbitseli_b(v32u8_a, v32u8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32i8_r = __lasx_xvshuf4i_b(v32i8_a, 256); // expected-error {{argument value 256 is outside the valid range 
[0, 255]}} ++ v16i16_r = __lasx_xvshuf4i_h(v16i16_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v8i32_r = __lasx_xvshuf4i_w(v8i32_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v8i32_r = __lasx_xvpermi_w(v8i32_a, v8i32_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i16_r = __lasx_xvsllwil_h_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i32_r = __lasx_xvsllwil_w_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i64_r = __lasx_xvsllwil_d_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16u16_r = __lasx_xvsllwil_hu_bu(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8u32_r = __lasx_xvsllwil_wu_hu(v16u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4u64_r = __lasx_xvsllwil_du_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvfrstpi_b(v32i8_a, v32i8_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i16_r = __lasx_xvfrstpi_h(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvshuf4i_d(v4i64_a, v4i64_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32i8_r = __lasx_xvbsrl_v(v32i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvbsll_v(v32i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvextrins_b(v32i8_a, v32i8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i16_r = __lasx_xvextrins_h(v16i16_a, v16i16_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v8i32_r = __lasx_xvextrins_w(v8i32_a, v8i32_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v4i64_r = __lasx_xvextrins_d(v4i64_a, v4i64_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32i8_r = __lasx_xvld(&v32i8_a, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ __lasx_xvst(v32i8_a, &v32i8_b, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ __lasx_xvstelm_b(v32i8_a, &v32i8_b, 0, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ __lasx_xvstelm_h(v16i16_a, &v16i16_b, 0, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ __lasx_xvstelm_w(v8i32_a, &v8i32_b, 0, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ __lasx_xvstelm_d(v4i64_a, &v4i64_b, 0, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v8i32_r = __lasx_xvinsve0_w(v8i32_a, v8i32_b, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i64_r = __lasx_xvinsve0_d(v4i64_a, v4i64_b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v8i32_r = __lasx_xvpickve_w(v8i32_b, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i64_r = __lasx_xvpickve_d(v4i64_b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v4i64_r = __lasx_xvldi(-4097); // 
expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}} ++ v8i32_r = __lasx_xvinsgr2vr_w(v8i32_a, i32_b, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i64_r = __lasx_xvinsgr2vr_d(v4i64_a, i64_b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v32i8_r = __lasx_xvpermi_q(v32i8_a, v32i8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v4i64_r = __lasx_xvpermi_d(v4i64_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32i8_r = __lasx_xvldrepl_b(&v32i8_a, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ v16i16_r = __lasx_xvldrepl_h(&v16i16_a, -1025); // expected-error {{argument value -1025 is outside the valid range [-1024, 1023]}} ++ v8i32_r = __lasx_xvldrepl_w(&v8i32_a, -513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ v4i64_r = __lasx_xvldrepl_d(&v4i64_a, -257); // expected-error {{argument value -257 is outside the valid range [-256, 255]}} ++ i32_r = __lasx_xvpickve2gr_w(v8i32_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ u32_r = __lasx_xvpickve2gr_wu(v8i32_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ i64_r = __lasx_xvpickve2gr_d(v4i64_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ u64_r = __lasx_xvpickve2gr_du(v4i64_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v32i8_r = __lasx_xvrotri_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvrotri_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvrotri_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvrotri_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvsrlni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvsrlni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvsrlni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvsrlni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvsrlrni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvsrlrni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvsrlrni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvsrlrni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvssrlni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvssrlni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvssrlni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvssrlni_d_q(v4i64_a, 
v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32u8_r = __lasx_xvssrlni_bu_h(v32u8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16u16_r = __lasx_xvssrlni_hu_w(v16u16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u32_r = __lasx_xvssrlni_wu_d(v8u32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4u64_r = __lasx_xvssrlni_du_q(v4u64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvssrlrni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvssrlrni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvssrlrni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvssrlrni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32u8_r = __lasx_xvssrlrni_bu_h(v32u8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16u16_r = __lasx_xvssrlrni_hu_w(v16u16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u32_r = __lasx_xvssrlrni_wu_d(v8u32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4u64_r = __lasx_xvssrlrni_du_q(v4u64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvsrani_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvsrani_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvsrani_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvsrani_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvsrarni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvsrarni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvsrarni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvsrarni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvssrani_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvssrani_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvssrani_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvssrani_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32u8_r = __lasx_xvssrani_bu_h(v32u8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16u16_r = __lasx_xvssrani_hu_w(v16u16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ 
v8u32_r = __lasx_xvssrani_wu_d(v8u32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
++ v4u64_r = __lasx_xvssrani_du_q(v4u64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
++ v32i8_r = __lasx_xvssrarni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
++ v16i16_r = __lasx_xvssrarni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
++ v8i32_r = __lasx_xvssrarni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
++ v4i64_r = __lasx_xvssrarni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
++ v32u8_r = __lasx_xvssrarni_bu_h(v32u8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
++ v16u16_r = __lasx_xvssrarni_hu_w(v16u16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
++ v8u32_r = __lasx_xvssrarni_wu_d(v8u32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
++ v4u64_r = __lasx_xvssrarni_du_q(v4u64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
++}
+diff --git a/clang/test/CodeGen/builtins-loongarch-lasx.c b/clang/test/CodeGen/builtins-loongarch-lasx.c
+new file mode 100644
+index 000000000..0d6a54cb0
+--- /dev/null
++++ b/clang/test/CodeGen/builtins-loongarch-lasx.c
+@@ -0,0 +1,3772 @@
++// REQUIRES: loongarch-registered-target
++// RUN: %clang_cc1 -triple loongarch64-unknown-linux-gnu -emit-llvm %s \
++// RUN: -target-feature +lasx \
++// RUN: -o - | FileCheck %s
++
++#include <lasxintrin.h>
++
++#define ui1_b 1
++#define ui2 1
++#define ui2_b ui2
++#define ui3 4
++#define ui3_b ui3
++#define ui4 7
++#define ui4_b ui4
++#define ui5 25
++#define ui5_b ui5
++#define ui6 44
++#define ui6_b ui6
++#define ui7 100
++#define ui7_b ui7
++#define ui8 127 //200
++#define ui8_b ui8
++#define si5_b -4
++#define si8 -100
++#define si9 0
++#define si10 0
++#define si11 0
++#define si12 0
++#define i10 500
++#define i13 4000
++#define mode 0
++#define idx1 1
++#define idx2 2
++#define idx3 4
++#define idx4 8
++
++void test(void) {
++ v32i8 v32i8_a = (v32i8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
++ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
++ v32i8 v32i8_b = (v32i8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
++ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32};
++ v32i8 v32i8_c = (v32i8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
++ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33};
++ v32i8 v32i8_r;
++
++ v16i16 v16i16_a = (v16i16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
++ v16i16 v16i16_b = (v16i16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
++ v16i16 v16i16_c = (v16i16){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17};
++ v16i16 v16i16_r;
++
++ v8i32 v8i32_a = (v8i32){0, 1, 2, 3, 4, 5, 6, 7};
++ v8i32 v8i32_b = (v8i32){1, 2, 3, 4, 5, 6, 7, 8};
++ v8i32 v8i32_c = (v8i32){2, 3, 4, 5, 6, 7, 8, 9};
++ v8i32 v8i32_r;
++
++ v4i64 v4i64_a = (v4i64){0, 1, 2, 3};
++ v4i64 v4i64_b = (v4i64){1, 2, 3, 4};
++ v4i64 v4i64_c = (v4i64){2, 3, 4, 5};
++ v4i64 v4i64_r;
++
++ v32u8 v32u8_a = (v32u8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
++ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
++ v32u8 v32u8_b =
(v32u8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ++ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; ++ v32u8 v32u8_c = (v32u8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, ++ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33}; ++ v32u8 v32u8_r; ++ ++ v16u16 v16u16_a = (v16u16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16u16 v16u16_b = (v16u16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16u16 v16u16_c = (v16u16){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16u16 v16u16_r; ++ ++ v8u32 v8u32_a = (v8u32){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8u32 v8u32_b = (v8u32){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8u32 v8u32_c = (v8u32){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8u32 v8u32_r; ++ ++ v4u64 v4u64_a = (v4u64){0, 1, 2, 3}; ++ v4u64 v4u64_b = (v4u64){1, 2, 3, 4}; ++ v4u64 v4u64_c = (v4u64){2, 3, 4, 5}; ++ v4u64 v4u64_r; ++ ++ v8f32 v8f32_a = (v8f32){0.5, 1, 2, 3, 4, 5, 6, 7}; ++ v8f32 v8f32_b = (v8f32){1.5, 2, 3, 4, 5, 6, 7, 8}; ++ v8f32 v8f32_c = (v8f32){2.5, 3, 4, 5, 6, 7, 8, 9}; ++ v8f32 v8f32_r; ++ v4f64 v4f64_a = (v4f64){0.5, 1, 2, 3}; ++ v4f64 v4f64_b = (v4f64){1.5, 2, 3, 4}; ++ v4f64 v4f64_c = (v4f64){2.5, 3, 4, 5}; ++ v4f64 v4f64_r; ++ ++ int i32_r; ++ int i32_a = 1; ++ int i32_b = 2; ++ unsigned int u32_r; ++ unsigned int u32_a = 1; ++ unsigned int u32_b = 2; ++ long long i64_r; ++ long long i64_a = 1; ++ long long i64_b = 2; ++ long long i64_c = 3; ++ long int i64_d = 0; ++ unsigned long long u64_r; ++ unsigned long long u64_a = 1; ++ unsigned long long u64_b = 2; ++ unsigned long long u64_c = 3; ++ ++ // __lasx_xvsll_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsll_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsll.b( ++ ++ // __lasx_xvsll_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsll_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsll.h( ++ ++ // __lasx_xvsll_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsll_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsll.w( ++ ++ // __lasx_xvsll_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsll_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsll.d( ++ ++ // __lasx_xvslli_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvslli_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslli.b( ++ ++ // __lasx_xvslli_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvslli_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslli.h( ++ ++ // __lasx_xvslli_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvslli_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslli.w( ++ ++ // __lasx_xvslli_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvslli_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslli.d( ++ ++ // __lasx_xvsra_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsra_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsra.b( ++ ++ // __lasx_xvsra_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsra_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsra.h( ++ ++ // __lasx_xvsra_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsra_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsra.w( ++ ++ // __lasx_xvsra_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = 
__lasx_xvsra_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsra.d( ++ ++ // __lasx_xvsrai_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvsrai_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrai.b( ++ ++ // __lasx_xvsrai_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsrai_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrai.h( ++ ++ // __lasx_xvsrai_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsrai_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrai.w( ++ ++ // __lasx_xvsrai_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsrai_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrai.d( ++ ++ // __lasx_xvsrar_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsrar_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrar.b( ++ ++ // __lasx_xvsrar_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsrar_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrar.h( ++ ++ // __lasx_xvsrar_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsrar_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrar.w( ++ ++ // __lasx_xvsrar_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsrar_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrar.d( ++ ++ // __lasx_xvsrari_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvsrari_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrari.b( ++ ++ // __lasx_xvsrari_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsrari_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrari.h( ++ ++ // __lasx_xvsrari_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsrari_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrari.w( ++ ++ // __lasx_xvsrari_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsrari_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrari.d( ++ ++ // __lasx_xvsrl_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsrl_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrl.b( ++ ++ // __lasx_xvsrl_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsrl_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrl.h( ++ ++ // __lasx_xvsrl_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsrl_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrl.w( ++ ++ // __lasx_xvsrl_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsrl_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrl.d( ++ ++ // __lasx_xvsrli_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvsrli_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrli.b( ++ ++ // __lasx_xvsrli_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsrli_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrli.h( ++ ++ // __lasx_xvsrli_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsrli_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrli.w( ++ ++ // __lasx_xvsrli_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsrli_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrli.d( ++ ++ // __lasx_xvsrlr_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI 
++ v32i8_r = __lasx_xvsrlr_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b( ++ ++ // __lasx_xvsrlr_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsrlr_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h( ++ ++ // __lasx_xvsrlr_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsrlr_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w( ++ ++ // __lasx_xvsrlr_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsrlr_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d( ++ ++ // __lasx_xvsrlri_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvsrlri_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b( ++ ++ // __lasx_xvsrlri_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsrlri_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h( ++ ++ // __lasx_xvsrlri_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsrlri_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w( ++ ++ // __lasx_xvsrlri_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsrlri_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d( ++ ++ // __lasx_xvbitclr_b ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvbitclr_b(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b( ++ ++ // __lasx_xvbitclr_h ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvbitclr_h(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h( ++ ++ // __lasx_xvbitclr_w ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvbitclr_w(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w( ++ ++ // __lasx_xvbitclr_d ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvbitclr_d(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d( ++ ++ // __lasx_xvbitclri_b ++ // xd, xj, ui3 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvbitclri_b(v32u8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b( ++ ++ // __lasx_xvbitclri_h ++ // xd, xj, ui4 ++ // UV16HI, UV16HI, UQI ++ v16u16_r = __lasx_xvbitclri_h(v16u16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h( ++ ++ // __lasx_xvbitclri_w ++ // xd, xj, ui5 ++ // UV8SI, UV8SI, UQI ++ v8u32_r = __lasx_xvbitclri_w(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w( ++ ++ // __lasx_xvbitclri_d ++ // xd, xj, ui6 ++ // UV4DI, UV4DI, UQI ++ v4u64_r = __lasx_xvbitclri_d(v4u64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d( ++ ++ // __lasx_xvbitset_b ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvbitset_b(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitset.b( ++ ++ // __lasx_xvbitset_h ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvbitset_h(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitset.h( ++ ++ // __lasx_xvbitset_w ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvbitset_w(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitset.w( ++ ++ // __lasx_xvbitset_d ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvbitset_d(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitset.d( ++ ++ // __lasx_xvbitseti_b ++ // xd, xj, ui3 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = 
__lasx_xvbitseti_b(v32u8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b( ++ ++ // __lasx_xvbitseti_h ++ // xd, xj, ui4 ++ // UV16HI, UV16HI, UQI ++ v16u16_r = __lasx_xvbitseti_h(v16u16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h( ++ ++ // __lasx_xvbitseti_w ++ // xd, xj, ui5 ++ // UV8SI, UV8SI, UQI ++ v8u32_r = __lasx_xvbitseti_w(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w( ++ ++ // __lasx_xvbitseti_d ++ // xd, xj, ui6 ++ // UV4DI, UV4DI, UQI ++ v4u64_r = __lasx_xvbitseti_d(v4u64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d( ++ ++ // __lasx_xvbitrev_b ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvbitrev_b(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b( ++ ++ // __lasx_xvbitrev_h ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvbitrev_h(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h( ++ ++ // __lasx_xvbitrev_w ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvbitrev_w(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w( ++ ++ // __lasx_xvbitrev_d ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvbitrev_d(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d( ++ ++ // __lasx_xvbitrevi_b ++ // xd, xj, ui3 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvbitrevi_b(v32u8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b( ++ ++ // __lasx_xvbitrevi_h ++ // xd, xj, ui4 ++ // UV16HI, UV16HI, UQI ++ v16u16_r = __lasx_xvbitrevi_h(v16u16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h( ++ ++ // __lasx_xvbitrevi_w ++ // xd, xj, ui5 ++ // UV8SI, UV8SI, UQI ++ v8u32_r = __lasx_xvbitrevi_w(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w( ++ ++ // __lasx_xvbitrevi_d ++ // xd, xj, ui6 ++ // UV4DI, UV4DI, UQI ++ v4u64_r = __lasx_xvbitrevi_d(v4u64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d( ++ ++ // __lasx_xvadd_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvadd_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvadd.b( ++ ++ // __lasx_xvadd_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvadd_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvadd.h( ++ ++ // __lasx_xvadd_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvadd_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvadd.w( ++ ++ // __lasx_xvadd_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvadd_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvadd.d( ++ ++ // __lasx_xvaddi_bu ++ // xd, xj, ui5 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvaddi_bu(v32i8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu( ++ ++ // __lasx_xvaddi_hu ++ // xd, xj, ui5 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvaddi_hu(v16i16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu( ++ ++ // __lasx_xvaddi_wu ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvaddi_wu(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu( ++ ++ // __lasx_xvaddi_du ++ // xd, xj, ui5 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvaddi_du(v4i64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddi.du( ++ ++ // __lasx_xvsub_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsub_b(v32i8_a, v32i8_b); // 
CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsub.b( ++ ++ // __lasx_xvsub_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsub_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsub.h( ++ ++ // __lasx_xvsub_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsub_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsub.w( ++ ++ // __lasx_xvsub_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsub_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsub.d( ++ ++ // __lasx_xvsubi_bu ++ // xd, xj, ui5 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvsubi_bu(v32i8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu( ++ ++ // __lasx_xvsubi_hu ++ // xd, xj, ui5 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsubi_hu(v16i16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu( ++ ++ // __lasx_xvsubi_wu ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsubi_wu(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu( ++ ++ // __lasx_xvsubi_du ++ // xd, xj, ui5 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsubi_du(v4i64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubi.du( ++ ++ // __lasx_xvmax_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmax_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmax.b( ++ ++ // __lasx_xvmax_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmax_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmax.h( ++ ++ // __lasx_xvmax_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmax_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmax.w( ++ ++ // __lasx_xvmax_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmax_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmax.d( ++ ++ // __lasx_xvmaxi_b ++ // xd, xj, si5 ++ // V32QI, V32QI, QI ++ v32i8_r = __lasx_xvmaxi_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b( ++ ++ // __lasx_xvmaxi_h ++ // xd, xj, si5 ++ // V16HI, V16HI, QI ++ v16i16_r = __lasx_xvmaxi_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h( ++ ++ // __lasx_xvmaxi_w ++ // xd, xj, si5 ++ // V8SI, V8SI, QI ++ v8i32_r = __lasx_xvmaxi_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w( ++ ++ // __lasx_xvmaxi_d ++ // xd, xj, si5 ++ // V4DI, V4DI, QI ++ v4i64_r = __lasx_xvmaxi_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d( ++ ++ // __lasx_xvmax_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvmax_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmax.bu( ++ ++ // __lasx_xvmax_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvmax_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmax.hu( ++ ++ // __lasx_xvmax_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvmax_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmax.wu( ++ ++ // __lasx_xvmax_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmax_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmax.du( ++ ++ // __lasx_xvmaxi_bu ++ // xd, xj, ui5 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvmaxi_bu(v32u8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu( ++ ++ // __lasx_xvmaxi_hu ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, UQI ++ v16u16_r = 
__lasx_xvmaxi_hu(v16u16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu( ++ ++ // __lasx_xvmaxi_wu ++ // xd, xj, ui5 ++ // UV8SI, UV8SI, UQI ++ v8u32_r = __lasx_xvmaxi_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu( ++ ++ // __lasx_xvmaxi_du ++ // xd, xj, ui5 ++ // UV4DI, UV4DI, UQI ++ v4u64_r = __lasx_xvmaxi_du(v4u64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du( ++ ++ // __lasx_xvmin_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmin_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmin.b( ++ ++ // __lasx_xvmin_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmin_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmin.h( ++ ++ // __lasx_xvmin_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmin_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmin.w( ++ ++ // __lasx_xvmin_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmin_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmin.d( ++ ++ // __lasx_xvmini_b ++ // xd, xj, si5 ++ // V32QI, V32QI, QI ++ v32i8_r = __lasx_xvmini_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmini.b( ++ ++ // __lasx_xvmini_h ++ // xd, xj, si5 ++ // V16HI, V16HI, QI ++ v16i16_r = __lasx_xvmini_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmini.h( ++ ++ // __lasx_xvmini_w ++ // xd, xj, si5 ++ // V8SI, V8SI, QI ++ v8i32_r = __lasx_xvmini_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmini.w( ++ ++ // __lasx_xvmini_d ++ // xd, xj, si5 ++ // V4DI, V4DI, QI ++ v4i64_r = __lasx_xvmini_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmini.d( ++ ++ // __lasx_xvmin_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvmin_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmin.bu( ++ ++ // __lasx_xvmin_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvmin_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmin.hu( ++ ++ // __lasx_xvmin_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvmin_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmin.wu( ++ ++ // __lasx_xvmin_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmin_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmin.du( ++ ++ // __lasx_xvmini_bu ++ // xd, xj, ui5 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvmini_bu(v32u8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmini.bu( ++ ++ // __lasx_xvmini_hu ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, UQI ++ v16u16_r = __lasx_xvmini_hu(v16u16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmini.hu( ++ ++ // __lasx_xvmini_wu ++ // xd, xj, ui5 ++ // UV8SI, UV8SI, UQI ++ v8u32_r = __lasx_xvmini_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmini.wu( ++ ++ // __lasx_xvmini_du ++ // xd, xj, ui5 ++ // UV4DI, UV4DI, UQI ++ v4u64_r = __lasx_xvmini_du(v4u64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmini.du( ++ ++ // __lasx_xvseq_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvseq_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvseq.b( ++ ++ // __lasx_xvseq_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvseq_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvseq.h( ++ ++ // __lasx_xvseq_w ++ // xd, xj, 
xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvseq_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvseq.w( ++ ++ // __lasx_xvseq_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvseq_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvseq.d( ++ ++ // __lasx_xvseqi_b ++ // xd, xj, si5 ++ // V32QI, V32QI, QI ++ v32i8_r = __lasx_xvseqi_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvseqi.b( ++ ++ // __lasx_xvseqi_h ++ // xd, xj, si5 ++ // V16HI, V16HI, QI ++ v16i16_r = __lasx_xvseqi_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvseqi.h( ++ ++ // __lasx_xvseqi_w ++ // xd, xj, si5 ++ // V8SI, V8SI, QI ++ v8i32_r = __lasx_xvseqi_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvseqi.w( ++ ++ // __lasx_xvseqi_d ++ // xd, xj, si5 ++ // V4DI, V4DI, QI ++ v4i64_r = __lasx_xvseqi_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvseqi.d( ++ ++ // __lasx_xvslt_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvslt_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslt.b( ++ ++ // __lasx_xvslt_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvslt_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslt.h( ++ ++ // __lasx_xvslt_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvslt_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslt.w( ++ ++ // __lasx_xvslt_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvslt_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslt.d( ++ ++ // __lasx_xvslti_b ++ // xd, xj, si5 ++ // V32QI, V32QI, QI ++ v32i8_r = __lasx_xvslti_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslti.b( ++ ++ // __lasx_xvslti_h ++ // xd, xj, si5 ++ // V16HI, V16HI, QI ++ v16i16_r = __lasx_xvslti_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslti.h( ++ ++ // __lasx_xvslti_w ++ // xd, xj, si5 ++ // V8SI, V8SI, QI ++ v8i32_r = __lasx_xvslti_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslti.w( ++ ++ // __lasx_xvslti_d ++ // xd, xj, si5 ++ // V4DI, V4DI, QI ++ v4i64_r = __lasx_xvslti_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslti.d( ++ ++ // __lasx_xvslt_bu ++ // xd, xj, xk ++ // V32QI, UV32QI, UV32QI ++ v32i8_r = __lasx_xvslt_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslt.bu( ++ ++ // __lasx_xvslt_hu ++ // xd, xj, xk ++ // V16HI, UV16HI, UV16HI ++ v16i16_r = __lasx_xvslt_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslt.hu( ++ ++ // __lasx_xvslt_wu ++ // xd, xj, xk ++ // V8SI, UV8SI, UV8SI ++ v8i32_r = __lasx_xvslt_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslt.wu( ++ ++ // __lasx_xvslt_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvslt_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslt.du( ++ ++ // __lasx_xvslti_bu ++ // xd, xj, ui5 ++ // V32QI, UV32QI, UQI ++ v32i8_r = __lasx_xvslti_bu(v32u8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslti.bu( ++ ++ // __lasx_xvslti_hu ++ // xd, xj, ui5 ++ // V16HI, UV16HI, UQI ++ v16i16_r = __lasx_xvslti_hu(v16u16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslti.hu( ++ ++ // __lasx_xvslti_wu ++ // xd, xj, ui5 ++ // V8SI, UV8SI, UQI ++ v8i32_r = __lasx_xvslti_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslti.wu( ++ ++ // __lasx_xvslti_du ++ 
// xd, xj, ui5 ++ // V4DI, UV4DI, UQI ++ v4i64_r = __lasx_xvslti_du(v4u64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslti.du( ++ ++ // __lasx_xvsle_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsle_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsle.b( ++ ++ // __lasx_xvsle_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsle_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsle.h( ++ ++ // __lasx_xvsle_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsle_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsle.w( ++ ++ // __lasx_xvsle_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsle_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsle.d( ++ ++ // __lasx_xvslei_b ++ // xd, xj, si5 ++ // V32QI, V32QI, QI ++ v32i8_r = __lasx_xvslei_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslei.b( ++ ++ // __lasx_xvslei_h ++ // xd, xj, si5 ++ // V16HI, V16HI, QI ++ v16i16_r = __lasx_xvslei_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslei.h( ++ ++ // __lasx_xvslei_w ++ // xd, xj, si5 ++ // V8SI, V8SI, QI ++ v8i32_r = __lasx_xvslei_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslei.w( ++ ++ // __lasx_xvslei_d ++ // xd, xj, si5 ++ // V4DI, V4DI, QI ++ v4i64_r = __lasx_xvslei_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslei.d( ++ ++ // __lasx_xvsle_bu ++ // xd, xj, xk ++ // V32QI, UV32QI, UV32QI ++ v32i8_r = __lasx_xvsle_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsle.bu( ++ ++ // __lasx_xvsle_hu ++ // xd, xj, xk ++ // V16HI, UV16HI, UV16HI ++ v16i16_r = __lasx_xvsle_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsle.hu( ++ ++ // __lasx_xvsle_wu ++ // xd, xj, xk ++ // V8SI, UV8SI, UV8SI ++ v8i32_r = __lasx_xvsle_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsle.wu( ++ ++ // __lasx_xvsle_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvsle_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsle.du( ++ ++ // __lasx_xvslei_bu ++ // xd, xj, ui5 ++ // V32QI, UV32QI, UQI ++ v32i8_r = __lasx_xvslei_bu(v32u8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslei.bu( ++ ++ // __lasx_xvslei_hu ++ // xd, xj, ui5 ++ // V16HI, UV16HI, UQI ++ v16i16_r = __lasx_xvslei_hu(v16u16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslei.hu( ++ ++ // __lasx_xvslei_wu ++ // xd, xj, ui5 ++ // V8SI, UV8SI, UQI ++ v8i32_r = __lasx_xvslei_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslei.wu( ++ ++ // __lasx_xvslei_du ++ // xd, xj, ui5 ++ // V4DI, UV4DI, UQI ++ v4i64_r = __lasx_xvslei_du(v4u64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslei.du( ++ ++ // __lasx_xvsat_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvsat_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsat.b( ++ ++ // __lasx_xvsat_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsat_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsat.h( ++ ++ // __lasx_xvsat_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsat_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsat.w( ++ ++ // __lasx_xvsat_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsat_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsat.d( ++ ++ // 
__lasx_xvsat_bu ++ // xd, xj, ui3 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvsat_bu(v32u8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsat.bu( ++ ++ // __lasx_xvsat_hu ++ // xd, xj, ui4 ++ // UV16HI, UV16HI, UQI ++ v16u16_r = __lasx_xvsat_hu(v16u16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsat.hu( ++ ++ // __lasx_xvsat_wu ++ // xd, xj, ui5 ++ // UV8SI, UV8SI, UQI ++ v8u32_r = __lasx_xvsat_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsat.wu( ++ ++ // __lasx_xvsat_du ++ // xd, xj, ui6 ++ // UV4DI, UV4DI, UQI ++ v4u64_r = __lasx_xvsat_du(v4u64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsat.du( ++ ++ // __lasx_xvadda_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvadda_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvadda.b( ++ ++ // __lasx_xvadda_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvadda_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvadda.h( ++ ++ // __lasx_xvadda_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvadda_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvadda.w( ++ ++ // __lasx_xvadda_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvadda_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvadda.d( ++ ++ // __lasx_xvsadd_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsadd_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsadd.b( ++ ++ // __lasx_xvsadd_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsadd_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsadd.h( ++ ++ // __lasx_xvsadd_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsadd_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsadd.w( ++ ++ // __lasx_xvsadd_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsadd_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsadd.d( ++ ++ // __lasx_xvsadd_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvsadd_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu( ++ ++ // __lasx_xvsadd_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvsadd_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu( ++ ++ // __lasx_xvsadd_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvsadd_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu( ++ ++ // __lasx_xvsadd_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvsadd_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsadd.du( ++ ++ // __lasx_xvavg_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvavg_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvavg.b( ++ ++ // __lasx_xvavg_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvavg_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvavg.h( ++ ++ // __lasx_xvavg_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvavg_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvavg.w( ++ ++ // __lasx_xvavg_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvavg_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvavg.d( ++ ++ // __lasx_xvavg_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvavg_bu(v32u8_a, v32u8_b); // 
CHECK: call <32 x i8> @llvm.loongarch.lasx.xvavg.bu( ++ ++ // __lasx_xvavg_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvavg_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvavg.hu( ++ ++ // __lasx_xvavg_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvavg_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvavg.wu( ++ ++ // __lasx_xvavg_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvavg_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvavg.du( ++ ++ // __lasx_xvavgr_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvavgr_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvavgr.b( ++ ++ // __lasx_xvavgr_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvavgr_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvavgr.h( ++ ++ // __lasx_xvavgr_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvavgr_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvavgr.w( ++ ++ // __lasx_xvavgr_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvavgr_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvavgr.d( ++ ++ // __lasx_xvavgr_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvavgr_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu( ++ ++ // __lasx_xvavgr_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvavgr_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu( ++ ++ // __lasx_xvavgr_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvavgr_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu( ++ ++ // __lasx_xvavgr_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvavgr_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvavgr.du( ++ ++ // __lasx_xvssub_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvssub_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssub.b( ++ ++ // __lasx_xvssub_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvssub_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssub.h( ++ ++ // __lasx_xvssub_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvssub_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssub.w( ++ ++ // __lasx_xvssub_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvssub_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssub.d( ++ ++ // __lasx_xvssub_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvssub_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssub.bu( ++ ++ // __lasx_xvssub_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvssub_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssub.hu( ++ ++ // __lasx_xvssub_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvssub_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssub.wu( ++ ++ // __lasx_xvssub_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvssub_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssub.du( ++ ++ // __lasx_xvabsd_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvabsd_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvabsd.b( ++ ++ // 
__lasx_xvabsd_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvabsd_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvabsd.h( ++ ++ // __lasx_xvabsd_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvabsd_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvabsd.w( ++ ++ // __lasx_xvabsd_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvabsd_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvabsd.d( ++ ++ // __lasx_xvabsd_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvabsd_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu( ++ ++ // __lasx_xvabsd_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvabsd_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu( ++ ++ // __lasx_xvabsd_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvabsd_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu( ++ ++ // __lasx_xvabsd_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvabsd_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvabsd.du( ++ ++ // __lasx_xvmul_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmul_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmul.b( ++ ++ // __lasx_xvmul_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmul_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmul.h( ++ ++ // __lasx_xvmul_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmul_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmul.w( ++ ++ // __lasx_xvmul_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmul_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmul.d( ++ ++ // __lasx_xvmadd_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmadd_b(v32i8_a, v32i8_b, v32i8_c); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmadd.b( ++ ++ // __lasx_xvmadd_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmadd_h(v16i16_a, v16i16_b, v16i16_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmadd.h( ++ ++ // __lasx_xvmadd_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmadd_w(v8i32_a, v8i32_b, v8i32_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmadd.w( ++ ++ // __lasx_xvmadd_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmadd_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmadd.d( ++ ++ // __lasx_xvmsub_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmsub_b(v32i8_a, v32i8_b, v32i8_c); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmsub.b( ++ ++ // __lasx_xvmsub_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmsub_h(v16i16_a, v16i16_b, v16i16_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmsub.h( ++ ++ // __lasx_xvmsub_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmsub_w(v8i32_a, v8i32_b, v8i32_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmsub.w( ++ ++ // __lasx_xvmsub_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmsub_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmsub.d( ++ ++ // __lasx_xvdiv_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvdiv_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> 
@llvm.loongarch.lasx.xvdiv.b( ++ ++ // __lasx_xvdiv_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvdiv_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvdiv.h( ++ ++ // __lasx_xvdiv_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvdiv_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvdiv.w( ++ ++ // __lasx_xvdiv_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvdiv_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvdiv.d( ++ ++ // __lasx_xvdiv_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvdiv_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu( ++ ++ // __lasx_xvdiv_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvdiv_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu( ++ ++ // __lasx_xvdiv_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvdiv_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu( ++ ++ // __lasx_xvdiv_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvdiv_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvdiv.du( ++ ++ // __lasx_xvhaddw_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvhaddw_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b( ++ ++ // __lasx_xvhaddw_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvhaddw_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h( ++ ++ // __lasx_xvhaddw_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvhaddw_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w( ++ ++ // __lasx_xvhaddw_hu_bu ++ // xd, xj, xk ++ // UV16HI, UV32QI, UV32QI ++ v16u16_r = __lasx_xvhaddw_hu_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu( ++ ++ // __lasx_xvhaddw_wu_hu ++ // xd, xj, xk ++ // UV8SI, UV16HI, UV16HI ++ v8u32_r = __lasx_xvhaddw_wu_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu( ++ ++ // __lasx_xvhaddw_du_wu ++ // xd, xj, xk ++ // UV4DI, UV8SI, UV8SI ++ v4u64_r = __lasx_xvhaddw_du_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu( ++ ++ // __lasx_xvhsubw_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvhsubw_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b( ++ ++ // __lasx_xvhsubw_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvhsubw_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h( ++ ++ // __lasx_xvhsubw_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvhsubw_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w( ++ ++ // __lasx_xvhsubw_hu_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvhsubw_hu_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu( ++ ++ // __lasx_xvhsubw_wu_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvhsubw_wu_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu( ++ ++ // __lasx_xvhsubw_du_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvhsubw_du_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu( ++ ++ // __lasx_xvmod_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = 
__lasx_xvmod_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmod.b( ++ ++ // __lasx_xvmod_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmod_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmod.h( ++ ++ // __lasx_xvmod_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmod_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmod.w( ++ ++ // __lasx_xvmod_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmod_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmod.d( ++ ++ // __lasx_xvmod_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvmod_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmod.bu( ++ ++ // __lasx_xvmod_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvmod_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmod.hu( ++ ++ // __lasx_xvmod_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvmod_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmod.wu( ++ ++ // __lasx_xvmod_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmod_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmod.du( ++ ++ // __lasx_xvrepl128vei_b ++ // xd, xj, ui4 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvrepl128vei_b(v32i8_a, ui4_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b( ++ ++ // __lasx_xvrepl128vei_h ++ // xd, xj, ui3 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvrepl128vei_h(v16i16_a, ui3_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h( ++ ++ // __lasx_xvrepl128vei_w ++ // xd, xj, ui2 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvrepl128vei_w(v8i32_a, ui2_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w( ++ ++ // __lasx_xvrepl128vei_d ++ // xd, xj, ui1 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvrepl128vei_d(v4i64_a, ui1_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d( ++ ++ // __lasx_xvpickev_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvpickev_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpickev.b( ++ ++ // __lasx_xvpickev_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvpickev_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpickev.h( ++ ++ // __lasx_xvpickev_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvpickev_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpickev.w( ++ ++ // __lasx_xvpickev_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvpickev_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpickev.d( ++ ++ // __lasx_xvpickod_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvpickod_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpickod.b( ++ ++ // __lasx_xvpickod_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvpickod_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpickod.h( ++ ++ // __lasx_xvpickod_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvpickod_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpickod.w( ++ ++ // __lasx_xvpickod_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvpickod_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpickod.d( ++ ++ // __lasx_xvilvh_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = 
__lasx_xvilvh_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvilvh.b( ++ ++ // __lasx_xvilvh_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvilvh_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvilvh.h( ++ ++ // __lasx_xvilvh_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvilvh_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvilvh.w( ++ ++ // __lasx_xvilvh_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvilvh_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvilvh.d( ++ ++ // __lasx_xvilvl_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvilvl_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvilvl.b( ++ ++ // __lasx_xvilvl_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvilvl_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvilvl.h( ++ ++ // __lasx_xvilvl_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvilvl_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvilvl.w( ++ ++ // __lasx_xvilvl_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvilvl_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvilvl.d( ++ ++ // __lasx_xvpackev_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvpackev_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpackev.b( ++ ++ // __lasx_xvpackev_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvpackev_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpackev.h( ++ ++ // __lasx_xvpackev_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvpackev_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpackev.w( ++ ++ // __lasx_xvpackev_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvpackev_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpackev.d( ++ ++ // __lasx_xvpackod_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvpackod_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpackod.b( ++ ++ // __lasx_xvpackod_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvpackod_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpackod.h( ++ ++ // __lasx_xvpackod_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvpackod_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpackod.w( ++ ++ // __lasx_xvpackod_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvpackod_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpackod.d( ++ ++ // __lasx_xvshuf_b ++ // xd, xj, xk, xa ++ // V32QI, V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvshuf_b(v32i8_a, v32i8_b, v32i8_c); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvshuf.b( ++ ++ // __lasx_xvshuf_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvshuf_h(v16i16_a, v16i16_b, v16i16_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvshuf.h( ++ ++ // __lasx_xvshuf_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvshuf_w(v8i32_a, v8i32_b, v8i32_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvshuf.w( ++ ++ // __lasx_xvshuf_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvshuf_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvshuf.d( ++ ++ // __lasx_xvand_v ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = 
__lasx_xvand_v(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvand.v( ++ ++ // __lasx_xvandi_b ++ // xd, xj, ui8 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvandi_b(v32u8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvandi.b( ++ ++ // __lasx_xvor_v ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvor_v(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvor.v( ++ ++ // __lasx_xvori_b ++ // xd, xj, ui8 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvori_b(v32u8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvori.b( ++ ++ // __lasx_xvnor_v ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvnor_v(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvnor.v( ++ ++ // __lasx_xvnori_b ++ // xd, xj, ui8 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvnori_b(v32u8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvnori.b( ++ ++ // __lasx_xvxor_v ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvxor_v(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvxor.v( ++ ++ // __lasx_xvxori_b ++ // xd, xj, ui8 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvxori_b(v32u8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvxori.b( ++ ++ // __lasx_xvbitsel_v ++ // xd, xj, xk, xa ++ // UV32QI, UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvbitsel_v(v32u8_a, v32u8_b, v32u8_c); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v( ++ ++ // __lasx_xvbitseli_b ++ // xd, xj, ui8 ++ // UV32QI, UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvbitseli_b(v32u8_a, v32u8_b, ui8); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b( ++ ++ // __lasx_xvshuf4i_b ++ // xd, xj, ui8 ++ // V32QI, V32QI, USI ++ v32i8_r = __lasx_xvshuf4i_b(v32i8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b( ++ ++ // __lasx_xvshuf4i_h ++ // xd, xj, ui8 ++ // V16HI, V16HI, USI ++ v16i16_r = __lasx_xvshuf4i_h(v16i16_a, ui8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h( ++ ++ // __lasx_xvshuf4i_w ++ // xd, xj, ui8 ++ // V8SI, V8SI, USI ++ v8i32_r = __lasx_xvshuf4i_w(v8i32_a, ui8_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w( ++ ++ // __lasx_xvreplgr2vr_b ++ // xd, rj ++ // V32QI, SI ++ v32i8_r = __lasx_xvreplgr2vr_b(i32_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b( ++ ++ // __lasx_xvreplgr2vr_h ++ // xd, rj ++ // V16HI, SI ++ v16i16_r = __lasx_xvreplgr2vr_h(i32_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h( ++ ++ // __lasx_xvreplgr2vr_w ++ // xd, rj ++ // V8SI, SI ++ v8i32_r = __lasx_xvreplgr2vr_w(i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w( ++ ++ // __lasx_xvreplgr2vr_d ++ // xd, rj ++ // V4DI, DI ++ v4i64_r = __lasx_xvreplgr2vr_d(i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d( ++ ++ // __lasx_xvpcnt_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvpcnt_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b( ++ ++ // __lasx_xvpcnt_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvpcnt_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h( ++ ++ // __lasx_xvpcnt_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvpcnt_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w( ++ ++ // __lasx_xvpcnt_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvpcnt_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d( ++ ++ // __lasx_xvclo_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvclo_b(v32i8_a); 
// CHECK: call <32 x i8> @llvm.loongarch.lasx.xvclo.b( ++ ++ // __lasx_xvclo_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvclo_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvclo.h( ++ ++ // __lasx_xvclo_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvclo_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvclo.w( ++ ++ // __lasx_xvclo_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvclo_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvclo.d( ++ ++ // __lasx_xvclz_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvclz_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvclz.b( ++ ++ // __lasx_xvclz_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvclz_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvclz.h( ++ ++ // __lasx_xvclz_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvclz_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvclz.w( ++ ++ // __lasx_xvclz_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvclz_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvclz.d( ++ ++ // __lasx_xvfcmp_caf_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_caf_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s( ++ ++ // __lasx_xvfcmp_caf_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_caf_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d( ++ ++ // __lasx_xvfcmp_cor_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cor_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s( ++ ++ // __lasx_xvfcmp_cor_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cor_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d( ++ ++ // __lasx_xvfcmp_cun_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cun_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s( ++ ++ // __lasx_xvfcmp_cun_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cun_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d( ++ ++ // __lasx_xvfcmp_cune_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cune_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s( ++ ++ // __lasx_xvfcmp_cune_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cune_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d( ++ ++ // __lasx_xvfcmp_cueq_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cueq_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s( ++ ++ // __lasx_xvfcmp_cueq_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cueq_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d( ++ ++ // __lasx_xvfcmp_ceq_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_ceq_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s( ++ ++ // __lasx_xvfcmp_ceq_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_ceq_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d( ++ ++ // __lasx_xvfcmp_cne_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cne_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s( ++ ++ // __lasx_xvfcmp_cne_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = 
__lasx_xvfcmp_cne_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d( ++ ++ // __lasx_xvfcmp_clt_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_clt_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s( ++ ++ // __lasx_xvfcmp_clt_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_clt_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d( ++ ++ // __lasx_xvfcmp_cult_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cult_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s( ++ ++ // __lasx_xvfcmp_cult_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cult_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d( ++ ++ // __lasx_xvfcmp_cle_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cle_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s( ++ ++ // __lasx_xvfcmp_cle_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cle_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d( ++ ++ // __lasx_xvfcmp_cule_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cule_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s( ++ ++ // __lasx_xvfcmp_cule_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cule_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d( ++ ++ // __lasx_xvfcmp_saf_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_saf_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s( ++ ++ // __lasx_xvfcmp_saf_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_saf_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d( ++ ++ // __lasx_xvfcmp_sor_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sor_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s( ++ ++ // __lasx_xvfcmp_sor_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sor_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d( ++ ++ // __lasx_xvfcmp_sun_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sun_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s( ++ ++ // __lasx_xvfcmp_sun_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sun_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d( ++ ++ // __lasx_xvfcmp_sune_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sune_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s( ++ ++ // __lasx_xvfcmp_sune_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sune_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d( ++ ++ // __lasx_xvfcmp_sueq_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sueq_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s( ++ ++ // __lasx_xvfcmp_sueq_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sueq_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d( ++ ++ // __lasx_xvfcmp_seq_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_seq_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> 
@llvm.loongarch.lasx.xvfcmp.seq.s( ++ ++ // __lasx_xvfcmp_seq_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_seq_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d( ++ ++ // __lasx_xvfcmp_sne_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sne_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s( ++ ++ // __lasx_xvfcmp_sne_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sne_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d( ++ ++ // __lasx_xvfcmp_slt_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_slt_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s( ++ ++ // __lasx_xvfcmp_slt_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_slt_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d( ++ ++ // __lasx_xvfcmp_sult_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sult_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s( ++ ++ // __lasx_xvfcmp_sult_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sult_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d( ++ ++ // __lasx_xvfcmp_sle_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sle_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s( ++ ++ // __lasx_xvfcmp_sle_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sle_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d( ++ ++ // __lasx_xvfcmp_sule_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sule_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s( ++ ++ // __lasx_xvfcmp_sule_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sule_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d( ++ ++ // __lasx_xvfadd_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfadd_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfadd.s( ++ ++ // __lasx_xvfadd_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfadd_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfadd.d( ++ ++ // __lasx_xvfsub_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfsub_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfsub.s( ++ ++ // __lasx_xvfsub_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfsub_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfsub.d( ++ ++ // __lasx_xvfmul_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmul_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmul.s( ++ ++ // __lasx_xvfmul_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmul_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmul.d( ++ ++ // __lasx_xvfdiv_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfdiv_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfdiv.s( ++ ++ // __lasx_xvfdiv_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfdiv_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfdiv.d( ++ ++ // __lasx_xvfcvt_h_s ++ // xd, xj, xk ++ // V16HI, V8SF, V8SF ++ v16i16_r = __lasx_xvfcvt_h_s(v8f32_a, v8f32_b); // CHECK: call <16 x i16> 
@llvm.loongarch.lasx.xvfcvt.h.s( ++ ++ // __lasx_xvfcvt_s_d ++ // xd, xj, xk ++ // V8SF, V4DF, V4DF ++ v8f32_r = __lasx_xvfcvt_s_d(v4f64_a, v4f64_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d( ++ ++ // __lasx_xvfmin_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmin_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmin.s( ++ ++ // __lasx_xvfmin_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmin_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmin.d( ++ ++ // __lasx_xvfmina_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmina_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmina.s( ++ ++ // __lasx_xvfmina_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmina_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmina.d( ++ ++ // __lasx_xvfmax_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmax_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmax.s( ++ ++ // __lasx_xvfmax_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmax_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmax.d( ++ ++ // __lasx_xvfmaxa_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmaxa_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s( ++ ++ // __lasx_xvfmaxa_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmaxa_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d( ++ ++ // __lasx_xvfclass_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvfclass_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfclass.s( ++ ++ // __lasx_xvfclass_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvfclass_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfclass.d( ++ ++ // __lasx_xvfsqrt_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfsqrt_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s( ++ ++ // __lasx_xvfsqrt_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfsqrt_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d( ++ ++ // __lasx_xvfrecip_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfrecip_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrecip.s( ++ ++ // __lasx_xvfrecip_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrecip_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrecip.d( ++ ++ // __lasx_xvfrint_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfrint_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrint.s( ++ ++ // __lasx_xvfrint_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrint_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrint.d( ++ ++ // __lasx_xvfrsqrt_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfrsqrt_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s( ++ ++ // __lasx_xvfrsqrt_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrsqrt_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d( ++ ++ // __lasx_xvflogb_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvflogb_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvflogb.s( ++ ++ // __lasx_xvflogb_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvflogb_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvflogb.d( ++ ++ // __lasx_xvfcvth_s_h ++ // xd, xj ++ // V8SF, V16HI ++ v8f32_r = 
__lasx_xvfcvth_s_h(v16i16_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h( ++ ++ // __lasx_xvfcvth_d_s ++ // xd, xj ++ // V4DF, V8SF ++ v4f64_r = __lasx_xvfcvth_d_s(v8f32_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s( ++ ++ // __lasx_xvfcvtl_s_h ++ // xd, xj ++ // V8SF, V16HI ++ v8f32_r = __lasx_xvfcvtl_s_h(v16i16_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h( ++ ++ // __lasx_xvfcvtl_d_s ++ // xd, xj ++ // V4DF, V8SF ++ v4f64_r = __lasx_xvfcvtl_d_s(v8f32_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s( ++ ++ // __lasx_xvftint_w_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvftint_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s( ++ ++ // __lasx_xvftint_l_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvftint_l_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d( ++ ++ // __lasx_xvftint_wu_s ++ // xd, xj ++ // UV8SI, V8SF ++ v8u32_r = __lasx_xvftint_wu_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s( ++ ++ // __lasx_xvftint_lu_d ++ // xd, xj ++ // UV4DI, V4DF ++ v4u64_r = __lasx_xvftint_lu_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d( ++ ++ // __lasx_xvftintrz_w_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvftintrz_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s( ++ ++ // __lasx_xvftintrz_l_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvftintrz_l_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d( ++ ++ // __lasx_xvftintrz_wu_s ++ // xd, xj ++ // UV8SI, V8SF ++ v8u32_r = __lasx_xvftintrz_wu_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s( ++ ++ // __lasx_xvftintrz_lu_d ++ // xd, xj ++ // UV4DI, V4DF ++ v4u64_r = __lasx_xvftintrz_lu_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d( ++ ++ // __lasx_xvffint_s_w ++ // xd, xj ++ // V8SF, V8SI ++ v8f32_r = __lasx_xvffint_s_w(v8i32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvffint.s.w( ++ ++ // __lasx_xvffint_d_l ++ // xd, xj ++ // V4DF, V4DI ++ v4f64_r = __lasx_xvffint_d_l(v4i64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvffint.d.l( ++ ++ // __lasx_xvffint_s_wu ++ // xd, xj ++ // V8SF, UV8SI ++ v8f32_r = __lasx_xvffint_s_wu(v8u32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu( ++ ++ // __lasx_xvffint_d_lu ++ // xd, xj ++ // V4DF, UV4DI ++ v4f64_r = __lasx_xvffint_d_lu(v4u64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu( ++ ++ // __lasx_xvreplve_b ++ // xd, xj, rk ++ // V32QI, V32QI, SI ++ v32i8_r = __lasx_xvreplve_b(v32i8_a, i32_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvreplve.b( ++ ++ // __lasx_xvreplve_h ++ // xd, xj, rk ++ // V16HI, V16HI, SI ++ v16i16_r = __lasx_xvreplve_h(v16i16_a, i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvreplve.h( ++ ++ // __lasx_xvreplve_w ++ // xd, xj, rk ++ // V8SI, V8SI, SI ++ v8i32_r = __lasx_xvreplve_w(v8i32_a, i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvreplve.w( ++ ++ // __lasx_xvreplve_d ++ // xd, xj, rk ++ // V4DI, V4DI, SI ++ v4i64_r = __lasx_xvreplve_d(v4i64_a, i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvreplve.d( ++ ++ // __lasx_xvpermi_w ++ // xd, xj, ui8 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvpermi_w(v8i32_a, v8i32_b, ui8); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpermi.w( ++ ++ // __lasx_xvandn_v ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvandn_v(v32u8_a, v32u8_b); // CHECK: call 
<32 x i8> @llvm.loongarch.lasx.xvandn.v( ++ ++ // __lasx_xvneg_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvneg_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvneg.b( ++ ++ // __lasx_xvneg_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvneg_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvneg.h( ++ ++ // __lasx_xvneg_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvneg_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvneg.w( ++ ++ // __lasx_xvneg_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvneg_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvneg.d( ++ ++ // __lasx_xvmuh_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmuh_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmuh.b( ++ ++ // __lasx_xvmuh_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmuh_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmuh.h( ++ ++ // __lasx_xvmuh_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmuh_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmuh.w( ++ ++ // __lasx_xvmuh_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmuh_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmuh.d( ++ ++ // __lasx_xvmuh_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvmuh_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu( ++ ++ // __lasx_xvmuh_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvmuh_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu( ++ ++ // __lasx_xvmuh_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvmuh_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu( ++ ++ // __lasx_xvmuh_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmuh_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmuh.du( ++ ++ // __lasx_xvsllwil_h_b ++ // xd, xj, ui3 ++ // V16HI, V32QI, UQI ++ v16i16_r = __lasx_xvsllwil_h_b(v32i8_a, ui3_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b( ++ ++ // __lasx_xvsllwil_w_h ++ // xd, xj, ui4 ++ // V8SI, V16HI, UQI ++ v8i32_r = __lasx_xvsllwil_w_h(v16i16_a, ui4_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h( ++ ++ // __lasx_xvsllwil_d_w ++ // xd, xj, ui5 ++ // V4DI, V8SI, UQI ++ v4i64_r = __lasx_xvsllwil_d_w(v8i32_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w( ++ ++ // __lasx_xvsllwil_hu_bu ++ // xd, xj, ui3 ++ // UV16HI, UV32QI, UQI ++ v16u16_r = __lasx_xvsllwil_hu_bu(v32u8_a, ui3_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu( ++ ++ // __lasx_xvsllwil_wu_hu ++ // xd, xj, ui4 ++ // UV8SI, UV16HI, UQI ++ v8u32_r = __lasx_xvsllwil_wu_hu(v16u16_a, ui4_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu( ++ ++ // __lasx_xvsllwil_du_wu ++ // xd, xj, ui5 ++ // UV4DI, UV8SI, UQI ++ v4u64_r = __lasx_xvsllwil_du_wu(v8u32_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu( ++ ++ // __lasx_xvsran_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvsran_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h( ++ ++ // __lasx_xvsran_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvsran_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w( ++ ++ // __lasx_xvsran_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r 
= __lasx_xvsran_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d( ++ ++ // __lasx_xvssran_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvssran_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h( ++ ++ // __lasx_xvssran_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvssran_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w( ++ ++ // __lasx_xvssran_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvssran_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d( ++ ++ // __lasx_xvssran_bu_h ++ // xd, xj, xk ++ // UV32QI, UV16HI, UV16HI ++ v32u8_r = __lasx_xvssran_bu_h(v16u16_a, v16u16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h( ++ ++ // __lasx_xvssran_hu_w ++ // xd, xj, xk ++ // UV16HI, UV8SI, UV8SI ++ v16u16_r = __lasx_xvssran_hu_w(v8u32_a, v8u32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w( ++ ++ // __lasx_xvssran_wu_d ++ // xd, xj, xk ++ // UV8SI, UV4DI, UV4DI ++ v8u32_r = __lasx_xvssran_wu_d(v4u64_a, v4u64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d( ++ ++ // __lasx_xvsrarn_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvsrarn_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h( ++ ++ // __lasx_xvsrarn_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvsrarn_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w( ++ ++ // __lasx_xvsrarn_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvsrarn_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d( ++ ++ // __lasx_xvssrarn_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvssrarn_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h( ++ ++ // __lasx_xvssrarn_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvssrarn_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w( ++ ++ // __lasx_xvssrarn_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvssrarn_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d( ++ ++ // __lasx_xvssrarn_bu_h ++ // xd, xj, xk ++ // UV32QI, UV16HI, UV16HI ++ v32u8_r = __lasx_xvssrarn_bu_h(v16u16_a, v16u16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h( ++ ++ // __lasx_xvssrarn_hu_w ++ // xd, xj, xk ++ // UV16HI, UV8SI, UV8SI ++ v16u16_r = __lasx_xvssrarn_hu_w(v8u32_a, v8u32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w( ++ ++ // __lasx_xvssrarn_wu_d ++ // xd, xj, xk ++ // UV8SI, UV4DI, UV4DI ++ v8u32_r = __lasx_xvssrarn_wu_d(v4u64_a, v4u64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d( ++ ++ // __lasx_xvsrln_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvsrln_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h( ++ ++ // __lasx_xvsrln_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvsrln_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w( ++ ++ // __lasx_xvsrln_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvsrln_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d( ++ ++ // __lasx_xvssrln_bu_h ++ // xd, xj, xk ++ // UV32QI, UV16HI, UV16HI ++ v32u8_r = __lasx_xvssrln_bu_h(v16u16_a, v16u16_b); // CHECK: call <32 x i8> 
@llvm.loongarch.lasx.xvssrln.bu.h( ++ ++ // __lasx_xvssrln_hu_w ++ // xd, xj, xk ++ // UV16HI, UV8SI, UV8SI ++ v16u16_r = __lasx_xvssrln_hu_w(v8u32_a, v8u32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w( ++ ++ // __lasx_xvssrln_wu_d ++ // xd, xj, xk ++ // UV8SI, UV4DI, UV4DI ++ v8u32_r = __lasx_xvssrln_wu_d(v4u64_a, v4u64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d( ++ ++ // __lasx_xvsrlrn_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvsrlrn_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h( ++ ++ // __lasx_xvsrlrn_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvsrlrn_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w( ++ ++ // __lasx_xvsrlrn_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvsrlrn_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d( ++ ++ // __lasx_xvssrlrn_bu_h ++ // xd, xj, xk ++ // UV32QI, UV16HI, UV16HI ++ v32u8_r = __lasx_xvssrlrn_bu_h(v16u16_a, v16u16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h( ++ ++ // __lasx_xvssrlrn_hu_w ++ // xd, xj, xk ++ // UV16HI, UV8SI, UV8SI ++ v16u16_r = __lasx_xvssrlrn_hu_w(v8u32_a, v8u32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w( ++ ++ // __lasx_xvssrlrn_wu_d ++ // xd, xj, xk ++ // UV8SI, UV4DI, UV4DI ++ v8u32_r = __lasx_xvssrlrn_wu_d(v4u64_a, v4u64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d( ++ ++ // __lasx_xvfrstpi_b ++ // xd, xj, ui5 ++ // V32QI, V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvfrstpi_b(v32i8_a, v32i8_b, ui5); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b( ++ ++ // __lasx_xvfrstpi_h ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvfrstpi_h(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h( ++ ++ // __lasx_xvfrstp_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvfrstp_b(v32i8_a, v32i8_b, v32i8_c); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b( ++ ++ // __lasx_xvfrstp_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvfrstp_h(v16i16_a, v16i16_b, v16i16_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h( ++ ++ // __lasx_xvshuf4i_d ++ // xd, xj, ui8 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvshuf4i_d(v4i64_a, v4i64_b, ui8); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d( ++ ++ // __lasx_xvbsrl_v ++ // xd, xj, ui5 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvbsrl_v(v32i8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v( ++ ++ // __lasx_xvbsll_v ++ // xd, xj, ui5 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvbsll_v(v32i8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbsll.v( ++ ++ // __lasx_xvextrins_b ++ // xd, xj, ui8 ++ // V32QI, V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvextrins_b(v32i8_a, v32i8_b, ui8); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvextrins.b( ++ ++ // __lasx_xvextrins_h ++ // xd, xj, ui8 ++ // V16HI, V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvextrins_h(v16i16_a, v16i16_b, ui8); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvextrins.h( ++ ++ // __lasx_xvextrins_w ++ // xd, xj, ui8 ++ // V8SI, V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvextrins_w(v8i32_a, v8i32_b, ui8); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvextrins.w( ++ ++ // __lasx_xvextrins_d ++ // xd, xj, ui8 ++ // V4DI, V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvextrins_d(v4i64_a, v4i64_b, ui8); // CHECK: call <4 x i64> 
@llvm.loongarch.lasx.xvextrins.d( ++ ++ // __lasx_xvmskltz_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvmskltz_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b( ++ ++ // __lasx_xvmskltz_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvmskltz_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h( ++ ++ // __lasx_xvmskltz_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvmskltz_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w( ++ ++ // __lasx_xvmskltz_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvmskltz_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d( ++ ++ // __lasx_xvsigncov_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsigncov_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b( ++ ++ // __lasx_xvsigncov_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsigncov_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h( ++ ++ // __lasx_xvsigncov_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsigncov_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w( ++ ++ // __lasx_xvsigncov_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsigncov_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d( ++ ++ // __lasx_xvfmadd_s ++ // xd, xj, xk, xa ++ // V8SF, V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmadd_s(v8f32_a, v8f32_b, v8f32_c); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmadd.s( ++ ++ // __lasx_xvfmadd_d ++ // xd, xj, xk, xa ++ // V4DF, V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmadd_d(v4f64_a, v4f64_b, v4f64_c); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmadd.d( ++ ++ // __lasx_xvfmsub_s ++ // xd, xj, xk, xa ++ // V8SF, V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmsub_s(v8f32_a, v8f32_b, v8f32_c); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmsub.s( ++ ++ // __lasx_xvfmsub_d ++ // xd, xj, xk, xa ++ // V4DF, V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmsub_d(v4f64_a, v4f64_b, v4f64_c); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmsub.d( ++ ++ // __lasx_xvfnmadd_s ++ // xd, xj, xk, xa ++ // V8SF, V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfnmadd_s(v8f32_a, v8f32_b, v8f32_c); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s( ++ ++ // __lasx_xvfnmadd_d ++ // xd, xj, xk, xa ++ // V4DF, V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfnmadd_d(v4f64_a, v4f64_b, v4f64_c); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d( ++ ++ // __lasx_xvfnmsub_s ++ // xd, xj, xk, xa ++ // V8SF, V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfnmsub_s(v8f32_a, v8f32_b, v8f32_c); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s( ++ ++ // __lasx_xvfnmsub_d ++ // xd, xj, xk, xa ++ // V4DF, V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfnmsub_d(v4f64_a, v4f64_b, v4f64_c); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d( ++ ++ // __lasx_xvftintrne_w_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvftintrne_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s( ++ ++ // __lasx_xvftintrne_l_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvftintrne_l_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d( ++ ++ // __lasx_xvftintrp_w_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvftintrp_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s( ++ ++ // __lasx_xvftintrp_l_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvftintrp_l_d(v4f64_a); // 
CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d( ++ ++ // __lasx_xvftintrm_w_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvftintrm_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s( ++ ++ // __lasx_xvftintrm_l_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvftintrm_l_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d( ++ ++ // __lasx_xvftint_w_d ++ // xd, xj, xk ++ // V8SI, V4DF, V4DF ++ v8i32_r = __lasx_xvftint_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d( ++ ++ // __lasx_xvffint_s_l ++ // xd, xj, xk ++ // V8SF, V4DI, V4DI ++ v8f32_r = __lasx_xvffint_s_l(v4i64_a, v4i64_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvffint.s.l( ++ ++ // __lasx_xvftintrz_w_d ++ // xd, xj, xk ++ // V8SI, V4DF, V4DF ++ v8i32_r = __lasx_xvftintrz_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d( ++ ++ // __lasx_xvftintrp_w_d ++ // xd, xj, xk ++ // V8SI, V4DF, V4DF ++ v8i32_r = __lasx_xvftintrp_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d( ++ ++ // __lasx_xvftintrm_w_d ++ // xd, xj, xk ++ // V8SI, V4DF, V4DF ++ v8i32_r = __lasx_xvftintrm_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d( ++ ++ // __lasx_xvftintrne_w_d ++ // xd, xj, xk ++ // V8SI, V4DF, V4DF ++ v8i32_r = __lasx_xvftintrne_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d( ++ ++ // __lasx_xvftinth_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftinth_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s( ++ ++ // __lasx_xvftintl_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintl_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s( ++ ++ // __lasx_xvffinth_d_w ++ // xd, xj ++ // V4DF, V8SI ++ v4f64_r = __lasx_xvffinth_d_w(v8i32_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w( ++ ++ // __lasx_xvffintl_d_w ++ // xd, xj ++ // V4DF, V8SI ++ v4f64_r = __lasx_xvffintl_d_w(v8i32_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w( ++ ++ // __lasx_xvftintrzh_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrzh_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s( ++ ++ // __lasx_xvftintrzl_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrzl_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s( ++ ++ // __lasx_xvftintrph_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrph_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s( ++ ++ // __lasx_xvftintrpl_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrpl_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s( ++ ++ // __lasx_xvftintrmh_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrmh_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s( ++ ++ // __lasx_xvftintrml_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrml_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s( ++ ++ // __lasx_xvftintrneh_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrneh_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s( ++ ++ // __lasx_xvftintrnel_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrnel_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s( ++ ++ // __lasx_xvfrintrne_s ++ // xd, xj ++ // V8SF, V8SF ++ 
v8f32_r = __lasx_xvfrintrne_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s( ++ ++ // __lasx_xvfrintrne_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrintrne_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d( ++ ++ // __lasx_xvfrintrz_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfrintrz_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s( ++ ++ // __lasx_xvfrintrz_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrintrz_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d( ++ ++ // __lasx_xvfrintrp_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfrintrp_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s( ++ ++ // __lasx_xvfrintrp_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrintrp_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d( ++ ++ // __lasx_xvfrintrm_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfrintrm_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s( ++ ++ // __lasx_xvfrintrm_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrintrm_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d( ++ ++ // __lasx_xvld ++ // xd, rj, si12 ++ // V32QI, CVPOINTER, SI ++ v32i8_r = __lasx_xvld(&v32i8_a, si12); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvld( ++ ++ // __lasx_xvst ++ // xd, rj, si12 ++ // VOID, V32QI, CVPOINTER, SI ++ __lasx_xvst(v32i8_a, &v32i8_b, si12); // CHECK: call void @llvm.loongarch.lasx.xvst( ++ ++ // __lasx_xvstelm_b ++ // xd, rj, si8, idx ++ // VOID, V32QI, CVPOINTER, SI, UQI ++ __lasx_xvstelm_b(v32i8_a, &v32i8_b, 0, idx4); // CHECK: call void @llvm.loongarch.lasx.xvstelm.b( ++ ++ // __lasx_xvstelm_h ++ // xd, rj, si8, idx ++ // VOID, V16HI, CVPOINTER, SI, UQI ++ __lasx_xvstelm_h(v16i16_a, &v16i16_b, 0, idx3); // CHECK: call void @llvm.loongarch.lasx.xvstelm.h( ++ ++ // __lasx_xvstelm_w ++ // xd, rj, si8, idx ++ // VOID, V8SI, CVPOINTER, SI, UQI ++ __lasx_xvstelm_w(v8i32_a, &v8i32_b, 0, idx2); // CHECK: call void @llvm.loongarch.lasx.xvstelm.w( ++ ++ // __lasx_xvstelm_d ++ // xd, rj, si8, idx ++ // VOID, V4DI, CVPOINTER, SI, UQI ++ __lasx_xvstelm_d(v4i64_a, &v4i64_b, 0, idx1); // CHECK: call void @llvm.loongarch.lasx.xvstelm.d( ++ ++ // __lasx_xvinsve0_w ++ // xd, xj, ui3 ++ // V8SI, V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvinsve0_w(v8i32_a, v8i32_b, 2); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w( ++ ++ // __lasx_xvinsve0_d ++ // xd, xj, ui2 ++ // V4DI, V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvinsve0_d(v4i64_a, v4i64_b, ui2); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d( ++ ++ // __lasx_xvpickve_w ++ // xd, xj, ui3 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvpickve_w(v8i32_b, 2); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpickve.w( ++ ++ // __lasx_xvpickve_d ++ // xd, xj, ui2 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvpickve_d(v4i64_b, ui2); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpickve.d( ++ ++ // __lasx_xvssrlrn_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvssrlrn_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h( ++ ++ // __lasx_xvssrlrn_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvssrlrn_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w( ++ ++ // __lasx_xvssrlrn_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvssrlrn_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> 
@llvm.loongarch.lasx.xvssrlrn.w.d( ++ ++ // __lasx_xvssrln_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvssrln_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h( ++ ++ // __lasx_xvssrln_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvssrln_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w( ++ ++ // __lasx_xvssrln_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvssrln_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d( ++ ++ // __lasx_xvorn_v ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvorn_v(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvorn.v( ++ ++ // __lasx_xvldi ++ // xd, i13 ++ // V4DI, HI ++ v4i64_r = __lasx_xvldi(i13); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvldi( ++ ++ // __lasx_xvldx ++ // xd, rj, rk ++ // V32QI, CVPOINTER, DI ++ v32i8_r = __lasx_xvldx(&v32i8_a, i64_d); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvldx( ++ ++ // __lasx_xvstx ++ // xd, rj, rk ++ // VOID, V32QI, CVPOINTER, DI ++ __lasx_xvstx(v32i8_a, &v32i8_b, i64_d); // CHECK: call void @llvm.loongarch.lasx.xvstx( ++ ++ // __lasx_xvinsgr2vr_w ++ // xd, rj, ui3 ++ // V8SI, V8SI, SI, UQI ++ v8i32_r = __lasx_xvinsgr2vr_w(v8i32_a, i32_b, ui3); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w( ++ ++ // __lasx_xvinsgr2vr_d ++ // xd, rj, ui2 ++ // V4DI, V4DI, DI, UQI ++ v4i64_r = __lasx_xvinsgr2vr_d(v4i64_a, i64_b, ui2); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d( ++ ++ // __lasx_xvreplve0_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvreplve0_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b( ++ ++ // __lasx_xvreplve0_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvreplve0_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h( ++ ++ // __lasx_xvreplve0_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvreplve0_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w( ++ ++ // __lasx_xvreplve0_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvreplve0_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d( ++ ++ // __lasx_xvreplve0_q ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvreplve0_q(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q( ++ ++ // __lasx_vext2xv_h_b ++ // xd, xj ++ // V16HI, V32QI ++ v16i16_r = __lasx_vext2xv_h_b(v32i8_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b( ++ ++ // __lasx_vext2xv_w_h ++ // xd, xj ++ // V8SI, V16HI ++ v8i32_r = __lasx_vext2xv_w_h(v16i16_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h( ++ ++ // __lasx_vext2xv_d_w ++ // xd, xj ++ // V4DI, V8SI ++ v4i64_r = __lasx_vext2xv_d_w(v8i32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w( ++ ++ // __lasx_vext2xv_w_b ++ // xd, xj ++ // V8SI, V32QI ++ v8i32_r = __lasx_vext2xv_w_b(v32i8_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b( ++ ++ //gcc build fail ++ // __lasx_vext2xv_d_h ++ // xd, xj ++ // V4DI, V16HI ++ v4i64_r = __lasx_vext2xv_d_h(v16i16_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h( ++ ++ // __lasx_vext2xv_d_b ++ // xd, xj ++ // V4DI, V32QI ++ v4i64_r = __lasx_vext2xv_d_b(v32i8_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b( ++ ++ // __lasx_vext2xv_hu_bu ++ // xd, xj ++ // V16HI, V32QI ++ v16i16_r = __lasx_vext2xv_hu_bu(v32i8_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu( ++ ++ // 
__lasx_vext2xv_wu_hu ++ // xd, xj ++ // V8SI, V16HI ++ v8i32_r = __lasx_vext2xv_wu_hu(v16i16_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu( ++ ++ // __lasx_vext2xv_du_wu ++ // xd, xj ++ // V4DI, V8SI ++ v4i64_r = __lasx_vext2xv_du_wu(v8i32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu( ++ ++ // __lasx_vext2xv_wu_bu ++ // xd, xj ++ // V8SI, V32QI ++ v8i32_r = __lasx_vext2xv_wu_bu(v32i8_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu( ++ ++ //gcc build fail ++ // __lasx_vext2xv_du_hu ++ // xd, xj ++ // V4DI, V16HI ++ v4i64_r = __lasx_vext2xv_du_hu(v16i16_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu( ++ ++ // __lasx_vext2xv_du_bu ++ // xd, xj ++ // V4DI, V32QI ++ v4i64_r = __lasx_vext2xv_du_bu(v32i8_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu( ++ ++ // __lasx_xvpermi_q ++ // xd, xj, ui8 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvpermi_q(v32i8_a, v32i8_b, ui8); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpermi.q( ++ ++ // __lasx_xvpermi_d ++ // xd, xj, ui8 ++ // V4DI, V4DI, USI ++ v4i64_r = __lasx_xvpermi_d(v4i64_a, ui8); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpermi.d( ++ ++ // __lasx_xvperm_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvperm_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvperm.w( ++ ++ // __lasx_xvldrepl_b ++ // xd, rj, si12 ++ // V32QI, CVPOINTER, SI ++ v32i8_r = __lasx_xvldrepl_b(&v32i8_a, si12); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b( ++ ++ // __lasx_xvldrepl_h ++ // xd, rj, si11 ++ // V16HI, CVPOINTER, SI ++ v16i16_r = __lasx_xvldrepl_h(&v16i16_a, si11); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h( ++ ++ // __lasx_xvldrepl_w ++ // xd, rj, si10 ++ // V8SI, CVPOINTER, SI ++ v8i32_r = __lasx_xvldrepl_w(&v8i32_a, si10); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w( ++ ++ // __lasx_xvldrepl_d ++ // xd, rj, si9 ++ // V4DI, CVPOINTER, SI ++ v4i64_r = __lasx_xvldrepl_d(&v4i64_a, si9); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d( ++ ++ // __lasx_xvpickve2gr_w ++ // rd, xj, ui3 ++ // SI, V8SI, UQI ++ i32_r = __lasx_xvpickve2gr_w(v8i32_a, ui3_b); // CHECK: call i32 @llvm.loongarch.lasx.xvpickve2gr.w( ++ ++ // __lasx_xvpickve2gr_wu ++ // rd, xj, ui3 ++ // USI, V8SI, UQI ++ u32_r = __lasx_xvpickve2gr_wu(v8i32_a, ui3_b); // CHECK: call i32 @llvm.loongarch.lasx.xvpickve2gr.wu( ++ ++ // __lasx_xvpickve2gr_d ++ // rd, xj, ui2 ++ // DI, V4DI, UQI ++ i64_r = __lasx_xvpickve2gr_d(v4i64_a, ui2_b); // CHECK: call i64 @llvm.loongarch.lasx.xvpickve2gr.d( ++ ++ // __lasx_xvpickve2gr_du ++ // rd, xj, ui2 ++ // UDI, V4DI, UQI ++ u64_r = __lasx_xvpickve2gr_du(v4i64_a, ui2_b); // CHECK: call i64 @llvm.loongarch.lasx.xvpickve2gr.du( ++ ++ // __lasx_xvaddwev_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvaddwev_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d( ++ ++ // __lasx_xvaddwev_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvaddwev_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w( ++ ++ // __lasx_xvaddwev_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvaddwev_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h( ++ ++ // __lasx_xvaddwev_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvaddwev_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b( ++ ++ // __lasx_xvaddwev_q_du ++ // xd, xj, 
xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvaddwev_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du( ++ ++ // __lasx_xvaddwev_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvaddwev_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu( ++ ++ // __lasx_xvaddwev_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvaddwev_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu( ++ ++ // __lasx_xvaddwev_h_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvaddwev_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu( ++ ++ // __lasx_xvsubwev_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsubwev_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d( ++ ++ // __lasx_xvsubwev_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvsubwev_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w( ++ ++ // __lasx_xvsubwev_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvsubwev_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h( ++ ++ // __lasx_xvsubwev_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvsubwev_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b( ++ ++ // __lasx_xvsubwev_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvsubwev_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du( ++ ++ // __lasx_xvsubwev_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvsubwev_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu( ++ ++ // __lasx_xvsubwev_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvsubwev_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu( ++ ++ // __lasx_xvsubwev_h_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvsubwev_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu( ++ ++ // __lasx_xvmulwev_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmulwev_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d( ++ ++ // __lasx_xvmulwev_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvmulwev_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w( ++ ++ // __lasx_xvmulwev_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvmulwev_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h( ++ ++ // __lasx_xvmulwev_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvmulwev_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b( ++ ++ // __lasx_xvmulwev_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvmulwev_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du( ++ ++ // __lasx_xvmulwev_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvmulwev_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu( ++ ++ // __lasx_xvmulwev_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvmulwev_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu( ++ ++ // __lasx_xvmulwev_h_bu ++ // xd, xj, xk ++ // V16HI, 
UV32QI, UV32QI ++ v16i16_r = __lasx_xvmulwev_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu( ++ ++ // __lasx_xvaddwod_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvaddwod_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d( ++ ++ // __lasx_xvaddwod_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvaddwod_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w( ++ ++ // __lasx_xvaddwod_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvaddwod_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h( ++ ++ // __lasx_xvaddwod_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvaddwod_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b( ++ ++ // __lasx_xvaddwod_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvaddwod_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du( ++ ++ // __lasx_xvaddwod_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvaddwod_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu( ++ ++ // __lasx_xvaddwod_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvaddwod_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu( ++ ++ // __lasx_xvaddwod_h_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvaddwod_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu( ++ ++ // __lasx_xvsubwod_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsubwod_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d( ++ ++ // __lasx_xvsubwod_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvsubwod_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w( ++ ++ // __lasx_xvsubwod_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvsubwod_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h( ++ ++ // __lasx_xvsubwod_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvsubwod_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b( ++ ++ // __lasx_xvsubwod_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvsubwod_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du( ++ ++ // __lasx_xvsubwod_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvsubwod_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu( ++ ++ // __lasx_xvsubwod_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvsubwod_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu( ++ ++ // __lasx_xvsubwod_h_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvsubwod_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu( ++ ++ // __lasx_xvmulwod_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmulwod_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d( ++ ++ // __lasx_xvmulwod_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvmulwod_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w( ++ ++ // __lasx_xvmulwod_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = 
__lasx_xvmulwod_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h( ++ ++ // __lasx_xvmulwod_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvmulwod_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b( ++ ++ // __lasx_xvmulwod_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvmulwod_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du( ++ ++ // __lasx_xvmulwod_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvmulwod_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu( ++ ++ // __lasx_xvmulwod_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvmulwod_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu( ++ ++ // __lasx_xvmulwod_h_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvmulwod_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu( ++ ++ // __lasx_xvaddwev_d_wu_w ++ // xd, xj, xk ++ // V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvaddwev_d_wu_w(v8u32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w( ++ ++ // __lasx_xvaddwev_w_hu_h ++ // xd, xj, xk ++ // V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvaddwev_w_hu_h(v16u16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h( ++ ++ // __lasx_xvaddwev_h_bu_b ++ // xd, xj, xk ++ // V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvaddwev_h_bu_b(v32u8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b( ++ ++ // __lasx_xvmulwev_d_wu_w ++ // xd, xj, xk ++ // V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvmulwev_d_wu_w(v8u32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w( ++ ++ // __lasx_xvmulwev_w_hu_h ++ // xd, xj, xk ++ // V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvmulwev_w_hu_h(v16u16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h( ++ ++ // __lasx_xvmulwev_h_bu_b ++ // xd, xj, xk ++ // V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvmulwev_h_bu_b(v32u8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b( ++ ++ // __lasx_xvaddwod_d_wu_w ++ // xd, xj, xk ++ // V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvaddwod_d_wu_w(v8u32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w( ++ ++ // __lasx_xvaddwod_w_hu_h ++ // xd, xj, xk ++ // V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvaddwod_w_hu_h(v16u16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h( ++ ++ // __lasx_xvaddwod_h_bu_b ++ // xd, xj, xk ++ // V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvaddwod_h_bu_b(v32u8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b( ++ ++ // __lasx_xvmulwod_d_wu_w ++ // xd, xj, xk ++ // V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvmulwod_d_wu_w(v8u32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w( ++ ++ // __lasx_xvmulwod_w_hu_h ++ // xd, xj, xk ++ // V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvmulwod_w_hu_h(v16u16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h( ++ ++ // __lasx_xvmulwod_h_bu_b ++ // xd, xj, xk ++ // V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvmulwod_h_bu_b(v32u8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b( ++ ++ // __lasx_xvhaddw_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvhaddw_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d( ++ ++ // 
__lasx_xvhaddw_qu_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvhaddw_qu_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du( ++ ++ // __lasx_xvhsubw_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvhsubw_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d( ++ ++ // __lasx_xvhsubw_qu_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvhsubw_qu_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du( ++ ++ // __lasx_xvmaddwev_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmaddwev_q_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d( ++ ++ // __lasx_xvmaddwev_d_w ++ // xd, xj, xk ++ // V4DI, V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvmaddwev_d_w(v4i64_a, v8i32_b, v8i32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w( ++ ++ // __lasx_xvmaddwev_w_h ++ // xd, xj, xk ++ // V8SI, V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvmaddwev_w_h(v8i32_a, v16i16_b, v16i16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h( ++ ++ // __lasx_xvmaddwev_h_b ++ // xd, xj, xk ++ // V16HI, V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvmaddwev_h_b(v16i16_a, v32i8_b, v32i8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b( ++ ++ // __lasx_xvmaddwev_q_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmaddwev_q_du(v4u64_a, v4u64_b, v4u64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du( ++ ++ // __lasx_xvmaddwev_d_wu ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV8SI, UV8SI ++ v4u64_r = __lasx_xvmaddwev_d_wu(v4u64_a, v8u32_b, v8u32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu( ++ ++ // __lasx_xvmaddwev_w_hu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV16HI, UV16HI ++ v8u32_r = __lasx_xvmaddwev_w_hu(v8u32_a, v16u16_b, v16u16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu( ++ ++ // __lasx_xvmaddwev_h_bu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV32QI, UV32QI ++ v16u16_r = __lasx_xvmaddwev_h_bu(v16u16_a, v32u8_b, v32u8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu( ++ ++ // __lasx_xvmaddwod_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmaddwod_q_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d( ++ ++ // __lasx_xvmaddwod_d_w ++ // xd, xj, xk ++ // V4DI, V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvmaddwod_d_w(v4i64_a, v8i32_b, v8i32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w( ++ ++ // __lasx_xvmaddwod_w_h ++ // xd, xj, xk ++ // V8SI, V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvmaddwod_w_h(v8i32_a, v16i16_b, v16i16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h( ++ ++ // __lasx_xvmaddwod_h_b ++ // xd, xj, xk ++ // V16HI, V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvmaddwod_h_b(v16i16_a, v32i8_b, v32i8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b( ++ ++ // __lasx_xvmaddwod_q_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmaddwod_q_du(v4u64_a, v4u64_b, v4u64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du( ++ ++ // __lasx_xvmaddwod_d_wu ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV8SI, UV8SI ++ v4u64_r = __lasx_xvmaddwod_d_wu(v4u64_a, v8u32_b, v8u32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu( ++ ++ // __lasx_xvmaddwod_w_hu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV16HI, UV16HI ++ v8u32_r = __lasx_xvmaddwod_w_hu(v8u32_a, v16u16_b, v16u16_c); // 
CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu( ++ ++ // __lasx_xvmaddwod_h_bu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV32QI, UV32QI ++ v16u16_r = __lasx_xvmaddwod_h_bu(v16u16_a, v32u8_b, v32u8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu( ++ ++ // __lasx_xvmaddwev_q_du_d ++ // xd, xj, xk ++ // V4DI, V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvmaddwev_q_du_d(v4i64_a, v4u64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d( ++ ++ // __lasx_xvmaddwev_d_wu_w ++ // xd, xj, xk ++ // V4DI, V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvmaddwev_d_wu_w(v4i64_a, v8u32_b, v8i32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w( ++ ++ // __lasx_xvmaddwev_w_hu_h ++ // xd, xj, xk ++ // V8SI, V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvmaddwev_w_hu_h(v8i32_a, v16u16_b, v16i16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h( ++ ++ // __lasx_xvmaddwev_h_bu_b ++ // xd, xj, xk ++ // V16HI, V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvmaddwev_h_bu_b(v16i16_a, v32u8_b, v32i8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b( ++ ++ // __lasx_xvmaddwod_q_du_d ++ // xd, xj, xk ++ // V4DI, V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvmaddwod_q_du_d(v4i64_a, v4u64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d( ++ ++ // __lasx_xvmaddwod_d_wu_w ++ // xd, xj, xk ++ // V4DI, V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvmaddwod_d_wu_w(v4i64_a, v8u32_b, v8i32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w( ++ ++ // __lasx_xvmaddwod_w_hu_h ++ // xd, xj, xk ++ // V8SI, V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvmaddwod_w_hu_h(v8i32_a, v16u16_b, v16i16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h( ++ ++ // __lasx_xvmaddwod_h_bu_b ++ // xd, xj, xk ++ // V16HI, V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvmaddwod_h_bu_b(v16i16_a, v32u8_b, v32i8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b( ++ ++ // __lasx_xvrotr_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvrotr_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvrotr.b( ++ ++ // __lasx_xvrotr_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvrotr_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvrotr.h( ++ ++ // __lasx_xvrotr_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvrotr_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvrotr.w( ++ ++ // __lasx_xvrotr_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvrotr_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvrotr.d( ++ ++ // __lasx_xvadd_q ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvadd_q(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvadd.q( ++ ++ // __lasx_xvsub_q ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsub_q(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsub.q( ++ ++ // __lasx_xvaddwev_q_du_d ++ // xd, xj, xk ++ // V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvaddwev_q_du_d(v4u64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d( ++ ++ // __lasx_xvaddwod_q_du_d ++ // xd, xj, xk ++ // V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvaddwod_q_du_d(v4u64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d( ++ ++ // __lasx_xvmulwev_q_du_d ++ // xd, xj, xk ++ // V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvmulwev_q_du_d(v4u64_a, v4i64_b); // CHECK: call <4 x i64> 
@llvm.loongarch.lasx.xvmulwev.q.du.d( ++ ++ // __lasx_xvmulwod_q_du_d ++ // xd, xj, xk ++ // V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvmulwod_q_du_d(v4u64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d( ++ ++ // __lasx_xvmskgez_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvmskgez_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b( ++ ++ // __lasx_xvmsknz_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvmsknz_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b( ++ ++ // __lasx_xvexth_h_b ++ // xd, xj ++ // V16HI, V32QI ++ v16i16_r = __lasx_xvexth_h_b(v32i8_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b( ++ ++ // __lasx_xvexth_w_h ++ // xd, xj ++ // V8SI, V16HI ++ v8i32_r = __lasx_xvexth_w_h(v16i16_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h( ++ ++ // __lasx_xvexth_d_w ++ // xd, xj ++ // V4DI, V8SI ++ v4i64_r = __lasx_xvexth_d_w(v8i32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w( ++ ++ // __lasx_xvexth_q_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvexth_q_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d( ++ ++ // __lasx_xvexth_hu_bu ++ // xd, xj ++ // UV16HI, UV32QI ++ v16u16_r = __lasx_xvexth_hu_bu(v32u8_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu( ++ ++ // __lasx_xvexth_wu_hu ++ // xd, xj ++ // UV8SI, UV16HI ++ v8u32_r = __lasx_xvexth_wu_hu(v16u16_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu( ++ ++ // __lasx_xvexth_du_wu ++ // xd, xj ++ // UV4DI, UV8SI ++ v4u64_r = __lasx_xvexth_du_wu(v8u32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu( ++ ++ // __lasx_xvexth_qu_du ++ // xd, xj ++ // UV4DI, UV4DI ++ v4u64_r = __lasx_xvexth_qu_du(v4u64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du( ++ ++ // __lasx_xvextl_q_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvextl_q_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d( ++ ++ // __lasx_xvextl_qu_du ++ // xd, xj ++ // UV4DI, UV4DI ++ v4u64_r = __lasx_xvextl_qu_du(v4u64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du( ++ ++ // __lasx_xvrotri_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvrotri_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvrotri.b( ++ ++ // __lasx_xvrotri_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvrotri_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvrotri.h( ++ ++ // __lasx_xvrotri_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvrotri_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvrotri.w( ++ ++ // __lasx_xvrotri_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvrotri_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvrotri.d( ++ ++ // __lasx_xvsrlni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvsrlni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h( ++ ++ // __lasx_xvsrlni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvsrlni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w( ++ ++ // __lasx_xvsrlni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvsrlni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d( ++ ++ // __lasx_xvsrlni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvsrlni_d_q(v4i64_a, v4i64_b, ui7); // 
CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q( ++ ++ // __lasx_xvsrlrni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvsrlrni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h( ++ ++ // __lasx_xvsrlrni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvsrlrni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w( ++ ++ // __lasx_xvsrlrni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvsrlrni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d( ++ ++ // __lasx_xvsrlrni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvsrlrni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q( ++ ++ // __lasx_xvssrlni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvssrlni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h( ++ ++ // __lasx_xvssrlni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvssrlni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w( ++ ++ // __lasx_xvssrlni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvssrlni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d( ++ ++ // __lasx_xvssrlni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvssrlni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q( ++ ++ // __lasx_xvssrlni_bu_h ++ // xd, xj, ui4 ++ // UV32QI, UV32QI, V32QI, USI ++ v32u8_r = __lasx_xvssrlni_bu_h(v32u8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h( ++ ++ // __lasx_xvssrlni_hu_w ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, V16HI, USI ++ v16u16_r = __lasx_xvssrlni_hu_w(v16u16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w( ++ ++ // __lasx_xvssrlni_wu_d ++ // xd, xj, ui6 ++ // UV8SI, UV8SI, V8SI, USI ++ v8u32_r = __lasx_xvssrlni_wu_d(v8u32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d( ++ ++ // __lasx_xvssrlni_du_q ++ // xd, xj, ui7 ++ // UV4DI, UV4DI, V4DI, USI ++ v4u64_r = __lasx_xvssrlni_du_q(v4u64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q( ++ ++ // __lasx_xvssrlrni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvssrlrni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h( ++ ++ // __lasx_xvssrlrni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvssrlrni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w( ++ ++ // __lasx_xvssrlrni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvssrlrni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d( ++ ++ // __lasx_xvssrlrni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvssrlrni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q( ++ ++ // __lasx_xvssrlrni_bu_h ++ // xd, xj, ui4 ++ // UV32QI, UV32QI, V32QI, USI ++ v32u8_r = __lasx_xvssrlrni_bu_h(v32u8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h( ++ ++ // __lasx_xvssrlrni_hu_w ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, V16HI, USI ++ v16u16_r = __lasx_xvssrlrni_hu_w(v16u16_a, v16i16_b, ui5); // 
CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w( ++ ++ // __lasx_xvssrlrni_wu_d ++ // xd, xj, ui6 ++ // UV8SI, UV8SI, V8SI, USI ++ v8u32_r = __lasx_xvssrlrni_wu_d(v8u32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d( ++ ++ // __lasx_xvssrlrni_du_q ++ // xd, xj, ui7 ++ // UV4DI, UV4DI, V4DI, USI ++ v4u64_r = __lasx_xvssrlrni_du_q(v4u64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q( ++ ++ // __lasx_xvsrani_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvsrani_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h( ++ ++ // __lasx_xvsrani_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvsrani_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w( ++ ++ // __lasx_xvsrani_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvsrani_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d( ++ ++ // __lasx_xvsrani_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvsrani_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q( ++ ++ // __lasx_xvsrarni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvsrarni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h( ++ ++ // __lasx_xvsrarni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvsrarni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w( ++ ++ // __lasx_xvsrarni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvsrarni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d( ++ ++ // __lasx_xvsrarni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvsrarni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q( ++ ++ // __lasx_xvssrani_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvssrani_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h( ++ ++ // __lasx_xvssrani_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvssrani_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w( ++ ++ // __lasx_xvssrani_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvssrani_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d( ++ ++ // __lasx_xvssrani_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvssrani_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q( ++ ++ // __lasx_xvssrani_bu_h ++ // xd, xj, ui4 ++ // UV32QI, UV32QI, V32QI, USI ++ v32u8_r = __lasx_xvssrani_bu_h(v32u8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h( ++ ++ // __lasx_xvssrani_hu_w ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, V16HI, USI ++ v16u16_r = __lasx_xvssrani_hu_w(v16u16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w( ++ ++ // __lasx_xvssrani_wu_d ++ // xd, xj, ui6 ++ // UV8SI, UV8SI, V8SI, USI ++ v8u32_r = __lasx_xvssrani_wu_d(v8u32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d( ++ ++ // __lasx_xvssrani_du_q ++ // xd, xj, ui7 ++ // UV4DI, UV4DI, V4DI, USI ++ v4u64_r = __lasx_xvssrani_du_q(v4u64_a, v4i64_b, ui7); // CHECK: call <4 x i64> 
@llvm.loongarch.lasx.xvssrani.du.q( ++ ++ // __lasx_xvssrarni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvssrarni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h( ++ ++ // __lasx_xvssrarni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvssrarni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w( ++ ++ // __lasx_xvssrarni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvssrarni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d( ++ ++ // __lasx_xvssrarni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvssrarni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q( ++ ++ // __lasx_xvssrarni_bu_h ++ // xd, xj, ui4 ++ // UV32QI, UV32QI, V32QI, USI ++ v32u8_r = __lasx_xvssrarni_bu_h(v32u8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h( ++ ++ // __lasx_xvssrarni_hu_w ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, V16HI, USI ++ v16u16_r = __lasx_xvssrarni_hu_w(v16u16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w( ++ ++ // __lasx_xvssrarni_wu_d ++ // xd, xj, ui6 ++ // UV8SI, UV8SI, V8SI, USI ++ v8u32_r = __lasx_xvssrarni_wu_d(v8u32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d( ++ ++ // __lasx_xvssrarni_du_q ++ // xd, xj, ui7 ++ // UV4DI, UV4DI, V4DI, USI ++ v4u64_r = __lasx_xvssrarni_du_q(v4u64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q( ++ ++ // __lasx_xbnz_v ++ // rd, xj ++ // SI, UV32QI ++ i32_r = __lasx_xbnz_v(v32u8_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.v( ++ ++ // __lasx_xbz_v ++ // rd, xj ++ // SI, UV32QI ++ i32_r = __lasx_xbz_v(v32u8_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.v( ++ ++ // __lasx_xbnz_b ++ // rd, xj ++ // SI, UV32QI ++ i32_r = __lasx_xbnz_b(v32u8_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.b( ++ ++ // __lasx_xbnz_h ++ // rd, xj ++ // SI, UV16HI ++ i32_r = __lasx_xbnz_h(v16u16_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.h( ++ ++ // __lasx_xbnz_w ++ // rd, xj ++ // SI, UV8SI ++ i32_r = __lasx_xbnz_w(v8u32_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.w( ++ ++ // __lasx_xbnz_d ++ // rd, xj ++ // SI, UV4DI ++ i32_r = __lasx_xbnz_d(v4u64_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.d( ++ ++ // __lasx_xbz_b ++ // rd, xj ++ // SI, UV32QI ++ i32_r = __lasx_xbz_b(v32u8_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.b( ++ ++ // __lasx_xbz_h ++ // rd, xj ++ // SI, UV16HI ++ i32_r = __lasx_xbz_h(v16u16_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.h( ++ ++ // __lasx_xbz_w ++ // rd, xj ++ // SI, UV8SI ++ i32_r = __lasx_xbz_w(v8u32_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.w( ++ ++ // __lasx_xbz_d ++ // rd, xj ++ // SI, UV4DI ++ i32_r = __lasx_xbz_d(v4u64_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.d( ++ ++ v32i8_r = __lasx_xvrepli_b(2); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvrepli.b( ++ ++ v16i16_r = __lasx_xvrepli_h(2); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvrepli.h( ++ ++ v8i32_r = __lasx_xvrepli_w(2); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvrepli.w( ++ ++ v4i64_r = __lasx_xvrepli_d(2); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvrepli.d( ++ ++ v4f64_r = __lasx_xvpickve_d_f(v4f64_a, 2); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f( ++ ++ v8f32_r = __lasx_xvpickve_w_f(v8f32_a, 2); // CHECK: call <8 x float> 
@llvm.loongarch.lasx.xvpickve.w.f( ++} +diff --git a/clang/test/CodeGen/builtins-loongarch-lsx-error.c b/clang/test/CodeGen/builtins-loongarch-lsx-error.c +new file mode 100644 +index 000000000..f566a7362 +--- /dev/null ++++ b/clang/test/CodeGen/builtins-loongarch-lsx-error.c +@@ -0,0 +1,250 @@ ++// REQUIRES: loongarch-registered-target ++// RUN: %clang_cc1 -triple loongarch64-unknown-linux-gnu -fsyntax-only %s \ ++// RUN: -target-feature +lsx \ ++// RUN: -verify -o - 2>&1 ++ ++#include <lsxintrin.h> ++ ++void test() { ++ v16i8 v16i8_a = (v16i8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16i8 v16i8_b = (v16i8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16i8 v16i8_c = (v16i8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16i8 v16i8_r; ++ v8i16 v8i16_a = (v8i16){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8i16 v8i16_b = (v8i16){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8i16 v8i16_c = (v8i16){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8i16 v8i16_r; ++ v4i32 v4i32_a = (v4i32){0, 1, 2, 3}; ++ v4i32 v4i32_b = (v4i32){1, 2, 3, 4}; ++ v4i32 v4i32_c = (v4i32){2, 3, 4, 5}; ++ v4i32 v4i32_r; ++ v2i64 v2i64_a = (v2i64){0, 1}; ++ v2i64 v2i64_b = (v2i64){1, 2}; ++ v2i64 v2i64_c = (v2i64){2, 3}; ++ v2i64 v2i64_r; ++ ++ v16u8 v16u8_a = (v16u8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16u8 v16u8_b = (v16u8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16u8 v16u8_c = (v16u8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16u8 v16u8_r; ++ v8u16 v8u16_a = (v8u16){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8u16 v8u16_b = (v8u16){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8u16 v8u16_c = (v8u16){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8u16 v8u16_r; ++ v4u32 v4u32_a = (v4u32){0, 1, 2, 3}; ++ v4u32 v4u32_b = (v4u32){1, 2, 3, 4}; ++ v4u32 v4u32_c = (v4u32){2, 3, 4, 5}; ++ v4u32 v4u32_r; ++ v2u64 v2u64_a = (v2u64){0, 1}; ++ v2u64 v2u64_b = (v2u64){1, 2}; ++ v2u64 v2u64_c = (v2u64){2, 3}; ++ v2u64 v2u64_r; ++ ++ v4f32 v4f32_a = (v4f32){0.5, 1, 2, 3}; ++ v4f32 v4f32_b = (v4f32){1.5, 2, 3, 4}; ++ v4f32 v4f32_c = (v4f32){2.5, 3, 4, 5}; ++ v4f32 v4f32_r; ++ v2f64 v2f64_a = (v2f64){0.5, 1}; ++ v2f64 v2f64_b = (v2f64){1.5, 2}; ++ v2f64 v2f64_c = (v2f64){2.5, 3}; ++ v2f64 v2f64_r; ++ ++ int i32_r; ++ int i32_a = 1; ++ int i32_b = 2; ++ unsigned int u32_r; ++ unsigned int u32_a = 1; ++ unsigned int u32_b = 2; ++ long long i64_r; ++ long long i64_a = 1; ++ long long i64_b = 2; ++ long long i64_c = 3; ++ unsigned long long u64_r; ++ unsigned long long u64_a = 1; ++ unsigned long long u64_b = 2; ++ unsigned long long u64_c = 3; ++ ++ v16i8_r = __lsx_vslli_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vslli_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vslli_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vslli_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vsrai_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vsrai_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vsrai_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vsrai_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vsrari_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the 
valid range [0, 7]}} ++ v8i16_r = __lsx_vsrari_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vsrari_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vsrari_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vsrli_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vsrli_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vsrli_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vsrli_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vsrlri_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vsrlri_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vsrlri_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vsrlri_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16u8_r = __lsx_vbitclri_b(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8u16_r = __lsx_vbitclri_h(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4u32_r = __lsx_vbitclri_w(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vbitclri_d(v2u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16u8_r = __lsx_vbitseti_b(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8u16_r = __lsx_vbitseti_h(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4u32_r = __lsx_vbitseti_w(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vbitseti_d(v2u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16u8_r = __lsx_vbitrevi_b(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8u16_r = __lsx_vbitrevi_h(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4u32_r = __lsx_vbitrevi_w(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vbitrevi_d(v2u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vaddi_bu(v16i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i16_r = __lsx_vaddi_hu(v8i16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vaddi_wu(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vaddi_du(v2i64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vsubi_bu(v16i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i16_r = __lsx_vsubi_hu(v8i16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vsubi_wu(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = 
__lsx_vsubi_du(v2i64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vmaxi_b(v16i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i16_r = __lsx_vmaxi_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i32_r = __lsx_vmaxi_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v2i64_r = __lsx_vmaxi_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16u8_r = __lsx_vmaxi_bu(v16u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u16_r = __lsx_vmaxi_hu(v8u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vmaxi_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vmaxi_du(v2u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vmini_b(v16i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i16_r = __lsx_vmini_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i32_r = __lsx_vmini_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v2i64_r = __lsx_vmini_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16u8_r = __lsx_vmini_bu(v16u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u16_r = __lsx_vmini_hu(v8u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vmini_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vmini_du(v2u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vseqi_b(v16i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i16_r = __lsx_vseqi_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i32_r = __lsx_vseqi_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v2i64_r = __lsx_vseqi_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i8_r = __lsx_vslti_b(v16i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i16_r = __lsx_vslti_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i32_r = __lsx_vslti_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v2i64_r = __lsx_vslti_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i8_r = __lsx_vslti_bu(v16u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i16_r = __lsx_vslti_hu(v8u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vslti_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vslti_du(v2u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vslei_b(v16i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ 
v8i16_r = __lsx_vslei_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i32_r = __lsx_vslei_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v2i64_r = __lsx_vslei_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i8_r = __lsx_vslei_bu(v16u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i16_r = __lsx_vslei_hu(v8u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vslei_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vslei_du(v2u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vsat_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vsat_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vsat_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vsat_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16u8_r = __lsx_vsat_bu(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8u16_r = __lsx_vsat_hu(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4u32_r = __lsx_vsat_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vsat_du(v2u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vreplvei_b(v16i8_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vreplvei_h(v8i16_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i32_r = __lsx_vreplvei_w(v4i32_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v2i64_r = __lsx_vreplvei_d(v2i64_a, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ v16u8_r = __lsx_vandi_b(v16u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16u8_r = __lsx_vori_b(v16u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16u8_r = __lsx_vnori_b(v16u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16u8_r = __lsx_vxori_b(v16u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16u8_r = __lsx_vbitseli_b(v16u8_a, v16u8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i8_r = __lsx_vshuf4i_b(v16i8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v8i16_r = __lsx_vshuf4i_h(v8i16_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v4i32_r = __lsx_vshuf4i_w(v4i32_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ i32_r = __lsx_vpickve2gr_b(v16i8_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ i32_r = __lsx_vpickve2gr_h(v8i16_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ i32_r = __lsx_vpickve2gr_w(v4i32_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ i64_r = 
__lsx_vpickve2gr_d(v2i64_a, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ u32_r = __lsx_vpickve2gr_bu(v16i8_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ u32_r = __lsx_vpickve2gr_hu(v8i16_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ u32_r = __lsx_vpickve2gr_wu(v4i32_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ u64_r = __lsx_vpickve2gr_du(v2i64_a, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ v16i8_r = __lsx_vinsgr2vr_b(v16i8_a, i32_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vinsgr2vr_h(v8i16_a, i32_b, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i32_r = __lsx_vinsgr2vr_w(v4i32_a, i32_b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v2i64_r = __lsx_vinsgr2vr_d(v2i64_a, i32_b, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ v8i16_r = __lsx_vsllwil_h_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i32_r = __lsx_vsllwil_w_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v2i64_r = __lsx_vsllwil_d_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u16_r = __lsx_vsllwil_hu_bu(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4u32_r = __lsx_vsllwil_wu_hu(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v2u64_r = __lsx_vsllwil_du_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vfrstpi_b(v16i8_a, v16i8_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i16_r = __lsx_vfrstpi_h(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vshuf4i_d(v2i64_a, v2i64_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i8_r = __lsx_vbsrl_v(v16i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vbsll_v(v16i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vextrins_b(v16i8_a, v16i8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v8i16_r = __lsx_vextrins_h(v8i16_a, v8i16_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v4i32_r = __lsx_vextrins_w(v4i32_a, v4i32_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v2i64_r = __lsx_vextrins_d(v2i64_a, v2i64_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ __lsx_vstelm_b(v16i8_a, &v16i8_b, 0, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ __lsx_vstelm_h(v8i16_a, &v8i16_b, 0, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ __lsx_vstelm_w(v4i32_a, &v4i32_b, 0, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ __lsx_vstelm_d(v2i64_a, &v2i64_b, 0, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ v16i8_r = __lsx_vldrepl_b(&v16i8_a, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ v8i16_r = 
__lsx_vldrepl_h(&v8i16_a, -1025); // expected-error {{argument value -1025 is outside the valid range [-1024, 1023]}} ++ v4i32_r = __lsx_vldrepl_w(&v4i32_a, -513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ v2i64_r = __lsx_vldrepl_d(&v2i64_a, -257); // expected-error {{argument value -257 is outside the valid range [-256, 255]}} ++ v16i8_r = __lsx_vrotri_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vrotri_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vrotri_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vrotri_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vsrlni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vsrlni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vsrlni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vsrlni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vssrlni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vssrlni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vssrlni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vssrlni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16u8_r = __lsx_vssrlni_bu_h(v16u8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u16_r = __lsx_vssrlni_hu_w(v8u16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vssrlni_wu_d(v4u32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2u64_r = __lsx_vssrlni_du_q(v2u64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vssrlrni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vssrlrni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vssrlrni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vssrlrni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16u8_r = __lsx_vssrlrni_bu_h(v16u8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u16_r = __lsx_vssrlrni_hu_w(v8u16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vssrlrni_wu_d(v4u32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2u64_r = __lsx_vssrlrni_du_q(v2u64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vsrani_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the 
valid range [0, 15]}} ++ v8i16_r = __lsx_vsrani_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vsrani_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vsrani_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vsrarni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vsrarni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vsrarni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vsrarni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vssrani_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vssrani_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vssrani_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vssrani_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16u8_r = __lsx_vssrani_bu_h(v16u8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u16_r = __lsx_vssrani_hu_w(v8u16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vssrani_wu_d(v4u32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2u64_r = __lsx_vssrani_du_q(v2u64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vssrarni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vssrarni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vssrarni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vssrarni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16u8_r = __lsx_vssrarni_bu_h(v16u8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u16_r = __lsx_vssrarni_hu_w(v8u16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vssrarni_wu_d(v4u32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2u64_r = __lsx_vssrarni_du_q(v2u64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v4i32_r = __lsx_vpermi_w(v4i32_a, v4i32_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i8_r = __lsx_vld(&v16i8_a, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ __lsx_vst(v16i8_a, &v16i8_b, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ v2i64_r = __lsx_vldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}} ++} +diff --git 
a/clang/test/CodeGen/builtins-loongarch-lsx.c b/clang/test/CodeGen/builtins-loongarch-lsx.c +new file mode 100644 +index 000000000..2b86c0b2e +--- /dev/null ++++ b/clang/test/CodeGen/builtins-loongarch-lsx.c +@@ -0,0 +1,3645 @@ ++// REQUIRES: loongarch-registered-target ++// RUN: %clang_cc1 -triple loongarch64-unknown-linux-gnu -emit-llvm %s \ ++// RUN: -target-feature +lsx \ ++// RUN: -o - | FileCheck %s ++ ++#include <lsxintrin.h> ++ ++#define ui1 0 ++#define ui2 1 ++#define ui3 4 ++#define ui4 7 ++#define ui5 25 ++#define ui6 44 ++#define ui7 100 ++#define ui8 127 //200 ++#define si5 -4 ++#define si8 -100 ++#define si9 0 ++#define si10 0 ++#define si11 0 ++#define si12 0 ++#define i10 500 ++#define i13 4000 ++#define mode 11 ++#define idx1 1 ++#define idx2 2 ++#define idx3 4 ++#define idx4 8 ++ ++void test(void) { ++ v16i8 v16i8_a = (v16i8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16i8 v16i8_b = (v16i8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16i8 v16i8_c = (v16i8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16i8 v16i8_r; ++ v8i16 v8i16_a = (v8i16){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8i16 v8i16_b = (v8i16){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8i16 v8i16_c = (v8i16){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8i16 v8i16_r; ++ v4i32 v4i32_a = (v4i32){0, 1, 2, 3}; ++ v4i32 v4i32_b = (v4i32){1, 2, 3, 4}; ++ v4i32 v4i32_c = (v4i32){2, 3, 4, 5}; ++ v4i32 v4i32_r; ++ v2i64 v2i64_a = (v2i64){0, 1}; ++ v2i64 v2i64_b = (v2i64){1, 2}; ++ v2i64 v2i64_c = (v2i64){2, 3}; ++ v2i64 v2i64_r; ++ ++ v16u8 v16u8_a = (v16u8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16u8 v16u8_b = (v16u8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16u8 v16u8_c = (v16u8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16u8 v16u8_r; ++ v8u16 v8u16_a = (v8u16){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8u16 v8u16_b = (v8u16){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8u16 v8u16_c = (v8u16){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8u16 v8u16_r; ++ v4u32 v4u32_a = (v4u32){0, 1, 2, 3}; ++ v4u32 v4u32_b = (v4u32){1, 2, 3, 4}; ++ v4u32 v4u32_c = (v4u32){2, 3, 4, 5}; ++ v4u32 v4u32_r; ++ v2u64 v2u64_a = (v2u64){0, 1}; ++ v2u64 v2u64_b = (v2u64){1, 2}; ++ v2u64 v2u64_c = (v2u64){2, 3}; ++ v2u64 v2u64_r; ++ ++ v4f32 v4f32_a = (v4f32){0.5, 1, 2, 3}; ++ v4f32 v4f32_b = (v4f32){1.5, 2, 3, 4}; ++ v4f32 v4f32_c = (v4f32){2.5, 3, 4, 5}; ++ v4f32 v4f32_r; ++ v2f64 v2f64_a = (v2f64){0.5, 1}; ++ v2f64 v2f64_b = (v2f64){1.5, 2}; ++ v2f64 v2f64_c = (v2f64){2.5, 3}; ++ v2f64 v2f64_r; ++ ++ int i32_r; ++ int i32_a = 1; ++ int i32_b = 2; ++ unsigned int u32_r; ++ unsigned int u32_a = 1; ++ unsigned int u32_b = 2; ++ long long i64_r; ++ long long i64_a = 1; ++ long long i64_b = 2; ++ long long i64_c = 3; ++ long int i64_d = 0; ++ unsigned long long u64_r; ++ unsigned long long u64_a = 1; ++ unsigned long long u64_b = 2; ++ unsigned long long u64_c = 3; ++ ++ // __lsx_vsll_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsll_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsll.b( ++ ++ // __lsx_vsll_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsll_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsll.h( ++ ++ // __lsx_vsll_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsll_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsll.w( ++ ++ // __lsx_vsll_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsll_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsll.d( ++ ++ // __lsx_vslli_b ++ // vd, vj, ui3 ++ // 
V16QI, V16QI, UQI ++ v16i8_r = __lsx_vslli_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslli.b( ++ ++ // __lsx_vslli_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vslli_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslli.h( ++ ++ // __lsx_vslli_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vslli_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslli.w( ++ ++ // __lsx_vslli_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vslli_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslli.d( ++ ++ // __lsx_vsra_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsra_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsra.b( ++ ++ // __lsx_vsra_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsra_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsra.h( ++ ++ // __lsx_vsra_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsra_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsra.w( ++ ++ // __lsx_vsra_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsra_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsra.d( ++ ++ // __lsx_vsrai_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsrai_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrai.b( ++ ++ // __lsx_vsrai_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsrai_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrai.h( ++ ++ // __lsx_vsrai_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsrai_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrai.w( ++ ++ // __lsx_vsrai_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsrai_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrai.d( ++ ++ // __lsx_vsrar_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsrar_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrar.b( ++ ++ // __lsx_vsrar_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsrar_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrar.h( ++ ++ // __lsx_vsrar_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsrar_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrar.w( ++ ++ // __lsx_vsrar_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsrar_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrar.d( ++ ++ // __lsx_vsrari_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsrari_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrari.b( ++ ++ // __lsx_vsrari_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsrari_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrari.h( ++ ++ // __lsx_vsrari_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsrari_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrari.w( ++ ++ // __lsx_vsrari_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsrari_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrari.d( ++ ++ // __lsx_vsrl_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsrl_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrl.b( ++ ++ // __lsx_vsrl_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsrl_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrl.h( ++ ++ // __lsx_vsrl_w ++ // vd, vj, vk ++ // V4SI, 
V4SI, V4SI ++ v4i32_r = __lsx_vsrl_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrl.w( ++ ++ // __lsx_vsrl_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsrl_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrl.d( ++ ++ // __lsx_vsrli_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsrli_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrli.b( ++ ++ // __lsx_vsrli_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsrli_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrli.h( ++ ++ // __lsx_vsrli_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsrli_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrli.w( ++ ++ // __lsx_vsrli_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsrli_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrli.d( ++ ++ // __lsx_vsrlr_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsrlr_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrlr.b( ++ ++ // __lsx_vsrlr_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsrlr_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrlr.h( ++ ++ // __lsx_vsrlr_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsrlr_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrlr.w( ++ ++ // __lsx_vsrlr_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsrlr_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrlr.d( ++ ++ // __lsx_vsrlri_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsrlri_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrlri.b( ++ ++ // __lsx_vsrlri_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsrlri_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrlri.h( ++ ++ // __lsx_vsrlri_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsrlri_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrlri.w( ++ ++ // __lsx_vsrlri_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsrlri_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrlri.d( ++ ++ // __lsx_vbitclr_b ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vbitclr_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitclr.b( ++ ++ // __lsx_vbitclr_h ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vbitclr_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitclr.h( ++ ++ // __lsx_vbitclr_w ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vbitclr_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitclr.w( ++ ++ // __lsx_vbitclr_d ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vbitclr_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitclr.d( ++ ++ // __lsx_vbitclri_b ++ // vd, vj, ui3 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vbitclri_b(v16u8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitclri.b( ++ ++ // __lsx_vbitclri_h ++ // vd, vj, ui4 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vbitclri_h(v8u16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitclri.h( ++ ++ // __lsx_vbitclri_w ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vbitclri_w(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitclri.w( ++ ++ // __lsx_vbitclri_d ++ // vd, vj, ui6 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vbitclri_d(v2u64_a, ui6); // CHECK: call <2 x i64> 
@llvm.loongarch.lsx.vbitclri.d( ++ ++ // __lsx_vbitset_b ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vbitset_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitset.b( ++ ++ // __lsx_vbitset_h ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vbitset_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitset.h( ++ ++ // __lsx_vbitset_w ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vbitset_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitset.w( ++ ++ // __lsx_vbitset_d ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vbitset_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitset.d( ++ ++ // __lsx_vbitseti_b ++ // vd, vj, ui3 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vbitseti_b(v16u8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitseti.b( ++ ++ // __lsx_vbitseti_h ++ // vd, vj, ui4 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vbitseti_h(v8u16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitseti.h( ++ ++ // __lsx_vbitseti_w ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vbitseti_w(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitseti.w( ++ ++ // __lsx_vbitseti_d ++ // vd, vj, ui6 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vbitseti_d(v2u64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitseti.d( ++ ++ // __lsx_vbitrev_b ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vbitrev_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitrev.b( ++ ++ // __lsx_vbitrev_h ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vbitrev_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitrev.h( ++ ++ // __lsx_vbitrev_w ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vbitrev_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitrev.w( ++ ++ // __lsx_vbitrev_d ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vbitrev_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitrev.d( ++ ++ // __lsx_vbitrevi_b ++ // vd, vj, ui3 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vbitrevi_b(v16u8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b( ++ ++ // __lsx_vbitrevi_h ++ // vd, vj, ui4 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vbitrevi_h(v8u16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h( ++ ++ // __lsx_vbitrevi_w ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vbitrevi_w(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w( ++ ++ // __lsx_vbitrevi_d ++ // vd, vj, ui6 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vbitrevi_d(v2u64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d( ++ ++ // __lsx_vadd_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vadd_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vadd.b( ++ ++ // __lsx_vadd_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vadd_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vadd.h( ++ ++ // __lsx_vadd_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vadd_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vadd.w( ++ ++ // __lsx_vadd_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vadd_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vadd.d( ++ ++ // __lsx_vaddi_bu ++ // vd, vj, ui5 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vaddi_bu(v16i8_a, ui5); // CHECK: call <16 x i8> 
@llvm.loongarch.lsx.vaddi.bu( ++ ++ // __lsx_vaddi_hu ++ // vd, vj, ui5 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vaddi_hu(v8i16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddi.hu( ++ ++ // __lsx_vaddi_wu ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vaddi_wu(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddi.wu( ++ ++ // __lsx_vaddi_du ++ // vd, vj, ui5 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vaddi_du(v2i64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddi.du( ++ ++ // __lsx_vsub_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsub_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsub.b( ++ ++ // __lsx_vsub_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsub_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsub.h( ++ ++ // __lsx_vsub_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsub_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsub.w( ++ ++ // __lsx_vsub_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsub_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsub.d( ++ ++ // __lsx_vsubi_bu ++ // vd, vj, ui5 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsubi_bu(v16i8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsubi.bu( ++ ++ // __lsx_vsubi_hu ++ // vd, vj, ui5 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsubi_hu(v8i16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubi.hu( ++ ++ // __lsx_vsubi_wu ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsubi_wu(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubi.wu( ++ ++ // __lsx_vsubi_du ++ // vd, vj, ui5 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsubi_du(v2i64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubi.du( ++ ++ // __lsx_vmax_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmax_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmax.b( ++ ++ // __lsx_vmax_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmax_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmax.h( ++ ++ // __lsx_vmax_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmax_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmax.w( ++ ++ // __lsx_vmax_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmax_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmax.d( ++ ++ // __lsx_vmaxi_b ++ // vd, vj, si5 ++ // V16QI, V16QI, QI ++ v16i8_r = __lsx_vmaxi_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmaxi.b( ++ ++ // __lsx_vmaxi_h ++ // vd, vj, si5 ++ // V8HI, V8HI, QI ++ v8i16_r = __lsx_vmaxi_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaxi.h( ++ ++ // __lsx_vmaxi_w ++ // vd, vj, si5 ++ // V4SI, V4SI, QI ++ v4i32_r = __lsx_vmaxi_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaxi.w( ++ ++ // __lsx_vmaxi_d ++ // vd, vj, si5 ++ // V2DI, V2DI, QI ++ v2i64_r = __lsx_vmaxi_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaxi.d( ++ ++ // __lsx_vmax_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vmax_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmax.bu( ++ ++ // __lsx_vmax_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vmax_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmax.hu( ++ ++ // __lsx_vmax_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vmax_wu(v4u32_a, v4u32_b); // CHECK: call <4 x 
i32> @llvm.loongarch.lsx.vmax.wu( ++ ++ // __lsx_vmax_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmax_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmax.du( ++ ++ // __lsx_vmaxi_bu ++ // vd, vj, ui5 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vmaxi_bu(v16u8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu( ++ ++ // __lsx_vmaxi_hu ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vmaxi_hu(v8u16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu( ++ ++ // __lsx_vmaxi_wu ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vmaxi_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu( ++ ++ // __lsx_vmaxi_du ++ // vd, vj, ui5 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vmaxi_du(v2u64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaxi.du( ++ ++ // __lsx_vmin_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmin_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmin.b( ++ ++ // __lsx_vmin_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmin_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmin.h( ++ ++ // __lsx_vmin_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmin_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmin.w( ++ ++ // __lsx_vmin_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmin_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmin.d( ++ ++ // __lsx_vmini_b ++ // vd, vj, si5 ++ // V16QI, V16QI, QI ++ v16i8_r = __lsx_vmini_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmini.b( ++ ++ // __lsx_vmini_h ++ // vd, vj, si5 ++ // V8HI, V8HI, QI ++ v8i16_r = __lsx_vmini_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmini.h( ++ ++ // __lsx_vmini_w ++ // vd, vj, si5 ++ // V4SI, V4SI, QI ++ v4i32_r = __lsx_vmini_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmini.w( ++ ++ // __lsx_vmini_d ++ // vd, vj, si5 ++ // V2DI, V2DI, QI ++ v2i64_r = __lsx_vmini_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmini.d( ++ ++ // __lsx_vmin_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vmin_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmin.bu( ++ ++ // __lsx_vmin_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vmin_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmin.hu( ++ ++ // __lsx_vmin_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vmin_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmin.wu( ++ ++ // __lsx_vmin_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmin_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmin.du( ++ ++ // __lsx_vmini_bu ++ // vd, vj, ui5 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vmini_bu(v16u8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmini.bu( ++ ++ // __lsx_vmini_hu ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vmini_hu(v8u16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmini.hu( ++ ++ // __lsx_vmini_wu ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vmini_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmini.wu( ++ ++ // __lsx_vmini_du ++ // vd, vj, ui5 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vmini_du(v2u64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmini.du( ++ ++ // __lsx_vseq_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = 
__lsx_vseq_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vseq.b( ++ ++ // __lsx_vseq_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vseq_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vseq.h( ++ ++ // __lsx_vseq_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vseq_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vseq.w( ++ ++ // __lsx_vseq_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vseq_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vseq.d( ++ ++ // __lsx_vseqi_b ++ // vd, vj, si5 ++ // V16QI, V16QI, QI ++ v16i8_r = __lsx_vseqi_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vseqi.b( ++ ++ // __lsx_vseqi_h ++ // vd, vj, si5 ++ // V8HI, V8HI, QI ++ v8i16_r = __lsx_vseqi_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vseqi.h( ++ ++ // __lsx_vseqi_w ++ // vd, vj, si5 ++ // V4SI, V4SI, QI ++ v4i32_r = __lsx_vseqi_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vseqi.w( ++ ++ // __lsx_vseqi_d ++ // vd, vj, si5 ++ // V2DI, V2DI, QI ++ v2i64_r = __lsx_vseqi_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vseqi.d( ++ ++ // __lsx_vslti_b ++ // vd, vj, si5 ++ // V16QI, V16QI, QI ++ v16i8_r = __lsx_vslti_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslti.b( ++ ++ // __lsx_vslt_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vslt_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslt.b( ++ ++ // __lsx_vslt_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vslt_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslt.h( ++ ++ // __lsx_vslt_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vslt_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslt.w( ++ ++ // __lsx_vslt_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vslt_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslt.d( ++ ++ // __lsx_vslti_h ++ // vd, vj, si5 ++ // V8HI, V8HI, QI ++ v8i16_r = __lsx_vslti_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslti.h( ++ ++ // __lsx_vslti_w ++ // vd, vj, si5 ++ // V4SI, V4SI, QI ++ v4i32_r = __lsx_vslti_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslti.w( ++ ++ // __lsx_vslti_d ++ // vd, vj, si5 ++ // V2DI, V2DI, QI ++ v2i64_r = __lsx_vslti_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslti.d( ++ ++ // __lsx_vslt_bu ++ // vd, vj, vk ++ // V16QI, UV16QI, UV16QI ++ v16i8_r = __lsx_vslt_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslt.bu( ++ ++ // __lsx_vslt_hu ++ // vd, vj, vk ++ // V8HI, UV8HI, UV8HI ++ v8i16_r = __lsx_vslt_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslt.hu( ++ ++ // __lsx_vslt_wu ++ // vd, vj, vk ++ // V4SI, UV4SI, UV4SI ++ v4i32_r = __lsx_vslt_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslt.wu( ++ ++ // __lsx_vslt_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vslt_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslt.du( ++ ++ // __lsx_vslti_bu ++ // vd, vj, ui5 ++ // V16QI, UV16QI, UQI ++ v16i8_r = __lsx_vslti_bu(v16u8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslti.bu( ++ ++ // __lsx_vslti_hu ++ // vd, vj, ui5 ++ // V8HI, UV8HI, UQI ++ v8i16_r = __lsx_vslti_hu(v8u16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslti.hu( ++ ++ // __lsx_vslti_wu ++ // vd, vj, ui5 ++ // V4SI, UV4SI, UQI ++ v4i32_r = 
__lsx_vslti_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslti.wu( ++ ++ // __lsx_vslti_du ++ // vd, vj, ui5 ++ // V2DI, UV2DI, UQI ++ v2i64_r = __lsx_vslti_du(v2u64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslti.du( ++ ++ // __lsx_vsle_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsle_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsle.b( ++ ++ // __lsx_vsle_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsle_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsle.h( ++ ++ // __lsx_vsle_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsle_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsle.w( ++ ++ // __lsx_vsle_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsle_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsle.d( ++ ++ // __lsx_vslei_b ++ // vd, vj, si5 ++ // V16QI, V16QI, QI ++ v16i8_r = __lsx_vslei_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslei.b( ++ ++ // __lsx_vslei_h ++ // vd, vj, si5 ++ // V8HI, V8HI, QI ++ v8i16_r = __lsx_vslei_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslei.h( ++ ++ // __lsx_vslei_w ++ // vd, vj, si5 ++ // V4SI, V4SI, QI ++ v4i32_r = __lsx_vslei_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslei.w( ++ ++ // __lsx_vslei_d ++ // vd, vj, si5 ++ // V2DI, V2DI, QI ++ v2i64_r = __lsx_vslei_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslei.d( ++ ++ // __lsx_vsle_bu ++ // vd, vj, vk ++ // V16QI, UV16QI, UV16QI ++ v16i8_r = __lsx_vsle_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsle.bu( ++ ++ // __lsx_vsle_hu ++ // vd, vj, vk ++ // V8HI, UV8HI, UV8HI ++ v8i16_r = __lsx_vsle_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsle.hu( ++ ++ // __lsx_vsle_wu ++ // vd, vj, vk ++ // V4SI, UV4SI, UV4SI ++ v4i32_r = __lsx_vsle_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsle.wu( ++ ++ // __lsx_vsle_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vsle_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsle.du( ++ ++ // __lsx_vslei_bu ++ // vd, vj, ui5 ++ // V16QI, UV16QI, UQI ++ v16i8_r = __lsx_vslei_bu(v16u8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslei.bu( ++ ++ // __lsx_vslei_hu ++ // vd, vj, ui5 ++ // V8HI, UV8HI, UQI ++ v8i16_r = __lsx_vslei_hu(v8u16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslei.hu( ++ ++ // __lsx_vslei_wu ++ // vd, vj, ui5 ++ // V4SI, UV4SI, UQI ++ v4i32_r = __lsx_vslei_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslei.wu( ++ ++ // __lsx_vslei_du ++ // vd, vj, ui5 ++ // V2DI, UV2DI, UQI ++ v2i64_r = __lsx_vslei_du(v2u64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslei.du( ++ ++ // __lsx_vsat_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsat_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsat.b( ++ ++ // __lsx_vsat_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsat_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsat.h( ++ ++ // __lsx_vsat_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsat_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsat.w( ++ ++ // __lsx_vsat_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsat_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsat.d( ++ ++ // __lsx_vsat_bu ++ // vd, vj, ui3 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = 
__lsx_vsat_bu(v16u8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsat.bu( ++ ++ // __lsx_vsat_hu ++ // vd, vj, ui4 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vsat_hu(v8u16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsat.hu( ++ ++ // __lsx_vsat_wu ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vsat_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsat.wu( ++ ++ // __lsx_vsat_du ++ // vd, vj, ui6 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vsat_du(v2u64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsat.du( ++ ++ // __lsx_vadda_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vadda_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vadda.b( ++ ++ // __lsx_vadda_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vadda_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vadda.h( ++ ++ // __lsx_vadda_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vadda_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vadda.w( ++ ++ // __lsx_vadda_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vadda_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vadda.d( ++ ++ // __lsx_vsadd_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsadd_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsadd.b( ++ ++ // __lsx_vsadd_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsadd_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsadd.h( ++ ++ // __lsx_vsadd_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsadd_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsadd.w( ++ ++ // __lsx_vsadd_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsadd_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsadd.d( ++ ++ // __lsx_vsadd_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vsadd_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsadd.bu( ++ ++ // __lsx_vsadd_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vsadd_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsadd.hu( ++ ++ // __lsx_vsadd_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vsadd_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsadd.wu( ++ ++ // __lsx_vsadd_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vsadd_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsadd.du( ++ ++ // __lsx_vavg_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vavg_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vavg.b( ++ ++ // __lsx_vavg_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vavg_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vavg.h( ++ ++ // __lsx_vavg_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vavg_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vavg.w( ++ ++ // __lsx_vavg_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vavg_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vavg.d( ++ ++ // __lsx_vavg_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vavg_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vavg.bu( ++ ++ // __lsx_vavg_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vavg_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vavg.hu( ++ ++ // 
__lsx_vavg_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vavg_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vavg.wu( ++ ++ // __lsx_vavg_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vavg_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vavg.du( ++ ++ // __lsx_vavgr_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vavgr_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vavgr.b( ++ ++ // __lsx_vavgr_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vavgr_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vavgr.h( ++ ++ // __lsx_vavgr_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vavgr_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vavgr.w( ++ ++ // __lsx_vavgr_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vavgr_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vavgr.d( ++ ++ // __lsx_vavgr_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vavgr_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vavgr.bu( ++ ++ // __lsx_vavgr_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vavgr_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vavgr.hu( ++ ++ // __lsx_vavgr_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vavgr_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vavgr.wu( ++ ++ // __lsx_vavgr_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vavgr_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vavgr.du( ++ ++ // __lsx_vssub_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vssub_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssub.b( ++ ++ // __lsx_vssub_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vssub_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssub.h( ++ ++ // __lsx_vssub_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vssub_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssub.w( ++ ++ // __lsx_vssub_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vssub_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssub.d( ++ ++ // __lsx_vssub_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vssub_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssub.bu( ++ ++ // __lsx_vssub_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vssub_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssub.hu( ++ ++ // __lsx_vssub_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vssub_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssub.wu( ++ ++ // __lsx_vssub_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vssub_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssub.du( ++ ++ // __lsx_vabsd_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vabsd_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vabsd.b( ++ ++ // __lsx_vabsd_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vabsd_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vabsd.h( ++ ++ // __lsx_vabsd_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vabsd_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vabsd.w( ++ ++ // __lsx_vabsd_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ 
v2i64_r = __lsx_vabsd_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vabsd.d( ++ ++ // __lsx_vabsd_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vabsd_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vabsd.bu( ++ ++ // __lsx_vabsd_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vabsd_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vabsd.hu( ++ ++ // __lsx_vabsd_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vabsd_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vabsd.wu( ++ ++ // __lsx_vabsd_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vabsd_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vabsd.du( ++ ++ // __lsx_vmul_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmul_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmul.b( ++ ++ // __lsx_vmul_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmul_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmul.h( ++ ++ // __lsx_vmul_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmul_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmul.w( ++ ++ // __lsx_vmul_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmul_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmul.d( ++ ++ // __lsx_vmadd_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmadd_b(v16i8_a, v16i8_b, v16i8_c); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmadd.b( ++ ++ // __lsx_vmadd_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmadd_h(v8i16_a, v8i16_b, v8i16_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmadd.h( ++ ++ // __lsx_vmadd_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmadd_w(v4i32_a, v4i32_b, v4i32_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmadd.w( ++ ++ // __lsx_vmadd_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmadd_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmadd.d( ++ ++ // __lsx_vmsub_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmsub_b(v16i8_a, v16i8_b, v16i8_c); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmsub.b( ++ ++ // __lsx_vmsub_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmsub_h(v8i16_a, v8i16_b, v8i16_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmsub.h( ++ ++ // __lsx_vmsub_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmsub_w(v4i32_a, v4i32_b, v4i32_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmsub.w( ++ ++ // __lsx_vmsub_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmsub_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmsub.d( ++ ++ // __lsx_vdiv_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vdiv_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vdiv.b( ++ ++ // __lsx_vdiv_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vdiv_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vdiv.h( ++ ++ // __lsx_vdiv_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vdiv_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vdiv.w( ++ ++ // __lsx_vdiv_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vdiv_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vdiv.d( ++ ++ // __lsx_vdiv_bu ++ // vd, vj, vk ++ // UV16QI, 
UV16QI, UV16QI ++ v16u8_r = __lsx_vdiv_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vdiv.bu( ++ ++ // __lsx_vdiv_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vdiv_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vdiv.hu( ++ ++ // __lsx_vdiv_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vdiv_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vdiv.wu( ++ ++ // __lsx_vdiv_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vdiv_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vdiv.du( ++ ++ // __lsx_vhaddw_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vhaddw_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b( ++ ++ // __lsx_vhaddw_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vhaddw_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h( ++ ++ // __lsx_vhaddw_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vhaddw_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w( ++ ++ // __lsx_vhaddw_hu_bu ++ // vd, vj, vk ++ // UV8HI, UV16QI, UV16QI ++ v8u16_r = __lsx_vhaddw_hu_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu( ++ ++ // __lsx_vhaddw_wu_hu ++ // vd, vj, vk ++ // UV4SI, UV8HI, UV8HI ++ v4u32_r = __lsx_vhaddw_wu_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu( ++ ++ // __lsx_vhaddw_du_wu ++ // vd, vj, vk ++ // UV2DI, UV4SI, UV4SI ++ v2u64_r = __lsx_vhaddw_du_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu( ++ ++ // __lsx_vhsubw_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vhsubw_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b( ++ ++ // __lsx_vhsubw_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vhsubw_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h( ++ ++ // __lsx_vhsubw_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vhsubw_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w( ++ ++ // __lsx_vhsubw_hu_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vhsubw_hu_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu( ++ ++ // __lsx_vhsubw_wu_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vhsubw_wu_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu( ++ ++ // __lsx_vhsubw_du_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vhsubw_du_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu( ++ ++ // __lsx_vmod_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmod_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmod.b( ++ ++ // __lsx_vmod_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmod_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmod.h( ++ ++ // __lsx_vmod_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmod_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmod.w( ++ ++ // __lsx_vmod_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmod_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmod.d( ++ ++ // __lsx_vmod_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vmod_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmod.bu( ++ ++ 
// __lsx_vmod_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vmod_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmod.hu( ++ ++ // __lsx_vmod_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vmod_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmod.wu( ++ ++ // __lsx_vmod_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmod_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmod.du( ++ ++ // __lsx_vreplve_b ++ // vd, vj, rk ++ // V16QI, V16QI, SI ++ v16i8_r = __lsx_vreplve_b(v16i8_a, i32_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vreplve.b( ++ ++ // __lsx_vreplve_h ++ // vd, vj, rk ++ // V8HI, V8HI, SI ++ v8i16_r = __lsx_vreplve_h(v8i16_a, i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vreplve.h( ++ ++ // __lsx_vreplve_w ++ // vd, vj, rk ++ // V4SI, V4SI, SI ++ v4i32_r = __lsx_vreplve_w(v4i32_a, i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vreplve.w( ++ ++ // __lsx_vreplve_d ++ // vd, vj, rk ++ // V2DI, V2DI, SI ++ v2i64_r = __lsx_vreplve_d(v2i64_a, i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vreplve.d( ++ ++ // __lsx_vreplvei_b ++ // vd, vj, ui4 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vreplvei_b(v16i8_a, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vreplvei.b( ++ ++ // __lsx_vreplvei_h ++ // vd, vj, ui3 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vreplvei_h(v8i16_a, ui3); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vreplvei.h( ++ ++ // __lsx_vreplvei_w ++ // vd, vj, ui2 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vreplvei_w(v4i32_a, ui2); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vreplvei.w( ++ ++ // __lsx_vreplvei_d ++ // vd, vj, ui1 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vreplvei_d(v2i64_a, ui1); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vreplvei.d( ++ ++ // __lsx_vpickev_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vpickev_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpickev.b( ++ ++ // __lsx_vpickev_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vpickev_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpickev.h( ++ ++ // __lsx_vpickev_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vpickev_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpickev.w( ++ ++ // __lsx_vpickev_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vpickev_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpickev.d( ++ ++ // __lsx_vpickod_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vpickod_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpickod.b( ++ ++ // __lsx_vpickod_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vpickod_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpickod.h( ++ ++ // __lsx_vpickod_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vpickod_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpickod.w( ++ ++ // __lsx_vpickod_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vpickod_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpickod.d( ++ ++ // __lsx_vilvh_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vilvh_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vilvh.b( ++ ++ // __lsx_vilvh_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vilvh_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vilvh.h( ++ ++ // __lsx_vilvh_w ++ // vd, vj, vk 
++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vilvh_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vilvh.w( ++ ++ // __lsx_vilvh_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vilvh_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vilvh.d( ++ ++ // __lsx_vilvl_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vilvl_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vilvl.b( ++ ++ // __lsx_vilvl_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vilvl_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vilvl.h( ++ ++ // __lsx_vilvl_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vilvl_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vilvl.w( ++ ++ // __lsx_vilvl_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vilvl_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vilvl.d( ++ ++ // __lsx_vpackev_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vpackev_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpackev.b( ++ ++ // __lsx_vpackev_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vpackev_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpackev.h( ++ ++ // __lsx_vpackev_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vpackev_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpackev.w( ++ ++ // __lsx_vpackev_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vpackev_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpackev.d( ++ ++ // __lsx_vpackod_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vpackod_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpackod.b( ++ ++ // __lsx_vpackod_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vpackod_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpackod.h( ++ ++ // __lsx_vpackod_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vpackod_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpackod.w( ++ ++ // __lsx_vpackod_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vpackod_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpackod.d( ++ ++ // __lsx_vshuf_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vshuf_h(v8i16_a, v8i16_b, v8i16_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vshuf.h( ++ ++ // __lsx_vshuf_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vshuf_w(v4i32_a, v4i32_b, v4i32_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vshuf.w( ++ ++ // __lsx_vshuf_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vshuf_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vshuf.d( ++ ++ // __lsx_vand_v ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vand_v(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vand.v( ++ ++ // __lsx_vandi_b ++ // vd, vj, ui8 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vandi_b(v16u8_a, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vandi.b( ++ ++ // __lsx_vor_v ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vor_v(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vor.v( ++ ++ // __lsx_vori_b ++ // vd, vj, ui8 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vori_b(v16u8_a, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vori.b( ++ ++ // __lsx_vnor_v ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI 
++ v16u8_r = __lsx_vnor_v(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vnor.v( ++ ++ // __lsx_vnori_b ++ // vd, vj, ui8 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vnori_b(v16u8_a, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vnori.b( ++ ++ // __lsx_vxor_v ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vxor_v(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vxor.v( ++ ++ // __lsx_vxori_b ++ // vd, vj, ui8 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vxori_b(v16u8_a, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vxori.b( ++ ++ // __lsx_vbitsel_v ++ // vd, vj, vk, va ++ // UV16QI, UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vbitsel_v(v16u8_a, v16u8_b, v16u8_c); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitsel.v( ++ ++ // __lsx_vbitseli_b ++ // vd, vj, ui8 ++ // UV16QI, UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vbitseli_b(v16u8_a, v16u8_b, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitseli.b( ++ ++ // __lsx_vshuf4i_b ++ // vd, vj, ui8 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vshuf4i_b(v16i8_a, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b( ++ ++ // __lsx_vshuf4i_h ++ // vd, vj, ui8 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vshuf4i_h(v8i16_a, ui8); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h( ++ ++ // __lsx_vshuf4i_w ++ // vd, vj, ui8 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vshuf4i_w(v4i32_a, ui8); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w( ++ ++ // __lsx_vreplgr2vr_b ++ // vd, rj ++ // V16QI, SI ++ v16i8_r = __lsx_vreplgr2vr_b(i32_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b( ++ ++ // __lsx_vreplgr2vr_h ++ // vd, rj ++ // V8HI, SI ++ v8i16_r = __lsx_vreplgr2vr_h(i32_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h( ++ ++ // __lsx_vreplgr2vr_w ++ // vd, rj ++ // V4SI, SI ++ v4i32_r = __lsx_vreplgr2vr_w(i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w( ++ ++ // __lsx_vreplgr2vr_d ++ // vd, rj ++ // V2DI, DI ++ v2i64_r = __lsx_vreplgr2vr_d(i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d( ++ ++ // __lsx_vpcnt_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vpcnt_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpcnt.b( ++ ++ // __lsx_vpcnt_h ++ // vd, vj ++ // V8HI, V8HI ++ v8i16_r = __lsx_vpcnt_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpcnt.h( ++ ++ // __lsx_vpcnt_w ++ // vd, vj ++ // V4SI, V4SI ++ v4i32_r = __lsx_vpcnt_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpcnt.w( ++ ++ // __lsx_vpcnt_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vpcnt_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpcnt.d( ++ ++ // __lsx_vclo_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vclo_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vclo.b( ++ ++ // __lsx_vclo_h ++ // vd, vj ++ // V8HI, V8HI ++ v8i16_r = __lsx_vclo_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vclo.h( ++ ++ // __lsx_vclo_w ++ // vd, vj ++ // V4SI, V4SI ++ v4i32_r = __lsx_vclo_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vclo.w( ++ ++ // __lsx_vclo_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vclo_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vclo.d( ++ ++ // __lsx_vclz_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vclz_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vclz.b( ++ ++ // __lsx_vclz_h ++ // vd, vj ++ // V8HI, V8HI ++ v8i16_r = __lsx_vclz_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vclz.h( ++ ++ // __lsx_vclz_w ++ // vd, 
vj ++ // V4SI, V4SI ++ v4i32_r = __lsx_vclz_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vclz.w( ++ ++ // __lsx_vclz_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vclz_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vclz.d( ++ ++ // __lsx_vpickve2gr_b ++ // rd, vj, ui4 ++ // SI, V16QI, UQI ++ i32_r = __lsx_vpickve2gr_b(v16i8_a, ui4); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.b( ++ ++ // __lsx_vpickve2gr_h ++ // rd, vj, ui3 ++ // SI, V8HI, UQI ++ i32_r = __lsx_vpickve2gr_h(v8i16_a, ui3); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.h( ++ ++ // __lsx_vpickve2gr_w ++ // rd, vj, ui2 ++ // SI, V4SI, UQI ++ i32_r = __lsx_vpickve2gr_w(v4i32_a, ui2); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.w( ++ ++ // __lsx_vpickve2gr_d ++ // rd, vj, ui1 ++ // DI, V2DI, UQI ++ i64_r = __lsx_vpickve2gr_d(v2i64_a, ui1); // CHECK: call i64 @llvm.loongarch.lsx.vpickve2gr.d( ++ ++ // __lsx_vpickve2gr_bu ++ // rd, vj, ui4 ++ // USI, V16QI, UQI ++ u32_r = __lsx_vpickve2gr_bu(v16i8_a, ui4); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.bu( ++ ++ // __lsx_vpickve2gr_hu ++ // rd, vj, ui3 ++ // USI, V8HI, UQI ++ u32_r = __lsx_vpickve2gr_hu(v8i16_a, ui3); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.hu( ++ ++ // __lsx_vpickve2gr_wu ++ // rd, vj, ui2 ++ // USI, V4SI, UQI ++ u32_r = __lsx_vpickve2gr_wu(v4i32_a, ui2); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.wu( ++ ++ // __lsx_vpickve2gr_du ++ // rd, vj, ui1 ++ // UDI, V2DI, UQI ++ u64_r = __lsx_vpickve2gr_du(v2i64_a, ui1); // CHECK: call i64 @llvm.loongarch.lsx.vpickve2gr.du( ++ ++ // __lsx_vinsgr2vr_b ++ // vd, rj, ui4 ++ // V16QI, V16QI, SI, UQI ++ v16i8_r = __lsx_vinsgr2vr_b(v16i8_a, i32_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b( ++ ++ // __lsx_vinsgr2vr_h ++ // vd, rj, ui3 ++ // V8HI, V8HI, SI, UQI ++ v8i16_r = __lsx_vinsgr2vr_h(v8i16_a, i32_b, ui3); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h( ++ ++ // __lsx_vinsgr2vr_w ++ // vd, rj, ui2 ++ // V4SI, V4SI, SI, UQI ++ v4i32_r = __lsx_vinsgr2vr_w(v4i32_a, i32_b, ui2); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w( ++ ++ // __lsx_vinsgr2vr_d ++ // vd, rj, ui1 ++ // V2DI, V2DI, SI, UQI ++ v2i64_r = __lsx_vinsgr2vr_d(v2i64_a, i32_b, ui1); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d( ++ ++ // __lsx_vfcmp_caf_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_caf_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s( ++ ++ // __lsx_vfcmp_caf_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_caf_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d( ++ ++ // __lsx_vfcmp_cor_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cor_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s( ++ ++ // __lsx_vfcmp_cor_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cor_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d( ++ ++ // __lsx_vfcmp_cun_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cun_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s( ++ ++ // __lsx_vfcmp_cun_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cun_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d( ++ ++ // __lsx_vfcmp_cune_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cune_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s( ++ ++ // 
__lsx_vfcmp_cune_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cune_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d( ++ ++ // __lsx_vfcmp_cueq_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cueq_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s( ++ ++ // __lsx_vfcmp_cueq_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cueq_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d( ++ ++ // __lsx_vfcmp_ceq_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_ceq_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s( ++ ++ // __lsx_vfcmp_ceq_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_ceq_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d( ++ ++ // __lsx_vfcmp_cne_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cne_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s( ++ ++ // __lsx_vfcmp_cne_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cne_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d( ++ ++ // __lsx_vfcmp_clt_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_clt_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s( ++ ++ // __lsx_vfcmp_clt_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_clt_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d( ++ ++ // __lsx_vfcmp_cult_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cult_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s( ++ ++ // __lsx_vfcmp_cult_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cult_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d( ++ ++ // __lsx_vfcmp_cle_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cle_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s( ++ ++ // __lsx_vfcmp_cle_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cle_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d( ++ ++ // __lsx_vfcmp_cule_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cule_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s( ++ ++ // __lsx_vfcmp_cule_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cule_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d( ++ ++ // __lsx_vfcmp_saf_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_saf_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s( ++ ++ // __lsx_vfcmp_saf_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_saf_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d( ++ ++ // __lsx_vfcmp_sor_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sor_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s( ++ ++ // __lsx_vfcmp_sor_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sor_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d( ++ ++ // __lsx_vfcmp_sun_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sun_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s( ++ ++ // __lsx_vfcmp_sun_d ++ // vd, vj, 
vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sun_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d( ++ ++ // __lsx_vfcmp_sune_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sune_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s( ++ ++ // __lsx_vfcmp_sune_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sune_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d( ++ ++ // __lsx_vfcmp_sueq_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sueq_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s( ++ ++ // __lsx_vfcmp_sueq_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sueq_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d( ++ ++ // __lsx_vfcmp_seq_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_seq_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s( ++ ++ // __lsx_vfcmp_seq_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_seq_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d( ++ ++ // __lsx_vfcmp_sne_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sne_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s( ++ ++ // __lsx_vfcmp_sne_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sne_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d( ++ ++ // __lsx_vfcmp_slt_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_slt_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s( ++ ++ // __lsx_vfcmp_slt_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_slt_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d( ++ ++ // __lsx_vfcmp_sult_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sult_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s( ++ ++ // __lsx_vfcmp_sult_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sult_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d( ++ ++ // __lsx_vfcmp_sle_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sle_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s( ++ ++ // __lsx_vfcmp_sle_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sle_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d( ++ ++ // __lsx_vfcmp_sule_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sule_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s( ++ ++ // __lsx_vfcmp_sule_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sule_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d( ++ ++ // __lsx_vfadd_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfadd_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfadd.s( ++ // __lsx_vfadd_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfadd_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfadd.d( ++ ++ // __lsx_vfsub_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfsub_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfsub.s( ++ ++ // __lsx_vfsub_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfsub_d(v2f64_a, 
v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfsub.d( ++ ++ // __lsx_vfmul_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmul_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmul.s( ++ ++ // __lsx_vfmul_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmul_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmul.d( ++ ++ // __lsx_vfdiv_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfdiv_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfdiv.s( ++ ++ // __lsx_vfdiv_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfdiv_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfdiv.d( ++ ++ // __lsx_vfcvt_h_s ++ // vd, vj, vk ++ // V8HI, V4SF, V4SF ++ v8i16_r = __lsx_vfcvt_h_s(v4f32_a, v4f32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s( ++ ++ // __lsx_vfcvt_s_d ++ // vd, vj, vk ++ // V4SF, V2DF, V2DF ++ v4f32_r = __lsx_vfcvt_s_d(v2f64_a, v2f64_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d( ++ ++ // __lsx_vfmin_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmin_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmin.s( ++ ++ // __lsx_vfmin_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmin_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmin.d( ++ ++ // __lsx_vfmina_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmina_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmina.s( ++ ++ // __lsx_vfmina_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmina_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmina.d( ++ ++ // __lsx_vfmax_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmax_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmax.s( ++ ++ // __lsx_vfmax_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmax_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmax.d( ++ ++ // __lsx_vfmaxa_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmaxa_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmaxa.s( ++ ++ // __lsx_vfmaxa_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmaxa_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmaxa.d( ++ ++ // __lsx_vfclass_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vfclass_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfclass.s( ++ ++ // __lsx_vfclass_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vfclass_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfclass.d( ++ ++ // __lsx_vfsqrt_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfsqrt_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfsqrt.s( ++ ++ // __lsx_vfsqrt_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfsqrt_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfsqrt.d( ++ ++ // __lsx_vfrecip_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfrecip_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrecip.s( ++ ++ // __lsx_vfrecip_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrecip_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrecip.d( ++ ++ // __lsx_vfrint_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfrint_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrint.s( ++ ++ // __lsx_vfrint_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrint_d(v2f64_a); // 
CHECK: call <2 x double> @llvm.loongarch.lsx.vfrint.d( ++ ++ // __lsx_vfrsqrt_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfrsqrt_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s( ++ ++ // __lsx_vfrsqrt_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrsqrt_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d( ++ ++ // __lsx_vflogb_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vflogb_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vflogb.s( ++ ++ // __lsx_vflogb_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vflogb_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vflogb.d( ++ ++ // __lsx_vfcvth_s_h ++ // vd, vj ++ // V4SF, V8HI ++ v4f32_r = __lsx_vfcvth_s_h(v8i16_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h( ++ ++ // __lsx_vfcvth_d_s ++ // vd, vj ++ // V2DF, V4SF ++ v2f64_r = __lsx_vfcvth_d_s(v4f32_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s( ++ ++ //gcc build fail ++ ++ // __lsx_vfcvtl_s_h ++ // vd, vj ++ // V4SF, V8HI ++ v4f32_r = __lsx_vfcvtl_s_h(v8i16_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h( ++ ++ // __lsx_vfcvtl_d_s ++ // vd, vj ++ // V2DF, V4SF ++ v2f64_r = __lsx_vfcvtl_d_s(v4f32_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s( ++ ++ // __lsx_vftint_w_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vftint_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftint.w.s( ++ ++ // __lsx_vftint_l_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vftint_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftint.l.d( ++ ++ // __lsx_vftint_wu_s ++ // vd, vj ++ // UV4SI, V4SF ++ v4u32_r = __lsx_vftint_wu_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s( ++ ++ // __lsx_vftint_lu_d ++ // vd, vj ++ // UV2DI, V2DF ++ v2u64_r = __lsx_vftint_lu_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d( ++ ++ // __lsx_vftintrz_w_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vftintrz_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s( ++ ++ // __lsx_vftintrz_l_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vftintrz_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d( ++ ++ // __lsx_vftintrz_wu_s ++ // vd, vj ++ // UV4SI, V4SF ++ v4u32_r = __lsx_vftintrz_wu_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s( ++ ++ // __lsx_vftintrz_lu_d ++ // vd, vj ++ // UV2DI, V2DF ++ v2u64_r = __lsx_vftintrz_lu_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d( ++ ++ // __lsx_vffint_s_w ++ // vd, vj ++ // V4SF, V4SI ++ v4f32_r = __lsx_vffint_s_w(v4i32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vffint.s.w( ++ ++ // __lsx_vffint_d_l ++ // vd, vj ++ // V2DF, V2DI ++ v2f64_r = __lsx_vffint_d_l(v2i64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vffint.d.l( ++ ++ // __lsx_vffint_s_wu ++ // vd, vj ++ // V4SF, UV4SI ++ v4f32_r = __lsx_vffint_s_wu(v4u32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vffint.s.wu( ++ ++ // __lsx_vffint_d_lu ++ // vd, vj ++ // V2DF, UV2DI ++ v2f64_r = __lsx_vffint_d_lu(v2u64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vffint.d.lu( ++ ++ // __lsx_vandn_v ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vandn_v(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vandn.v( ++ ++ // __lsx_vneg_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vneg_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vneg.b( ++ ++ // __lsx_vneg_h ++ // vd, vj ++ // 
V8HI, V8HI ++ v8i16_r = __lsx_vneg_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vneg.h( ++ ++ // __lsx_vneg_w ++ // vd, vj ++ // V4SI, V4SI ++ v4i32_r = __lsx_vneg_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vneg.w( ++ ++ // __lsx_vneg_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vneg_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vneg.d( ++ ++ // __lsx_vmuh_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmuh_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmuh.b( ++ ++ // __lsx_vmuh_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmuh_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmuh.h( ++ ++ // __lsx_vmuh_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmuh_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmuh.w( ++ ++ // __lsx_vmuh_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmuh_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmuh.d( ++ ++ // __lsx_vmuh_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vmuh_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmuh.bu( ++ ++ // __lsx_vmuh_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vmuh_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmuh.hu( ++ ++ // __lsx_vmuh_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vmuh_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmuh.wu( ++ ++ // __lsx_vmuh_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmuh_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmuh.du( ++ ++ // __lsx_vsllwil_h_b ++ // vd, vj, ui3 ++ // V8HI, V16QI, UQI ++ v8i16_r = __lsx_vsllwil_h_b(v16i8_a, ui3); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b( ++ ++ // __lsx_vsllwil_w_h ++ // vd, vj, ui4 ++ // V4SI, V8HI, UQI ++ v4i32_r = __lsx_vsllwil_w_h(v8i16_a, ui4); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h( ++ ++ // __lsx_vsllwil_d_w ++ // vd, vj, ui5 ++ // V2DI, V4SI, UQI ++ v2i64_r = __lsx_vsllwil_d_w(v4i32_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w( ++ ++ // __lsx_vsllwil_hu_bu ++ // vd, vj, ui3 ++ // UV8HI, UV16QI, UQI ++ v8u16_r = __lsx_vsllwil_hu_bu(v16u8_a, ui3); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu( ++ ++ // __lsx_vsllwil_wu_hu ++ // vd, vj, ui4 ++ // UV4SI, UV8HI, UQI ++ v4u32_r = __lsx_vsllwil_wu_hu(v8u16_a, ui4); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu( ++ ++ // __lsx_vsllwil_du_wu ++ // vd, vj, ui5 ++ // UV2DI, UV4SI, UQI ++ v2u64_r = __lsx_vsllwil_du_wu(v4u32_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu( ++ ++ // __lsx_vsran_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vsran_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsran.b.h( ++ ++ // __lsx_vsran_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vsran_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsran.h.w( ++ ++ // __lsx_vsran_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vsran_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsran.w.d( ++ ++ // __lsx_vssran_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vssran_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssran.b.h( ++ ++ // __lsx_vssran_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vssran_h_w(v4i32_a, v4i32_b); // CHECK: call 
<8 x i16> @llvm.loongarch.lsx.vssran.h.w( ++ ++ // __lsx_vssran_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vssran_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssran.w.d( ++ ++ // __lsx_vssran_bu_h ++ // vd, vj, vk ++ // UV16QI, UV8HI, UV8HI ++ v16u8_r = __lsx_vssran_bu_h(v8u16_a, v8u16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h( ++ ++ // __lsx_vssran_hu_w ++ // vd, vj, vk ++ // UV8HI, UV4SI, UV4SI ++ v8u16_r = __lsx_vssran_hu_w(v4u32_a, v4u32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w( ++ ++ // __lsx_vssran_wu_d ++ // vd, vj, vk ++ // UV4SI, UV2DI, UV2DI ++ v4u32_r = __lsx_vssran_wu_d(v2u64_a, v2u64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d( ++ ++ // __lsx_vsrarn_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vsrarn_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h( ++ ++ // __lsx_vsrarn_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vsrarn_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w( ++ ++ // __lsx_vsrarn_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vsrarn_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d( ++ ++ // __lsx_vssrarn_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vssrarn_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h( ++ ++ // __lsx_vssrarn_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vssrarn_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w( ++ ++ // __lsx_vssrarn_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vssrarn_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d( ++ ++ // __lsx_vssrarn_bu_h ++ // vd, vj, vk ++ // UV16QI, UV8HI, UV8HI ++ v16u8_r = __lsx_vssrarn_bu_h(v8u16_a, v8u16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h( ++ ++ // __lsx_vssrarn_hu_w ++ // vd, vj, vk ++ // UV8HI, UV4SI, UV4SI ++ v8u16_r = __lsx_vssrarn_hu_w(v4u32_a, v4u32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w( ++ ++ // __lsx_vssrarn_wu_d ++ // vd, vj, vk ++ // UV4SI, UV2DI, UV2DI ++ v4u32_r = __lsx_vssrarn_wu_d(v2u64_a, v2u64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d( ++ ++ // __lsx_vsrln_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vsrln_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h( ++ ++ // __lsx_vsrln_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vsrln_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w( ++ ++ // __lsx_vsrln_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vsrln_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d( ++ ++ // __lsx_vssrln_bu_h ++ // vd, vj, vk ++ // UV16QI, UV8HI, UV8HI ++ v16u8_r = __lsx_vssrln_bu_h(v8u16_a, v8u16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h( ++ ++ // __lsx_vssrln_hu_w ++ // vd, vj, vk ++ // UV8HI, UV4SI, UV4SI ++ v8u16_r = __lsx_vssrln_hu_w(v4u32_a, v4u32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w( ++ ++ // __lsx_vssrln_wu_d ++ // vd, vj, vk ++ // UV4SI, UV2DI, UV2DI ++ v4u32_r = __lsx_vssrln_wu_d(v2u64_a, v2u64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d( ++ ++ // __lsx_vsrlrn_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vsrlrn_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h( ++ 
++ // __lsx_vsrlrn_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vsrlrn_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w( ++ ++ // __lsx_vsrlrn_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vsrlrn_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d( ++ ++ // __lsx_vssrlrn_bu_h ++ // vd, vj, vk ++ // UV16QI, UV8HI, UV8HI ++ v16u8_r = __lsx_vssrlrn_bu_h(v8u16_a, v8u16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h( ++ ++ // __lsx_vssrlrn_hu_w ++ // vd, vj, vk ++ // UV8HI, UV4SI, UV4SI ++ v8u16_r = __lsx_vssrlrn_hu_w(v4u32_a, v4u32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w( ++ ++ // __lsx_vssrlrn_wu_d ++ // vd, vj, vk ++ // UV4SI, UV2DI, UV2DI ++ v4u32_r = __lsx_vssrlrn_wu_d(v2u64_a, v2u64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d( ++ ++ // __lsx_vfrstpi_b ++ // vd, vj, ui5 ++ // V16QI, V16QI, V16QI, UQI ++ v16i8_r = __lsx_vfrstpi_b(v16i8_a, v16i8_b, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b( ++ ++ // __lsx_vfrstpi_h ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, UQI ++ v8i16_r = __lsx_vfrstpi_h(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h( ++ ++ // __lsx_vfrstp_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vfrstp_b(v16i8_a, v16i8_b, v16i8_c); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vfrstp.b( ++ ++ // __lsx_vfrstp_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vfrstp_h(v8i16_a, v8i16_b, v8i16_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vfrstp.h( ++ ++ // __lsx_vshuf4i_d ++ // vd, vj, ui8 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vshuf4i_d(v2i64_a, v2i64_b, ui8); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d( ++ ++ // __lsx_vbsrl_v ++ // vd, vj, ui5 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vbsrl_v(v16i8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbsrl.v( ++ ++ // __lsx_vbsll_v ++ // vd, vj, ui5 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vbsll_v(v16i8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbsll.v( ++ ++ // __lsx_vextrins_b ++ // vd, vj, ui8 ++ // V16QI, V16QI, V16QI, UQI ++ v16i8_r = __lsx_vextrins_b(v16i8_a, v16i8_b, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vextrins.b( ++ ++ // __lsx_vextrins_h ++ // vd, vj, ui8 ++ // V8HI, V8HI, V8HI, UQI ++ v8i16_r = __lsx_vextrins_h(v8i16_a, v8i16_b, ui8); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vextrins.h( ++ ++ // __lsx_vextrins_w ++ // vd, vj, ui8 ++ // V4SI, V4SI, V4SI, UQI ++ v4i32_r = __lsx_vextrins_w(v4i32_a, v4i32_b, ui8); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vextrins.w( ++ ++ // __lsx_vextrins_d ++ // vd, vj, ui8 ++ // V2DI, V2DI, V2DI, UQI ++ v2i64_r = __lsx_vextrins_d(v2i64_a, v2i64_b, ui8); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vextrins.d( ++ ++ // __lsx_vmskltz_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vmskltz_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmskltz.b( ++ ++ // __lsx_vmskltz_h ++ // vd, vj ++ // V8HI, V8HI ++ v8i16_r = __lsx_vmskltz_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmskltz.h( ++ ++ // __lsx_vmskltz_w ++ // vd, vj ++ // V4SI, V4SI ++ v4i32_r = __lsx_vmskltz_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmskltz.w( ++ ++ // __lsx_vmskltz_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vmskltz_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmskltz.d( ++ ++ // __lsx_vsigncov_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = 
__lsx_vsigncov_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsigncov.b( ++ ++ // __lsx_vsigncov_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsigncov_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsigncov.h( ++ ++ // __lsx_vsigncov_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsigncov_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsigncov.w( ++ ++ // __lsx_vsigncov_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsigncov_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsigncov.d( ++ ++ // __lsx_vfmadd_s ++ // vd, vj, vk, va ++ // V4SF, V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmadd_s(v4f32_a, v4f32_b, v4f32_c); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmadd.s( ++ ++ // __lsx_vfmadd_d ++ // vd, vj, vk, va ++ // V2DF, V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmadd_d(v2f64_a, v2f64_b, v2f64_c); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmadd.d( ++ ++ // __lsx_vfmsub_s ++ // vd, vj, vk, va ++ // V4SF, V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmsub_s(v4f32_a, v4f32_b, v4f32_c); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmsub.s( ++ ++ // __lsx_vfmsub_d ++ // vd, vj, vk, va ++ // V2DF, V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmsub_d(v2f64_a, v2f64_b, v2f64_c); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmsub.d( ++ ++ // __lsx_vfnmadd_s ++ // vd, vj, vk, va ++ // V4SF, V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfnmadd_s(v4f32_a, v4f32_b, v4f32_c); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfnmadd.s( ++ ++ // __lsx_vfnmadd_d ++ // vd, vj, vk, va ++ // V2DF, V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfnmadd_d(v2f64_a, v2f64_b, v2f64_c); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfnmadd.d( ++ ++ // __lsx_vfnmsub_s ++ // vd, vj, vk, va ++ // V4SF, V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfnmsub_s(v4f32_a, v4f32_b, v4f32_c); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfnmsub.s( ++ ++ // __lsx_vfnmsub_d ++ // vd, vj, vk, va ++ // V2DF, V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfnmsub_d(v2f64_a, v2f64_b, v2f64_c); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfnmsub.d( ++ ++ // __lsx_vftintrne_w_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vftintrne_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s( ++ ++ // __lsx_vftintrne_l_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vftintrne_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d( ++ ++ // __lsx_vftintrp_w_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vftintrp_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s( ++ ++ // __lsx_vftintrp_l_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vftintrp_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d( ++ ++ // __lsx_vftintrm_w_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vftintrm_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s( ++ ++ // __lsx_vftintrm_l_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vftintrm_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d( ++ ++ // __lsx_vftint_w_d ++ // vd, vj, vk ++ // V4SI, V2DF, V2DF ++ v4i32_r = __lsx_vftint_w_d(v2f64_a, v2f64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftint.w.d( ++ ++ // __lsx_vffint_s_l ++ // vd, vj, vk ++ // V4SF, V2DI, V2DI ++ v4f32_r = __lsx_vffint_s_l(v2i64_a, v2i64_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vffint.s.l( ++ ++ // __lsx_vftintrz_w_d ++ // vd, vj, vk ++ // V4SI, V2DF, V2DF ++ v4i32_r = __lsx_vftintrz_w_d(v2f64_a, v2f64_b); // 
CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d( ++ ++ // __lsx_vftintrp_w_d ++ // vd, vj, vk ++ // V4SI, V2DF, V2DF ++ v4i32_r = __lsx_vftintrp_w_d(v2f64_a, v2f64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d( ++ ++ // __lsx_vftintrm_w_d ++ // vd, vj, vk ++ // V4SI, V2DF, V2DF ++ v4i32_r = __lsx_vftintrm_w_d(v2f64_a, v2f64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d( ++ ++ // __lsx_vftintrne_w_d ++ // vd, vj, vk ++ // V4SI, V2DF, V2DF ++ v4i32_r = __lsx_vftintrne_w_d(v2f64_a, v2f64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d( ++ ++ // __lsx_vftintl_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintl_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s( ++ ++ // __lsx_vftinth_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftinth_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s( ++ ++ // __lsx_vffinth_d_w ++ // vd, vj ++ // V2DF, V4SI ++ v2f64_r = __lsx_vffinth_d_w(v4i32_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vffinth.d.w( ++ ++ // __lsx_vffintl_d_w ++ // vd, vj ++ // V2DF, V4SI ++ v2f64_r = __lsx_vffintl_d_w(v4i32_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vffintl.d.w( ++ ++ // __lsx_vftintrzl_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrzl_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s( ++ ++ // __lsx_vftintrzh_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrzh_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s( ++ ++ // __lsx_vftintrpl_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrpl_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s( ++ ++ // __lsx_vftintrph_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrph_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s( ++ ++ // __lsx_vftintrml_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrml_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s( ++ ++ // __lsx_vftintrmh_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrmh_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s( ++ ++ // __lsx_vftintrnel_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrnel_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s( ++ ++ // __lsx_vftintrneh_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrneh_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s( ++ ++ // __lsx_vfrintrne_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfrintrne_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrintrne.s( ++ ++ // __lsx_vfrintrne_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrintrne_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrintrne.d( ++ ++ // __lsx_vfrintrz_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfrintrz_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrintrz.s( ++ ++ // __lsx_vfrintrz_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrintrz_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrintrz.d( ++ ++ // __lsx_vfrintrp_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfrintrp_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrintrp.s( ++ ++ // __lsx_vfrintrp_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrintrp_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrintrp.d( ++ ++ // __lsx_vfrintrm_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = 
__lsx_vfrintrm_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrintrm.s( ++ ++ // __lsx_vfrintrm_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrintrm_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrintrm.d( ++ ++ // __lsx_vstelm_b ++ // vd, rj, si8, idx ++ // VOID, V16QI, CVPOINTER, SI, UQI ++ __lsx_vstelm_b(v16i8_a, &v16i8_b, 0, idx4); // CHECK: call void @llvm.loongarch.lsx.vstelm.b( ++ // __lsx_vstelm_h ++ // vd, rj, si8, idx ++ // VOID, V8HI, CVPOINTER, SI, UQI ++ __lsx_vstelm_h(v8i16_a, &v8i16_b, 0, idx3); // CHECK: call void @llvm.loongarch.lsx.vstelm.h( ++ ++ // __lsx_vstelm_w ++ // vd, rj, si8, idx ++ // VOID, V4SI, CVPOINTER, SI, UQI ++ __lsx_vstelm_w(v4i32_a, &v4i32_b, 0, idx2); // CHECK: call void @llvm.loongarch.lsx.vstelm.w( ++ ++ // __lsx_vstelm_d ++ // vd, rj, si8, idx ++ // VOID, V2DI, CVPOINTER, SI, UQI ++ __lsx_vstelm_d(v2i64_a, &v2i64_b, 0, idx1); // CHECK: call void @llvm.loongarch.lsx.vstelm.d( ++ ++ // __lsx_vaddwev_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vaddwev_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w( ++ ++ // __lsx_vaddwev_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vaddwev_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h( ++ ++ // __lsx_vaddwev_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vaddwev_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b( ++ ++ // __lsx_vaddwod_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vaddwod_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w( ++ ++ // __lsx_vaddwod_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vaddwod_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h( ++ ++ // __lsx_vaddwod_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vaddwod_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b( ++ ++ // __lsx_vaddwev_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vaddwev_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu( ++ ++ // __lsx_vaddwev_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vaddwev_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu( ++ ++ // __lsx_vaddwev_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vaddwev_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu( ++ ++ // __lsx_vaddwod_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vaddwod_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu( ++ ++ // __lsx_vaddwod_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vaddwod_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu( ++ ++ // __lsx_vaddwod_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vaddwod_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu( ++ ++ // __lsx_vaddwev_d_wu_w ++ // vd, vj, vk ++ // V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vaddwev_d_wu_w(v4u32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w( ++ ++ // __lsx_vaddwev_w_hu_h ++ // vd, vj, vk ++ // V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vaddwev_w_hu_h(v8u16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h( ++ ++ // __lsx_vaddwev_h_bu_b ++ // vd, vj, vk ++ // V8HI, UV16QI, V16QI ++ 
v8i16_r = __lsx_vaddwev_h_bu_b(v16u8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b( ++ ++ // __lsx_vaddwod_d_wu_w ++ // vd, vj, vk ++ // V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vaddwod_d_wu_w(v4u32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w( ++ ++ // __lsx_vaddwod_w_hu_h ++ // vd, vj, vk ++ // V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vaddwod_w_hu_h(v8u16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h( ++ ++ // __lsx_vaddwod_h_bu_b ++ // vd, vj, vk ++ // V8HI, UV16QI, V16QI ++ v8i16_r = __lsx_vaddwod_h_bu_b(v16u8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b( ++ ++ // __lsx_vsubwev_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vsubwev_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w( ++ ++ // __lsx_vsubwev_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vsubwev_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h( ++ ++ // __lsx_vsubwev_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vsubwev_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b( ++ ++ // __lsx_vsubwod_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vsubwod_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w( ++ ++ // __lsx_vsubwod_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vsubwod_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h( ++ ++ // __lsx_vsubwod_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vsubwod_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b( ++ ++ // __lsx_vsubwev_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vsubwev_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu( ++ ++ // __lsx_vsubwev_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vsubwev_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu( ++ ++ // __lsx_vsubwev_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vsubwev_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu( ++ ++ // __lsx_vsubwod_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vsubwod_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu( ++ ++ // __lsx_vsubwod_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vsubwod_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu( ++ ++ // __lsx_vsubwod_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vsubwod_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu( ++ ++ // __lsx_vaddwev_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vaddwev_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d( ++ ++ // __lsx_vaddwod_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vaddwod_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d( ++ ++ // __lsx_vaddwev_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vaddwev_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du( ++ ++ // __lsx_vaddwod_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vaddwod_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du( ++ ++ // __lsx_vsubwev_q_d ++ // vd, 
vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsubwev_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d( ++ ++ // __lsx_vsubwod_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsubwod_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d( ++ ++ // __lsx_vsubwev_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vsubwev_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du( ++ ++ // __lsx_vsubwod_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vsubwod_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du( ++ ++ // __lsx_vaddwev_q_du_d ++ // vd, vj, vk ++ // V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vaddwev_q_du_d(v2u64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d( ++ ++ // __lsx_vaddwod_q_du_d ++ // vd, vj, vk ++ // V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vaddwod_q_du_d(v2u64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d( ++ ++ // __lsx_vmulwev_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vmulwev_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w( ++ ++ // __lsx_vmulwev_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vmulwev_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h( ++ ++ // __lsx_vmulwev_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vmulwev_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b( ++ ++ // __lsx_vmulwod_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vmulwod_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w( ++ ++ // __lsx_vmulwod_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vmulwod_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h( ++ ++ // __lsx_vmulwod_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vmulwod_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b( ++ ++ // __lsx_vmulwev_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vmulwev_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu( ++ ++ // __lsx_vmulwev_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vmulwev_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu( ++ ++ // __lsx_vmulwev_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vmulwev_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu( ++ ++ // __lsx_vmulwod_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vmulwod_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu( ++ ++ // __lsx_vmulwod_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vmulwod_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu( ++ ++ // __lsx_vmulwod_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vmulwod_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu( ++ ++ // __lsx_vmulwev_d_wu_w ++ // vd, vj, vk ++ // V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vmulwev_d_wu_w(v4u32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w( ++ ++ // __lsx_vmulwev_w_hu_h ++ // vd, vj, vk ++ // V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vmulwev_w_hu_h(v8u16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h( ++ 
++ // __lsx_vmulwev_h_bu_b ++ // vd, vj, vk ++ // V8HI, UV16QI, V16QI ++ v8i16_r = __lsx_vmulwev_h_bu_b(v16u8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b( ++ ++ // __lsx_vmulwod_d_wu_w ++ // vd, vj, vk ++ // V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vmulwod_d_wu_w(v4u32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w( ++ ++ // __lsx_vmulwod_w_hu_h ++ // vd, vj, vk ++ // V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vmulwod_w_hu_h(v8u16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h( ++ ++ // __lsx_vmulwod_h_bu_b ++ // vd, vj, vk ++ // V8HI, UV16QI, V16QI ++ v8i16_r = __lsx_vmulwod_h_bu_b(v16u8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b( ++ ++ // __lsx_vmulwev_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmulwev_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d( ++ ++ // __lsx_vmulwod_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmulwod_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d( ++ ++ // __lsx_vmulwev_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vmulwev_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du( ++ ++ // __lsx_vmulwod_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vmulwod_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du( ++ ++ // __lsx_vmulwev_q_du_d ++ // vd, vj, vk ++ // V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vmulwev_q_du_d(v2u64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d( ++ ++ // __lsx_vmulwod_q_du_d ++ // vd, vj, vk ++ // V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vmulwod_q_du_d(v2u64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d( ++ ++ // __lsx_vhaddw_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vhaddw_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d( ++ ++ // __lsx_vhaddw_qu_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vhaddw_qu_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du( ++ ++ // __lsx_vhsubw_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vhsubw_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d( ++ ++ // __lsx_vhsubw_qu_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vhsubw_qu_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du( ++ ++ // __lsx_vmaddwev_d_w ++ // vd, vj, vk ++ // V2DI, V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vmaddwev_d_w(v2i64_a, v4i32_b, v4i32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w( ++ ++ // __lsx_vmaddwev_w_h ++ // vd, vj, vk ++ // V4SI, V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vmaddwev_w_h(v4i32_a, v8i16_b, v8i16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h( ++ ++ // __lsx_vmaddwev_h_b ++ // vd, vj, vk ++ // V8HI, V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vmaddwev_h_b(v8i16_a, v16i8_b, v16i8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b( ++ ++ // __lsx_vmaddwev_d_wu ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV4SI, UV4SI ++ v2u64_r = __lsx_vmaddwev_d_wu(v2u64_a, v4u32_b, v4u32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu( ++ ++ // __lsx_vmaddwev_w_hu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV8HI, UV8HI ++ v4u32_r = __lsx_vmaddwev_w_hu(v4u32_a, v8u16_b, v8u16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu( ++ ++ // __lsx_vmaddwev_h_bu ++ // vd, vj, vk 
++ // UV8HI, UV8HI, UV16QI, UV16QI ++ v8u16_r = __lsx_vmaddwev_h_bu(v8u16_a, v16u8_b, v16u8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu( ++ ++ // __lsx_vmaddwod_d_w ++ // vd, vj, vk ++ // V2DI, V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vmaddwod_d_w(v2i64_a, v4i32_b, v4i32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w( ++ ++ // __lsx_vmaddwod_w_h ++ // vd, vj, vk ++ // V4SI, V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vmaddwod_w_h(v4i32_a, v8i16_b, v8i16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h( ++ ++ // __lsx_vmaddwod_h_b ++ // vd, vj, vk ++ // V8HI, V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vmaddwod_h_b(v8i16_a, v16i8_b, v16i8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b( ++ ++ // __lsx_vmaddwod_d_wu ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV4SI, UV4SI ++ v2u64_r = __lsx_vmaddwod_d_wu(v2u64_a, v4u32_b, v4u32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu( ++ ++ // __lsx_vmaddwod_w_hu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV8HI, UV8HI ++ v4u32_r = __lsx_vmaddwod_w_hu(v4u32_a, v8u16_b, v8u16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu( ++ ++ // __lsx_vmaddwod_h_bu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV16QI, UV16QI ++ v8u16_r = __lsx_vmaddwod_h_bu(v8u16_a, v16u8_b, v16u8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu( ++ ++ // __lsx_vmaddwev_d_wu_w ++ // vd, vj, vk ++ // V2DI, V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vmaddwev_d_wu_w(v2i64_a, v4u32_b, v4i32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w( ++ ++ // __lsx_vmaddwev_w_hu_h ++ // vd, vj, vk ++ // V4SI, V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vmaddwev_w_hu_h(v4i32_a, v8u16_b, v8i16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h( ++ ++ // __lsx_vmaddwev_h_bu_b ++ // vd, vj, vk ++ // V8HI, V8HI, UV16QI, V16QI ++ v8i16_r = __lsx_vmaddwev_h_bu_b(v8i16_a, v16u8_b, v16i8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b( ++ ++ // __lsx_vmaddwod_d_wu_w ++ // vd, vj, vk ++ // V2DI, V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vmaddwod_d_wu_w(v2i64_a, v4u32_b, v4i32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w( ++ ++ // __lsx_vmaddwod_w_hu_h ++ // vd, vj, vk ++ // V4SI, V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vmaddwod_w_hu_h(v4i32_a, v8u16_b, v8i16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h( ++ ++ // __lsx_vmaddwod_h_bu_b ++ // vd, vj, vk ++ // V8HI, V8HI, UV16QI, V16QI ++ v8i16_r = __lsx_vmaddwod_h_bu_b(v8i16_a, v16u8_b, v16i8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b( ++ ++ // __lsx_vmaddwev_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmaddwev_q_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d( ++ ++ // __lsx_vmaddwod_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmaddwod_q_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d( ++ ++ // __lsx_vmaddwev_q_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmaddwev_q_du(v2u64_a, v2u64_b, v2u64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du( ++ ++ // __lsx_vmaddwod_q_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmaddwod_q_du(v2u64_a, v2u64_b, v2u64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du( ++ ++ // __lsx_vmaddwev_q_du_d ++ // vd, vj, vk ++ // V2DI, V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vmaddwev_q_du_d(v2i64_a, v2u64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d( ++ 
++ // __lsx_vmaddwod_q_du_d ++ // vd, vj, vk ++ // V2DI, V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vmaddwod_q_du_d(v2i64_a, v2u64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d( ++ ++ // __lsx_vrotr_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vrotr_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vrotr.b( ++ ++ // __lsx_vrotr_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vrotr_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vrotr.h( ++ ++ // __lsx_vrotr_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vrotr_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vrotr.w( ++ ++ // __lsx_vrotr_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vrotr_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vrotr.d( ++ ++ // __lsx_vadd_q ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vadd_q(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vadd.q( ++ ++ // __lsx_vsub_q ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsub_q(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsub.q( ++ ++ // __lsx_vldrepl_b ++ // vd, rj, si12 ++ // V16QI, CVPOINTER, SI ++ v16i8_r = __lsx_vldrepl_b(&v16i8_a, si12); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vldrepl.b( ++ ++ // __lsx_vldrepl_h ++ // vd, rj, si11 ++ // V8HI, CVPOINTER, SI ++ v8i16_r = __lsx_vldrepl_h(&v8i16_a, si11); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vldrepl.h( ++ ++ // __lsx_vldrepl_w ++ // vd, rj, si10 ++ // V4SI, CVPOINTER, SI ++ v4i32_r = __lsx_vldrepl_w(&v4i32_a, si10); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vldrepl.w( ++ ++ // __lsx_vldrepl_d ++ // vd, rj, si9 ++ // V2DI, CVPOINTER, SI ++ v2i64_r = __lsx_vldrepl_d(&v2i64_a, si9); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vldrepl.d( ++ ++ // __lsx_vmskgez_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vmskgez_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmskgez.b( ++ ++ // __lsx_vmsknz_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vmsknz_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmsknz.b( ++ ++ // __lsx_vexth_h_b ++ // vd, vj ++ // V8HI, V16QI ++ v8i16_r = __lsx_vexth_h_b(v16i8_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vexth.h.b( ++ ++ // __lsx_vexth_w_h ++ // vd, vj ++ // V4SI, V8HI ++ v4i32_r = __lsx_vexth_w_h(v8i16_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vexth.w.h( ++ ++ // __lsx_vexth_d_w ++ // vd, vj ++ // V2DI, V4SI ++ v2i64_r = __lsx_vexth_d_w(v4i32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vexth.d.w( ++ ++ // __lsx_vexth_q_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vexth_q_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vexth.q.d( ++ ++ // __lsx_vexth_hu_bu ++ // vd, vj ++ // UV8HI, UV16QI ++ v8u16_r = __lsx_vexth_hu_bu(v16u8_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu( ++ ++ // __lsx_vexth_wu_hu ++ // vd, vj ++ // UV4SI, UV8HI ++ v4u32_r = __lsx_vexth_wu_hu(v8u16_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu( ++ ++ // __lsx_vexth_du_wu ++ // vd, vj ++ // UV2DI, UV4SI ++ v2u64_r = __lsx_vexth_du_wu(v4u32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu( ++ ++ // __lsx_vexth_qu_du ++ // vd, vj ++ // UV2DI, UV2DI ++ v2u64_r = __lsx_vexth_qu_du(v2u64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du( ++ ++ // __lsx_vrotri_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vrotri_b(v16i8_a, ui3); // CHECK: call <16 x i8> 
@llvm.loongarch.lsx.vrotri.b( ++ ++ // __lsx_vrotri_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vrotri_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vrotri.h( ++ ++ // __lsx_vrotri_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vrotri_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vrotri.w( ++ ++ // __lsx_vrotri_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vrotri_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vrotri.d( ++ ++ // __lsx_vextl_q_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vextl_q_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vextl.q.d( ++ ++ // __lsx_vsrlni_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vsrlni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h( ++ ++ // __lsx_vsrlni_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vsrlni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w( ++ ++ // __lsx_vsrlni_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vsrlni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d( ++ ++ // __lsx_vsrlni_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vsrlni_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q( ++ ++ // __lsx_vssrlni_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vssrlni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h( ++ ++ // __lsx_vssrlni_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vssrlni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w( ++ ++ // __lsx_vssrlni_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vssrlni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d( ++ ++ // __lsx_vssrlni_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vssrlni_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q( ++ ++ // __lsx_vssrlni_bu_h ++ // vd, vj, ui4 ++ // UV16QI, UV16QI, V16QI, USI ++ v16u8_r = __lsx_vssrlni_bu_h(v16u8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h( ++ ++ // __lsx_vssrlni_hu_w ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, V8HI, USI ++ v8u16_r = __lsx_vssrlni_hu_w(v8u16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w( ++ ++ // __lsx_vssrlni_wu_d ++ // vd, vj, ui6 ++ // UV4SI, UV4SI, V4SI, USI ++ v4u32_r = __lsx_vssrlni_wu_d(v4u32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d( ++ ++ // __lsx_vssrlni_du_q ++ // vd, vj, ui7 ++ // UV2DI, UV2DI, V2DI, USI ++ v2u64_r = __lsx_vssrlni_du_q(v2u64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q( ++ ++ // __lsx_vssrlrni_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vssrlrni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h( ++ ++ // __lsx_vssrlrni_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vssrlrni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w( ++ ++ // __lsx_vssrlrni_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vssrlrni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d( ++ ++ // __lsx_vssrlrni_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, 
USI ++ v2i64_r = __lsx_vssrlrni_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q( ++ ++ // __lsx_vssrlrni_bu_h ++ // vd, vj, ui4 ++ // UV16QI, UV16QI, V16QI, USI ++ v16u8_r = __lsx_vssrlrni_bu_h(v16u8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h( ++ ++ // __lsx_vssrlrni_hu_w ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, V8HI, USI ++ v8u16_r = __lsx_vssrlrni_hu_w(v8u16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w( ++ ++ // __lsx_vssrlrni_wu_d ++ // vd, vj, ui6 ++ // UV4SI, UV4SI, V4SI, USI ++ v4u32_r = __lsx_vssrlrni_wu_d(v4u32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d( ++ ++ // __lsx_vssrlrni_du_q ++ // vd, vj, ui7 ++ // UV2DI, UV2DI, V2DI, USI ++ v2u64_r = __lsx_vssrlrni_du_q(v2u64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q( ++ ++ // __lsx_vsrani_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vsrani_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h( ++ ++ // __lsx_vsrani_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vsrani_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w( ++ ++ // __lsx_vsrani_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vsrani_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d( ++ ++ // __lsx_vsrani_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vsrani_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q( ++ ++ // __lsx_vsrarni_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vsrarni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h( ++ ++ // __lsx_vsrarni_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vsrarni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w( ++ ++ // __lsx_vsrarni_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vsrarni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d( ++ ++ // __lsx_vsrarni_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vsrarni_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q( ++ ++ // __lsx_vssrani_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vssrani_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h( ++ ++ // __lsx_vssrani_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vssrani_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w( ++ ++ // __lsx_vssrani_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vssrani_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d( ++ ++ // __lsx_vssrani_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vssrani_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q( ++ ++ // __lsx_vssrani_bu_h ++ // vd, vj, ui4 ++ // UV16QI, UV16QI, V16QI, USI ++ v16u8_r = __lsx_vssrani_bu_h(v16u8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h( ++ ++ // __lsx_vssrani_hu_w ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, V8HI, USI ++ v8u16_r = __lsx_vssrani_hu_w(v8u16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w( ++ ++ // __lsx_vssrani_wu_d ++ // vd, vj, 
ui6 ++ // UV4SI, UV4SI, V4SI, USI ++ v4u32_r = __lsx_vssrani_wu_d(v4u32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d( ++ ++ // __lsx_vssrani_du_q ++ // vd, vj, ui7 ++ // UV2DI, UV2DI, V2DI, USI ++ v2u64_r = __lsx_vssrani_du_q(v2u64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q( ++ ++ // __lsx_vssrarni_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vssrarni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h( ++ ++ // __lsx_vssrarni_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vssrarni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w( ++ ++ // __lsx_vssrarni_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vssrarni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d( ++ ++ // __lsx_vssrarni_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vssrarni_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q( ++ ++ // __lsx_vssrarni_bu_h ++ // vd, vj, ui4 ++ // UV16QI, UV16QI, V16QI, USI ++ v16u8_r = __lsx_vssrarni_bu_h(v16u8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h( ++ ++ // __lsx_vssrarni_hu_w ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, V8HI, USI ++ v8u16_r = __lsx_vssrarni_hu_w(v8u16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w( ++ ++ // __lsx_vssrarni_wu_d ++ // vd, vj, ui6 ++ // UV4SI, UV4SI, V4SI, USI ++ v4u32_r = __lsx_vssrarni_wu_d(v4u32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d( ++ ++ // __lsx_vssrarni_du_q ++ // vd, vj, ui7 ++ // UV2DI, UV2DI, V2DI, USI ++ v2u64_r = __lsx_vssrarni_du_q(v2u64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q( ++ ++ // __lsx_vpermi_w ++ // vd, vj, ui8 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vpermi_w(v4i32_a, v4i32_b, ui8); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpermi.w( ++ ++ // __lsx_vld ++ // vd, rj, si12 ++ // V16QI, CVPOINTER, SI ++ v16i8_r = __lsx_vld(&v16i8_a, si12); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vld( ++ ++ // __lsx_vst ++ // vd, rj, si12 ++ // VOID, V16QI, CVPOINTER, SI ++ __lsx_vst(v16i8_a, &v16i8_b, 0); // CHECK: call void @llvm.loongarch.lsx.vst( ++ ++ // __lsx_vssrlrn_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vssrlrn_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h( ++ ++ // __lsx_vssrlrn_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vssrlrn_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w( ++ ++ // __lsx_vssrlrn_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vssrlrn_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d( ++ ++ // __lsx_vssrln_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vssrln_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h( ++ ++ // __lsx_vssrln_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vssrln_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w( ++ ++ // __lsx_vssrln_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vssrln_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d( ++ ++ // __lsx_vorn_v ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vorn_v(v16i8_a, v16i8_b); // CHECK: call <16 x i8> 
@llvm.loongarch.lsx.vorn.v( ++ ++ // __lsx_vldi ++ // vd, i13 ++ // V2DI, HI ++ v2i64_r = __lsx_vldi(i13); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vldi( ++ ++ // __lsx_vshuf_b ++ // vd, vj, vk, va ++ // V16QI, V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vshuf_b(v16i8_a, v16i8_b, v16i8_c); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vshuf.b( ++ ++ // __lsx_vldx ++ // vd, rj, rk ++ // V16QI, CVPOINTER, DI ++ v16i8_r = __lsx_vldx(&v16i8_a, i64_d); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vldx( ++ ++ // __lsx_vstx ++ // vd, rj, rk ++ // VOID, V16QI, CVPOINTER, DI ++ __lsx_vstx(v16i8_a, &v16i8_b, i64_d); // CHECK: call void @llvm.loongarch.lsx.vstx( ++ ++ // __lsx_vextl_qu_du ++ // vd, vj ++ // UV2DI, UV2DI ++ v2u64_r = __lsx_vextl_qu_du(v2u64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du( ++ ++ // __lsx_bnz_v ++ // rd, vj ++ // SI, UV16QI ++ i32_r = __lsx_bnz_v(v16u8_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.v( ++ ++ // __lsx_bz_v ++ // rd, vj ++ // SI, UV16QI ++ i32_r = __lsx_bz_v(v16u8_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.v( ++ ++ // __lsx_bnz_b ++ // rd, vj ++ // SI, UV16QI ++ i32_r = __lsx_bnz_b(v16u8_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.b( ++ ++ // __lsx_bnz_h ++ // rd, vj ++ // SI, UV8HI ++ i32_r = __lsx_bnz_h(v8u16_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.h( ++ ++ // __lsx_bnz_w ++ // rd, vj ++ // SI, UV4SI ++ i32_r = __lsx_bnz_w(v4u32_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.w( ++ ++ // __lsx_bnz_d ++ // rd, vj ++ // SI, UV2DI ++ i32_r = __lsx_bnz_d(v2u64_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.d( ++ ++ // __lsx_bz_b ++ // rd, vj ++ // SI, UV16QI ++ i32_r = __lsx_bz_b(v16u8_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.b( ++ ++ // __lsx_bz_h ++ // rd, vj ++ // SI, UV8HI ++ i32_r = __lsx_bz_h(v8u16_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.h( ++ ++ // __lsx_bz_w ++ // rd, vj ++ // SI, UV4SI ++ i32_r = __lsx_bz_w(v4u32_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.w( ++ ++ // __lsx_bz_d ++ // rd, vj ++ // SI, UV2DI ++ i32_r = __lsx_bz_d(v2u64_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.d( ++ ++ v16i8_r = __lsx_vsrlrni_b_h(v16i8_a, v16i8_b, 2); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h( ++ ++ v8i16_r = __lsx_vsrlrni_h_w(v8i16_a, v8i16_b, 2); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w( ++ ++ v4i32_r = __lsx_vsrlrni_w_d(v4i32_a, v4i32_b, 2); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d( ++ ++ v2i64_r = __lsx_vsrlrni_d_q(v2i64_a, v2i64_b, 2); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q( ++ ++ v16i8_r = __lsx_vrepli_b(2); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vrepli.b( ++ ++ v8i16_r = __lsx_vrepli_h(2); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vrepli.h( ++ ++ v4i32_r = __lsx_vrepli_w(2); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vrepli.w( ++ ++ v2i64_r = __lsx_vrepli_d(2); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vrepli.d( ++} +diff --git a/clang/test/CodeGen/loongarch-inline-asm-modifiers.c b/clang/test/CodeGen/loongarch-inline-asm-modifiers.c +new file mode 100644 +index 000000000..412eca2bd +--- /dev/null ++++ b/clang/test/CodeGen/loongarch-inline-asm-modifiers.c +@@ -0,0 +1,50 @@ ++// RUN: %clang -target loongarch64-unknown-linux-gnu -S -o - -emit-llvm %s \ ++// RUN: | FileCheck %s ++ ++// This checks that the frontend will accept inline asm operand modifiers ++ ++int printf(const char*, ...); ++ ++typedef long long v2i64 __attribute__ ((vector_size(16), aligned(16))); ++typedef long long v4i64 __attribute__ ((vector_size(32), aligned(32))); ++ ++// 
CHECK: %{{[0-9]+}} = call i32 asm "ld.w $0,$1;\0A", "=r,*m"(ptr elementtype(i32) getelementptr inbounds (i32, ptr @b, i64 4))
++// CHECK: %{{[0-9]+}} = call i32 asm "ld.w $0,${1:D};\0A", "=r,*m"(ptr elementtype(i32) getelementptr inbounds (i32, ptr @b, i64 4))
++// CHECK: %{{[0-9]+}} = call <2 x i64> asm "vldi ${0:w},1", "=f"
++// CHECK: %{{[0-9]+}} = call <4 x i64> asm "xldi ${0:u},1", "=f"
++int b[8] = {0,1,2,3,4,5,6,7};
++int main()
++{
++  int i;
++  v2i64 v2i64_r;
++  v4i64 v4i64_r;
++
++  // The first word. Notice, no 'D'
++  {asm (
++  "ld.w %0,%1;\n"
++  : "=r" (i)
++  : "m" (*(b+4)));}
++
++  printf("%d\n",i);
++
++  // The second word
++  {asm (
++  "ld.w %0,%D1;\n"
++  : "=r" (i)
++  : "m" (*(b+4))
++  );}
++
++  // LSX registers
++  { asm("vldi %w0,1"
++    : "=f"(v2i64_r)); }
++
++  printf("%d\n", i);
++
++  // LASX registers
++  { asm("xldi %u0,1"
++    : "=f"(v4i64_r)); }
++
++  printf("%d\n",i);
++
++  return 1;
++}
+diff --git a/clang/test/CodeGen/loongarch-inline-asm.c b/clang/test/CodeGen/loongarch-inline-asm.c
+new file mode 100644
+index 000000000..1f995ac79
+--- /dev/null
++++ b/clang/test/CodeGen/loongarch-inline-asm.c
+@@ -0,0 +1,31 @@
++// REQUIRES: loongarch-registered-target
++// RUN: %clang_cc1 -triple loongarch64-linux-gnu -emit-llvm -o - %s | FileCheck %s
++
++int data;
++
++void m () {
++  asm("ld.w $r1, %0" :: "m"(data));
++  // CHECK: call void asm sideeffect "ld.w $$r1, $0", "*m"(ptr elementtype(i32) @data)
++}
++
++void ZC () {
++  asm("ll.w $r1, %0" :: "ZC"(data));
++  // CHECK: call void asm sideeffect "ll.w $$r1, $0", "*^ZC"(ptr elementtype(i32) @data)
++}
++
++void ZB () {
++  asm("amadd_db.w $zero, $r1, %0" :: "ZB"(data));
++  // CHECK: call void asm sideeffect "amadd_db.w $$zero, $$r1, $0", "*^ZB"(ptr elementtype(i32) @data)
++}
++
++void R () {
++  asm("ld.w $r1, %0" :: "R"(data));
++  // CHECK: call void asm sideeffect "ld.w $$r1, $0", "*R"(ptr elementtype(i32) @data)
++}
++
++int *p;
++void preld () {
++  asm("preld 0, %0, 2" :: "r"(p));
++  // CHECK: %0 = load ptr, ptr @p, align 8
++  // CHECK: call void asm sideeffect "preld 0, $0, 2", "r"(ptr %0)
++}
+diff --git a/clang/test/CodeGenCXX/LoongArch/abi-lp64d-struct-inherit.cpp b/clang/test/CodeGenCXX/LoongArch/abi-lp64d-struct-inherit.cpp
+new file mode 100644
+index 000000000..dc5ffaf08
+--- /dev/null
++++ b/clang/test/CodeGenCXX/LoongArch/abi-lp64d-struct-inherit.cpp
+@@ -0,0 +1,95 @@
++// RUN: %clang_cc1 -triple loongarch64 -target-abi lp64 \
++// RUN: -emit-llvm %s -o - | FileCheck %s
++
++#include <stdint.h>
++
++/// Ensure that fields inherited from a parent struct are treated in the same
++/// way as fields directly in the child for the purposes of LoongArch ABI rules.
++
++struct parent1_int32_s {
++  int32_t i1;
++};
++
++struct child1_int32_s : parent1_int32_s {
++  int32_t i2;
++};
++
++// CHECK-LABEL: define{{.*}} i64 @_Z30int32_int32_struct_inheritance14child1_int32_s(i64 %a.coerce)
++struct child1_int32_s int32_int32_struct_inheritance(struct child1_int32_s a) {
++  return a;
++}
++
++struct parent2_int32_s {
++  int32_t i1;
++};
++
++struct child2_float_s : parent2_int32_s {
++  float f1;
++};
++
++// CHECK-LABEL: define{{.*}} { i32, float } @_Z30int32_float_struct_inheritance14child2_float_s(i32 %0, float %1)
++struct child2_float_s int32_float_struct_inheritance(struct child2_float_s a) {
++  return a;
++}
++
++struct parent3_float_s {
++  float f1;
++};
++
++struct child3_int64_s : parent3_float_s {
++  int64_t i1;
++};
++
++// CHECK-LABEL: define{{.*}} { float, i64 } @_Z30float_int64_struct_inheritance14child3_int64_s(float %0, i64 %1)
++struct child3_int64_s float_int64_struct_inheritance(struct child3_int64_s a) {
++  return a;
++}
++
++struct parent4_double_s {
++  double d1;
++};
++
++struct child4_double_s : parent4_double_s {
++  double d1;
++};
++
++// CHECK-LABEL: define{{.*}} { double, double } @_Z32double_double_struct_inheritance15child4_double_s(double %0, double %1)
++struct child4_double_s double_double_struct_inheritance(struct child4_double_s a) {
++  return a;
++}
++
++/// When virtual inheritance is used, the resulting struct isn't eligible for
++/// passing in registers.
++
++struct parent5_virtual_s {
++  int32_t i1;
++};
++
++struct child5_virtual_s : virtual parent5_virtual_s {
++  float f1;
++};
++
++// CHECK-LABEL: define{{.*}} void @_ZN16child5_virtual_sC1EOS_(ptr{{.*}} %this, ptr{{.*}} dereferenceable(12) %0)
++struct child5_virtual_s int32_float_virtual_struct_inheritance(struct child5_virtual_s a) {
++  return a;
++}
++
++/// Check for correct lowering in the presence of diamond inheritance.
++ ++struct parent6_float_s { ++ float f1; ++}; ++ ++struct child6a_s : parent6_float_s { ++}; ++ ++struct child6b_s : parent6_float_s { ++}; ++ ++struct grandchild_6_s : child6a_s, child6b_s { ++}; ++ ++// CHECK-LABEL: define{{.*}} { float, float } @_Z38float_float_diamond_struct_inheritance14grandchild_6_s(float %0, float %1) ++struct grandchild_6_s float_float_diamond_struct_inheritance(struct grandchild_6_s a) { ++ return a; ++} +diff --git a/clang/test/Driver/baremetal.cpp b/clang/test/Driver/baremetal.cpp +index 7c11fe671..56eb5b708 100644 +--- a/clang/test/Driver/baremetal.cpp ++++ b/clang/test/Driver/baremetal.cpp +@@ -105,7 +105,7 @@ + // CHECK-SYSROOT-INC-NOT: "-internal-isystem" "include" + + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: -target aarch64-none-elf \ ++// RUN: -target aarch64-none-elf --sysroot= \ + // RUN: | FileCheck --check-prefix=CHECK-AARCH64-NO-HOST-INC %s + // Verify that the bare metal driver does not include any host system paths: + // CHECK-AARCH64-NO-HOST-INC: InstalledDir: [[INSTALLEDDIR:.+]] +diff --git a/clang/test/Driver/hexagon-toolchain-linux.c b/clang/test/Driver/hexagon-toolchain-linux.c +index 05ae17339..986c2dd61 100644 +--- a/clang/test/Driver/hexagon-toolchain-linux.c ++++ b/clang/test/Driver/hexagon-toolchain-linux.c +@@ -100,7 +100,7 @@ + // ----------------------------------------------------------------------------- + // internal-isystem for linux with and without musl + // ----------------------------------------------------------------------------- +-// RUN: %clang -### -target hexagon-unknown-linux-musl \ ++// RUN: %clang -### -target hexagon-unknown-linux-musl --sysroot= \ + // RUN: -ccc-install-dir %S/Inputs/hexagon_tree/Tools/bin \ + // RUN: -resource-dir=%S/Inputs/resource_dir \ + // RUN: %s 2>&1 \ +@@ -110,7 +110,7 @@ + // CHECK008-SAME: {{^}} "-internal-isystem" "[[RESOURCE]]/include" + // CHECK008-SAME: {{^}} "-internal-externc-isystem" "[[INSTALLED_DIR]]/../target/hexagon/include" + +-// RUN: %clang -### -target hexagon-unknown-linux \ ++// RUN: %clang -### -target hexagon-unknown-linux --sysroot= \ + // RUN: -ccc-install-dir %S/Inputs/hexagon_tree/Tools/bin \ + // RUN: -resource-dir=%S/Inputs/resource_dir \ + // RUN: %s 2>&1 \ +diff --git a/clang/test/Driver/loongarch-alignment-feature.c b/clang/test/Driver/loongarch-alignment-feature.c +new file mode 100644 +index 000000000..2270ff536 +--- /dev/null ++++ b/clang/test/Driver/loongarch-alignment-feature.c +@@ -0,0 +1,8 @@ ++// RUN: %clang -target loongarch64-unknown-linux-gnu -mno-strict-align -### %s 2> %t ++// RUN: FileCheck --check-prefix=CHECK-UNALIGNED < %t %s ++ ++// RUN: %clang -target loongarch64-unknown-linux-gnu -mstrict-align -### %s 2> %t ++// RUN: FileCheck --check-prefix=CHECK-ALIGNED < %t %s ++ ++// CHECK-UNALIGNED: "-target-feature" "+unaligned-access" ++// CHECK-ALIGNED: "-target-feature" "-unaligned-access" +diff --git a/clang/test/Driver/loongarch-march.c b/clang/test/Driver/loongarch-march.c +new file mode 100644 +index 000000000..196862229 +--- /dev/null ++++ b/clang/test/Driver/loongarch-march.c +@@ -0,0 +1,15 @@ ++/// This test checks the valid cpu model which is supported by LoongArch. 
++ ++// RUN: %clang --target=loongarch64 -march=la264 -emit-llvm -### %s 2> %t ++// | FileCheck -check-prefix=LA264 %t %s ++// RUN: %clang --target=loongarch64 -march=la364 -emit-llvm -### %s 2> %t ++// | FileCheck -check-prefix=LA364 %t %s ++// RUN: %clang --target=loongarch64 -march=la464 -emit-llvm -### %s 2> %t ++// | FileCheck -check-prefix=LA464 %t %s ++// RUN: %clang --target=loongarch64 -march=xxx -emit-llvm -### %s 2> %t ++// | FileCheck -check-prefix=INVALID %t %s ++ ++// LA264: "-target-cpu la264" "-target-abi lp64" ++// LA364: "-target-cpu la364" "-target-abi lp64" ++// LA464: "-target-cpu la464" "-target-abi lp64" ++// INVALID: error: unknown target CPU 'xxx' +diff --git a/clang/test/Preprocessor/init-loongarch.c b/clang/test/Preprocessor/init-loongarch.c +new file mode 100644 +index 000000000..a32853d17 +--- /dev/null ++++ b/clang/test/Preprocessor/init-loongarch.c +@@ -0,0 +1,10 @@ ++// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - | grep loongarch | FileCheck %s ++ ++// CHECK: #define __loongarch64 1 ++// CHECK-NEXT: #define __loongarch__ 1 ++// CHECK-NEXT: #define __loongarch_double_float 1 ++// CHECK-NEXT: #define __loongarch_fpr 64 ++// CHECK-NEXT: #define __loongarch_frlen 64 ++// CHECK-NEXT: #define __loongarch_grlen 64 ++// CHECK-NEXT: #define __loongarch_hard_float 1 ++// CHECK-NEXT: #define __loongarch_lp64 1 +diff --git a/clang/unittests/Interpreter/ExceptionTests/InterpreterExceptionTest.cpp b/clang/unittests/Interpreter/ExceptionTests/InterpreterExceptionTest.cpp +index 75928d912..3350ee3fe 100644 +--- a/clang/unittests/Interpreter/ExceptionTests/InterpreterExceptionTest.cpp ++++ b/clang/unittests/Interpreter/ExceptionTests/InterpreterExceptionTest.cpp +@@ -104,6 +104,11 @@ extern "C" int throw_exception() { + if (Triple.isPPC()) + return; + ++ // FIXME: LoongArch64 fails due to `Symbols not found: ++ // [DW.ref.__gxx_personality_v0]` ++ if (Triple.isLoongArch64()) ++ return; ++ + // FIXME: ARM fails due to `Not implemented relocation type!` + if (Triple.isARM()) + return; diff --git a/clang.spec b/clang.spec index 639e472..b7d3cbb 100644 --- a/clang.spec +++ b/clang.spec @@ -1,4 +1,4 @@ -%define anolis_release .0.2 +%define anolis_release .0.3 %bcond_with compat_build %bcond_without check @@ -68,7 +68,7 @@ Patch4: 0001-cmake-Allow-shared-libraries-to-customize-the-soname.patch Patch5: 0001-Work-around-gcc-miscompile.patch Patch7: 0010-PATCH-clang-Produce-DWARF4-by-default.patch Patch8: disable-recommonmark.patch - +Patch9: 0001-Support-LoongArch.patch # Patches for clang-tools-extra %if %{without compat_build} @@ -77,6 +77,9 @@ Patch201: 0001-clang-tools-extra-Make-test-dependency-on-LLVMHello-.patch BuildRequires: gcc BuildRequires: gcc-c++ +%ifarch loongarch64 +BuildRequires: clang +%endif BuildRequires: cmake BuildRequires: ninja-build %if %{with compat_build} @@ -165,7 +168,9 @@ libomp-devel to enable -fopenmp. Summary: Runtime library for clang Requires: %{name}-resource-filesystem%{?_isa} = %{version} # RHEL specific: Use libstdc++ from gcc12 by default. rhbz#2064507 +%ifnarch loongarch64 Requires: gcc-toolset-12-gcc-c++ +%endif Recommends: compiler-rt%{?_isa} = %{version} # libomp-devel is required, so clang can find the omp.h header when compiling # with -fopenmp. @@ -323,6 +328,10 @@ CFLAGS="$CFLAGS -Wno-address -Wno-nonnull -Wno-maybe-uninitialized" # We set CLANG_DEFAULT_PIE_ON_LINUX=OFF to match the default used by Fedora's GCC. %cmake .. 
-G Ninja \
 -DCLANG_DEFAULT_PIE_ON_LINUX=OFF \
+%ifarch loongarch64
+ -DCMAKE_C_COMPILER=clang \
+ -DCMAKE_CXX_COMPILER=clang++ \
+%endif
 -DLLVM_PARALLEL_LINK_JOBS=1 \
 -DLLVM_LINK_LLVM_DYLIB:BOOL=ON \
 -DCMAKE_BUILD_TYPE=RelWithDebInfo \
@@ -374,8 +383,12 @@ CFLAGS="$CFLAGS -Wno-address -Wno-nonnull -Wno-maybe-uninitialized"
 %ifarch %{arm}
 -DCLANG_DEFAULT_LINKER=lld \
 %endif
+%ifnarch loongarch64
 -DCLANG_DEFAULT_UNWINDLIB=libgcc \
 -DGCC_INSTALL_PREFIX=/opt/rh/gcc-toolset-12/root/usr
+%else
+ -DCLANG_DEFAULT_UNWINDLIB=libgcc
+%endif
 
 %cmake_build
 
@@ -608,6 +621,11 @@ false
 %endif
 
 %changelog
+* Fri Aug 11 2023 Chen Li - 15.0.7-1.0.3
+- Support LoongArch
+- Workaround: add BuildRequires of clang on loongarch64 because
+  gcc-toolset-12 is currently missing there
+
 * Wed Jul 19 2023 Zhao Hang - 15.0.7-1.0.2
 - Add loongarch64 arch
-- 
Gitee