From 366b8b70b143f6b3e6bf967c5c638e65216ea0d5 Mon Sep 17 00:00:00 2001 From: chenli Date: Tue, 9 Apr 2024 14:36:13 +0800 Subject: [PATCH] LoongArch: Backport patches from release/18.x Sync to 9b3edb592deb except for CALL36 relative patches which need to be consistent with GCC. --- ...Use-the-ClangBuiltin-class-to-automa.patch | 507 + ...Support-compiler-options-mlsx-mlasx-.patch | 351 + ...Add-ABI-implementation-of-passing-ve.patch | 34 + ...Support-the-builtin-functions-for-LS.patch | 5094 +++ ...Support-the-builtin-functions-for-LA.patch | 5143 +++ ...ch-CodeGen-Add-LSX-builtin-testcases.patch | 12431 ++++++ ...h-CodeGen-Add-LASX-builtin-testcases.patch | 11693 ++++++ ...Do-not-pass-vector-arguments-via-vec.patch | 32010 ++++++++++++++++ ...sanitize-cfi-icall-on-loongarch64-67.patch | 42 + ...Generate-_mcount-instead-of-mcount-6.patch | 62 + ...dd-some-ABI-regression-tests-for-emp.patch | 87 + ...I-mismatch-with-gcc-g-about-empty-st.patch | 86 + ...Arch-Pre-commit-test-for-issue-70890.patch | 47 + ...I-mismatch-with-g-when-handling-empt.patch | 69 + ...oongArch-to-fno-direct-access-extern.patch | 5 +- ...Precommit-test-for-fix-wrong-return-.patch | 109 + ...Fix-wrong-return-value-type-of-__ioc.patch | 73 + clang.spec | 27 +- 18 files changed, 67866 insertions(+), 4 deletions(-) create mode 100644 0001-Clang-LoongArch-Use-the-ClangBuiltin-class-to-automa.patch create mode 100644 0002-Clang-LoongArch-Support-compiler-options-mlsx-mlasx-.patch create mode 100644 0003-Clang-LoongArch-Add-ABI-implementation-of-passing-ve.patch create mode 100644 0004-Clang-LoongArch-Support-the-builtin-functions-for-LS.patch create mode 100644 0005-Clang-LoongArch-Support-the-builtin-functions-for-LA.patch create mode 100644 0006-LoongArch-CodeGen-Add-LSX-builtin-testcases.patch create mode 100644 0007-LoongArch-CodeGen-Add-LASX-builtin-testcases.patch create mode 100644 0008-Clang-LoongArch-Do-not-pass-vector-arguments-via-vec.patch create mode 100644 0009-Driver-Support-fsanitize-cfi-icall-on-loongarch64-67.patch create mode 100644 0010-Clang-LoongArch-Generate-_mcount-instead-of-mcount-6.patch create mode 100644 0011-LoongArch-test-Add-some-ABI-regression-tests-for-emp.patch create mode 100644 0012-LoongArch-Fix-ABI-mismatch-with-gcc-g-about-empty-st.patch create mode 100644 0013-LoongArch-Pre-commit-test-for-issue-70890.patch create mode 100644 0014-LoongArch-Fix-ABI-mismatch-with-g-when-handling-empt.patch rename 0002-Driver-Default-LoongArch-to-fno-direct-access-extern.patch => 0015-Driver-Default-LoongArch-to-fno-direct-access-extern.patch (94%) create mode 100644 0016-Clang-LoongArch-Precommit-test-for-fix-wrong-return-.patch create mode 100644 0017-Clang-LoongArch-Fix-wrong-return-value-type-of-__ioc.patch diff --git a/0001-Clang-LoongArch-Use-the-ClangBuiltin-class-to-automa.patch b/0001-Clang-LoongArch-Use-the-ClangBuiltin-class-to-automa.patch new file mode 100644 index 0000000..6aa0599 --- /dev/null +++ b/0001-Clang-LoongArch-Use-the-ClangBuiltin-class-to-automa.patch @@ -0,0 +1,507 @@ +From e5c03f299c1761eec0ae325d995eab121f1dd3a3 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Wed, 9 Aug 2023 16:01:37 +0800 +Subject: [PATCH 01/17] [Clang][LoongArch] Use the ClangBuiltin class to + automatically generate support for CBE and CFE + +Fixed the type modifier (L->W), removed redundant feature checking code +since the feature has already been checked in `EmitBuiltinExpr`. And +Cleaned up unused diagnostic information. + +Reviewed By: SixWeining + +Differential Revision: https://reviews.llvm.org/D156866 + +(cherry picked from commit ea8d3b1f9f2d7385d97fcd34d14db0eb2cb2795c) + +Change-Id: I058f02e311dd67dc2ec63e404e4bb58e852da1b8 +--- + .../include/clang/Basic/BuiltinsLoongArch.def | 25 ++-- + .../clang/Basic/DiagnosticSemaKinds.td | 7 - + clang/lib/CodeGen/CGBuiltin.cpp | 130 ------------------ + clang/lib/CodeGen/CodeGenFunction.h | 1 - + clang/lib/Sema/SemaChecking.cpp | 50 +------ + .../CodeGen/LoongArch/intrinsic-la32-error.c | 118 +++++++--------- + 6 files changed, 64 insertions(+), 267 deletions(-) + +diff --git a/clang/include/clang/Basic/BuiltinsLoongArch.def b/clang/include/clang/Basic/BuiltinsLoongArch.def +index 7f2c8403410d..20510e18fe58 100644 +--- a/clang/include/clang/Basic/BuiltinsLoongArch.def ++++ b/clang/include/clang/Basic/BuiltinsLoongArch.def +@@ -16,8 +16,7 @@ + #endif + + // TODO: Support more builtins. +-// TODO: Added feature constraints. +-TARGET_BUILTIN(__builtin_loongarch_cacop_d, "vLiULiLi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_cacop_d, "vWiUWiWi", "nc", "64bit") + TARGET_BUILTIN(__builtin_loongarch_cacop_w, "viUii", "nc", "32bit") + TARGET_BUILTIN(__builtin_loongarch_dbar, "vIUi", "nc", "") + TARGET_BUILTIN(__builtin_loongarch_ibar, "vIUi", "nc", "") +@@ -26,36 +25,36 @@ TARGET_BUILTIN(__builtin_loongarch_movgr2fcsr, "vIUiUi", "nc", "f") + TARGET_BUILTIN(__builtin_loongarch_break, "vIUi", "nc", "") + TARGET_BUILTIN(__builtin_loongarch_syscall, "vIUi", "nc", "") + TARGET_BUILTIN(__builtin_loongarch_cpucfg, "UiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_asrtle_d, "vLiLi", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_asrtgt_d, "vLiLi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_asrtle_d, "vWiWi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_asrtgt_d, "vWiWi", "nc", "64bit") + + TARGET_BUILTIN(__builtin_loongarch_crc_w_b_w, "iii", "nc", "64bit") + TARGET_BUILTIN(__builtin_loongarch_crc_w_h_w, "iii", "nc", "64bit") + TARGET_BUILTIN(__builtin_loongarch_crc_w_w_w, "iii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crc_w_d_w, "iLii", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_crc_w_d_w, "iWii", "nc", "64bit") + TARGET_BUILTIN(__builtin_loongarch_crcc_w_b_w, "iii", "nc", "64bit") + TARGET_BUILTIN(__builtin_loongarch_crcc_w_h_w, "iii", "nc", "64bit") + TARGET_BUILTIN(__builtin_loongarch_crcc_w_w_w, "iii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crcc_w_d_w, "iLii", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_crcc_w_d_w, "iWii", "nc", "64bit") + + TARGET_BUILTIN(__builtin_loongarch_csrrd_w, "UiIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_csrrd_d, "ULiIUi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_csrrd_d, "UWiIUi", "nc", "64bit") + TARGET_BUILTIN(__builtin_loongarch_csrwr_w, "UiUiIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_csrwr_d, "ULiULiIUi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_csrwr_d, "UWiUWiIUi", "nc", "64bit") + TARGET_BUILTIN(__builtin_loongarch_csrxchg_w, "UiUiUiIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_csrxchg_d, "ULiULiULiIUi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_csrxchg_d, "UWiUWiUWiIUi", "nc", "64bit") + + TARGET_BUILTIN(__builtin_loongarch_iocsrrd_b, "UiUi", "nc", "") + TARGET_BUILTIN(__builtin_loongarch_iocsrrd_h, "UiUi", "nc", "") + TARGET_BUILTIN(__builtin_loongarch_iocsrrd_w, "UiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_iocsrrd_d, "ULiUi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_iocsrrd_d, "UWiUi", "nc", "64bit") + TARGET_BUILTIN(__builtin_loongarch_iocsrwr_b, "vUiUi", "nc", "") + TARGET_BUILTIN(__builtin_loongarch_iocsrwr_h, "vUiUi", "nc", "") + TARGET_BUILTIN(__builtin_loongarch_iocsrwr_w, "vUiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_iocsrwr_d, "vULiUi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_iocsrwr_d, "vUWiUi", "nc", "64bit") + +-TARGET_BUILTIN(__builtin_loongarch_lddir_d, "LiLiIULi", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_ldpte_d, "vLiIULi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_lddir_d, "WiWiIUWi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_ldpte_d, "vWiIUWi", "nc", "64bit") + + #undef BUILTIN + #undef TARGET_BUILTIN +diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td +index c88f25209fc0..0e97620945af 100644 +--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td ++++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td +@@ -11853,10 +11853,6 @@ def err_non_designated_init_used : Error< + def err_cast_from_randomized_struct : Error< + "casting from randomized structure pointer type %0 to %1">; + +-// LoongArch-specific Diagnostics +-def err_loongarch_builtin_requires_la64 : Error< +- "this builtin requires target: loongarch64">; +- + // Unsafe buffer usage diagnostics. + def warn_unsafe_buffer_variable : Warning< + "%0 is an %select{unsafe pointer used for buffer access|unsafe buffer that " +@@ -11872,9 +11868,6 @@ def note_unsafe_buffer_variable_fixit_group : Note< + "change type of %0 to '%select{std::span|std::array|std::span::iterator}1' to preserve bounds information%select{|, and change %2 to '%select{std::span|std::array|std::span::iterator}1' to propagate bounds information between them}3">; + def note_safe_buffer_usage_suggestions_disabled : Note< + "pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions">; +-def err_loongarch_builtin_requires_la32 : Error< +- "this builtin requires target: loongarch32">; +- + def err_builtin_pass_in_regs_non_class : Error< + "argument %0 is not an unqualified class type">; + +diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp +index 30f5f4e7061c..e512762fafaf 100644 +--- a/clang/lib/CodeGen/CGBuiltin.cpp ++++ b/clang/lib/CodeGen/CGBuiltin.cpp +@@ -43,7 +43,6 @@ + #include "llvm/IR/IntrinsicsARM.h" + #include "llvm/IR/IntrinsicsBPF.h" + #include "llvm/IR/IntrinsicsHexagon.h" +-#include "llvm/IR/IntrinsicsLoongArch.h" + #include "llvm/IR/IntrinsicsNVPTX.h" + #include "llvm/IR/IntrinsicsPowerPC.h" + #include "llvm/IR/IntrinsicsR600.h" +@@ -5588,9 +5587,6 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, + case llvm::Triple::riscv32: + case llvm::Triple::riscv64: + return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue); +- case llvm::Triple::loongarch32: +- case llvm::Triple::loongarch64: +- return CGF->EmitLoongArchBuiltinExpr(BuiltinID, E); + default: + return nullptr; + } +@@ -20418,129 +20414,3 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, + llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes); + return Builder.CreateCall(F, Ops, ""); + } +- +-Value *CodeGenFunction::EmitLoongArchBuiltinExpr(unsigned BuiltinID, +- const CallExpr *E) { +- SmallVector Ops; +- +- for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) +- Ops.push_back(EmitScalarExpr(E->getArg(i))); +- +- Intrinsic::ID ID = Intrinsic::not_intrinsic; +- +- switch (BuiltinID) { +- default: +- llvm_unreachable("unexpected builtin ID."); +- case LoongArch::BI__builtin_loongarch_cacop_d: +- ID = Intrinsic::loongarch_cacop_d; +- break; +- case LoongArch::BI__builtin_loongarch_cacop_w: +- ID = Intrinsic::loongarch_cacop_w; +- break; +- case LoongArch::BI__builtin_loongarch_dbar: +- ID = Intrinsic::loongarch_dbar; +- break; +- case LoongArch::BI__builtin_loongarch_break: +- ID = Intrinsic::loongarch_break; +- break; +- case LoongArch::BI__builtin_loongarch_ibar: +- ID = Intrinsic::loongarch_ibar; +- break; +- case LoongArch::BI__builtin_loongarch_movfcsr2gr: +- ID = Intrinsic::loongarch_movfcsr2gr; +- break; +- case LoongArch::BI__builtin_loongarch_movgr2fcsr: +- ID = Intrinsic::loongarch_movgr2fcsr; +- break; +- case LoongArch::BI__builtin_loongarch_syscall: +- ID = Intrinsic::loongarch_syscall; +- break; +- case LoongArch::BI__builtin_loongarch_crc_w_b_w: +- ID = Intrinsic::loongarch_crc_w_b_w; +- break; +- case LoongArch::BI__builtin_loongarch_crc_w_h_w: +- ID = Intrinsic::loongarch_crc_w_h_w; +- break; +- case LoongArch::BI__builtin_loongarch_crc_w_w_w: +- ID = Intrinsic::loongarch_crc_w_w_w; +- break; +- case LoongArch::BI__builtin_loongarch_crc_w_d_w: +- ID = Intrinsic::loongarch_crc_w_d_w; +- break; +- case LoongArch::BI__builtin_loongarch_crcc_w_b_w: +- ID = Intrinsic::loongarch_crcc_w_b_w; +- break; +- case LoongArch::BI__builtin_loongarch_crcc_w_h_w: +- ID = Intrinsic::loongarch_crcc_w_h_w; +- break; +- case LoongArch::BI__builtin_loongarch_crcc_w_w_w: +- ID = Intrinsic::loongarch_crcc_w_w_w; +- break; +- case LoongArch::BI__builtin_loongarch_crcc_w_d_w: +- ID = Intrinsic::loongarch_crcc_w_d_w; +- break; +- case LoongArch::BI__builtin_loongarch_csrrd_w: +- ID = Intrinsic::loongarch_csrrd_w; +- break; +- case LoongArch::BI__builtin_loongarch_csrwr_w: +- ID = Intrinsic::loongarch_csrwr_w; +- break; +- case LoongArch::BI__builtin_loongarch_csrxchg_w: +- ID = Intrinsic::loongarch_csrxchg_w; +- break; +- case LoongArch::BI__builtin_loongarch_csrrd_d: +- ID = Intrinsic::loongarch_csrrd_d; +- break; +- case LoongArch::BI__builtin_loongarch_csrwr_d: +- ID = Intrinsic::loongarch_csrwr_d; +- break; +- case LoongArch::BI__builtin_loongarch_csrxchg_d: +- ID = Intrinsic::loongarch_csrxchg_d; +- break; +- case LoongArch::BI__builtin_loongarch_iocsrrd_b: +- ID = Intrinsic::loongarch_iocsrrd_b; +- break; +- case LoongArch::BI__builtin_loongarch_iocsrrd_h: +- ID = Intrinsic::loongarch_iocsrrd_h; +- break; +- case LoongArch::BI__builtin_loongarch_iocsrrd_w: +- ID = Intrinsic::loongarch_iocsrrd_w; +- break; +- case LoongArch::BI__builtin_loongarch_iocsrrd_d: +- ID = Intrinsic::loongarch_iocsrrd_d; +- break; +- case LoongArch::BI__builtin_loongarch_iocsrwr_b: +- ID = Intrinsic::loongarch_iocsrwr_b; +- break; +- case LoongArch::BI__builtin_loongarch_iocsrwr_h: +- ID = Intrinsic::loongarch_iocsrwr_h; +- break; +- case LoongArch::BI__builtin_loongarch_iocsrwr_w: +- ID = Intrinsic::loongarch_iocsrwr_w; +- break; +- case LoongArch::BI__builtin_loongarch_iocsrwr_d: +- ID = Intrinsic::loongarch_iocsrwr_d; +- break; +- case LoongArch::BI__builtin_loongarch_cpucfg: +- ID = Intrinsic::loongarch_cpucfg; +- break; +- case LoongArch::BI__builtin_loongarch_asrtle_d: +- ID = Intrinsic::loongarch_asrtle_d; +- break; +- case LoongArch::BI__builtin_loongarch_asrtgt_d: +- ID = Intrinsic::loongarch_asrtgt_d; +- break; +- case LoongArch::BI__builtin_loongarch_lddir_d: +- ID = Intrinsic::loongarch_lddir_d; +- break; +- case LoongArch::BI__builtin_loongarch_ldpte_d: +- ID = Intrinsic::loongarch_ldpte_d; +- break; +- // TODO: Support more Intrinsics. +- } +- +- assert(ID != Intrinsic::not_intrinsic); +- +- llvm::Function *F = CGM.getIntrinsic(ID); +- return Builder.CreateCall(F, Ops); +-} +diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h +index 8722fd4550e4..143e0707b942 100644 +--- a/clang/lib/CodeGen/CodeGenFunction.h ++++ b/clang/lib/CodeGen/CodeGenFunction.h +@@ -4316,7 +4316,6 @@ public: + llvm::Value *EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E); + llvm::Value *EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E, + ReturnValueSlot ReturnValue); +- llvm::Value *EmitLoongArchBuiltinExpr(unsigned BuiltinID, const CallExpr *E); + void ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope, + llvm::AtomicOrdering &AO, + llvm::SyncScope::ID &SSID); +diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp +index a94f009f3fa6..a8416bf4de92 100644 +--- a/clang/lib/Sema/SemaChecking.cpp ++++ b/clang/lib/Sema/SemaChecking.cpp +@@ -3827,39 +3827,12 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, + default: + break; + case LoongArch::BI__builtin_loongarch_cacop_d: +- if (!TI.hasFeature("64bit")) +- return Diag(TheCall->getBeginLoc(), +- diag::err_loongarch_builtin_requires_la64) +- << TheCall->getSourceRange(); +- [[fallthrough]]; + case LoongArch::BI__builtin_loongarch_cacop_w: { +- if (BuiltinID == LoongArch::BI__builtin_loongarch_cacop_w && +- !TI.hasFeature("32bit")) +- return Diag(TheCall->getBeginLoc(), +- diag::err_loongarch_builtin_requires_la32) +- << TheCall->getSourceRange(); + SemaBuiltinConstantArgRange(TheCall, 0, 0, llvm::maxUIntN(5)); + SemaBuiltinConstantArgRange(TheCall, 2, llvm::minIntN(12), + llvm::maxIntN(12)); + break; + } +- case LoongArch::BI__builtin_loongarch_crc_w_b_w: +- case LoongArch::BI__builtin_loongarch_crc_w_h_w: +- case LoongArch::BI__builtin_loongarch_crc_w_w_w: +- case LoongArch::BI__builtin_loongarch_crc_w_d_w: +- case LoongArch::BI__builtin_loongarch_crcc_w_b_w: +- case LoongArch::BI__builtin_loongarch_crcc_w_h_w: +- case LoongArch::BI__builtin_loongarch_crcc_w_w_w: +- case LoongArch::BI__builtin_loongarch_crcc_w_d_w: +- case LoongArch::BI__builtin_loongarch_iocsrrd_d: +- case LoongArch::BI__builtin_loongarch_iocsrwr_d: +- case LoongArch::BI__builtin_loongarch_asrtle_d: +- case LoongArch::BI__builtin_loongarch_asrtgt_d: +- if (!TI.hasFeature("64bit")) +- return Diag(TheCall->getBeginLoc(), +- diag::err_loongarch_builtin_requires_la64) +- << TheCall->getSourceRange(); +- break; + case LoongArch::BI__builtin_loongarch_break: + case LoongArch::BI__builtin_loongarch_dbar: + case LoongArch::BI__builtin_loongarch_ibar: +@@ -3867,35 +3840,16 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, + // Check if immediate is in [0, 32767]. + return SemaBuiltinConstantArgRange(TheCall, 0, 0, 32767); + case LoongArch::BI__builtin_loongarch_csrrd_w: +- return SemaBuiltinConstantArgRange(TheCall, 0, 0, 16383); +- case LoongArch::BI__builtin_loongarch_csrwr_w: +- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 16383); +- case LoongArch::BI__builtin_loongarch_csrxchg_w: +- return SemaBuiltinConstantArgRange(TheCall, 2, 0, 16383); + case LoongArch::BI__builtin_loongarch_csrrd_d: +- if (!TI.hasFeature("64bit")) +- return Diag(TheCall->getBeginLoc(), +- diag::err_loongarch_builtin_requires_la64) +- << TheCall->getSourceRange(); + return SemaBuiltinConstantArgRange(TheCall, 0, 0, 16383); ++ case LoongArch::BI__builtin_loongarch_csrwr_w: + case LoongArch::BI__builtin_loongarch_csrwr_d: +- if (!TI.hasFeature("64bit")) +- return Diag(TheCall->getBeginLoc(), +- diag::err_loongarch_builtin_requires_la64) +- << TheCall->getSourceRange(); + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 16383); ++ case LoongArch::BI__builtin_loongarch_csrxchg_w: + case LoongArch::BI__builtin_loongarch_csrxchg_d: +- if (!TI.hasFeature("64bit")) +- return Diag(TheCall->getBeginLoc(), +- diag::err_loongarch_builtin_requires_la64) +- << TheCall->getSourceRange(); + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 16383); + case LoongArch::BI__builtin_loongarch_lddir_d: + case LoongArch::BI__builtin_loongarch_ldpte_d: +- if (!TI.hasFeature("64bit")) +- return Diag(TheCall->getBeginLoc(), +- diag::err_loongarch_builtin_requires_la64) +- << TheCall->getSourceRange(); + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 31); + case LoongArch::BI__builtin_loongarch_movfcsr2gr: + case LoongArch::BI__builtin_loongarch_movgr2fcsr: +diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la32-error.c b/clang/test/CodeGen/LoongArch/intrinsic-la32-error.c +index 0264c2948934..db113a13eb5a 100644 +--- a/clang/test/CodeGen/LoongArch/intrinsic-la32-error.c ++++ b/clang/test/CodeGen/LoongArch/intrinsic-la32-error.c +@@ -1,9 +1,58 @@ + // RUN: %clang_cc1 -triple loongarch32 -emit-llvm -S -verify %s -o /dev/null ++// RUN: not %clang_cc1 -triple loongarch32 -DFEATURE_CHECK -emit-llvm %s 2>&1 \ ++// RUN: | FileCheck %s + + #include + ++#ifdef FEATURE_CHECK ++void test_feature(long *v_l, unsigned long *v_ul, int *v_i, unsigned ui, char c, short s) { ++// CHECK: error: '__builtin_loongarch_cacop_d' needs target feature 64bit ++ __builtin_loongarch_cacop_d(1, v_ul[0], 1024); ++ ++// CHECK: error: '__builtin_loongarch_crc_w_b_w' needs target feature 64bit ++ v_i[0] = __builtin_loongarch_crc_w_b_w(c, v_i[0]); ++// CHECK: error: '__builtin_loongarch_crc_w_h_w' needs target feature 64bit ++ v_i[1] = __builtin_loongarch_crc_w_h_w(c, v_i[0]); ++// CHECK: error: '__builtin_loongarch_crc_w_w_w' needs target feature 64bit ++ v_i[2] = __builtin_loongarch_crc_w_w_w(c, v_i[0]); ++// CHECK: error: '__builtin_loongarch_crc_w_d_w' needs target feature 64bit ++ v_i[3] = __builtin_loongarch_crc_w_d_w(c, v_i[0]); ++ ++// CHECK: error: '__builtin_loongarch_crcc_w_b_w' needs target feature 64bit ++ v_i[4] = __builtin_loongarch_crcc_w_b_w(c, v_i[0]); ++// CHECK: error: '__builtin_loongarch_crcc_w_h_w' needs target feature 64bit ++ v_i[5] = __builtin_loongarch_crcc_w_h_w(s, v_i[0]); ++// CHECK: error: '__builtin_loongarch_crcc_w_w_w' needs target feature 64bit ++ v_i[6] = __builtin_loongarch_crcc_w_w_w(v_i[0], v_i[1]); ++// CHECK: error: '__builtin_loongarch_crcc_w_d_w' needs target feature 64bit ++ v_i[7] = __builtin_loongarch_crcc_w_d_w(v_l[0], v_i[0]); ++ ++// CHECK: error: '__builtin_loongarch_csrrd_d' needs target feature 64bit ++ v_ul[0] = __builtin_loongarch_csrrd_d(1); ++// CHECK: error: '__builtin_loongarch_csrwr_d' needs target feature 64bit ++ v_ul[1] = __builtin_loongarch_csrwr_d(v_ul[0], 1); ++// CHECK: error: '__builtin_loongarch_csrxchg_d' needs target feature 64bit ++ v_ul[2] = __builtin_loongarch_csrxchg_d(v_ul[0], v_ul[1], 1); ++ ++ ++// CHECK: error: '__builtin_loongarch_iocsrrd_d' needs target feature 64bit ++ v_ul[3] = __builtin_loongarch_iocsrrd_d(ui); ++// CHECK: error: '__builtin_loongarch_iocsrwr_d' needs target feature 64bit ++ __builtin_loongarch_iocsrwr_d(v_ul[0], ui); ++ ++// CHECK: error: '__builtin_loongarch_asrtle_d' needs target feature 64bit ++ __builtin_loongarch_asrtle_d(v_l[0], v_l[1]); ++// CHECK: error: '__builtin_loongarch_asrtgt_d' needs target feature 64bit ++ __builtin_loongarch_asrtgt_d(v_l[0], v_l[1]); ++ ++// CHECK: error: '__builtin_loongarch_lddir_d' needs target feature 64bit ++ v_ul[4] = __builtin_loongarch_lddir_d(v_l[0], 1); ++// CHECK: error: '__builtin_loongarch_ldpte_d' needs target feature 64bit ++ __builtin_loongarch_ldpte_d(v_l[0], 1); ++} ++#endif ++ + void cacop_d(unsigned long int a) { +- __builtin_loongarch_cacop_d(1, a, 1024); // expected-error {{this builtin requires target: loongarch64}} + __builtin_loongarch_cacop_w(-1, a, 1024); // expected-error {{argument value -1 is outside the valid range [0, 31]}} + __builtin_loongarch_cacop_w(32, a, 1024); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + __builtin_loongarch_cacop_w(1, a, -4096); // expected-error {{argument value -4096 is outside the valid range [-2048, 2047]}} +@@ -47,49 +96,6 @@ void syscall(int a) { + __builtin_loongarch_syscall(a); // expected-error {{argument to '__builtin_loongarch_syscall' must be a constant integer}} + } + +-int crc_w_b_w(char a, int b) { +- return __builtin_loongarch_crc_w_b_w(a, b); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-int crc_w_h_w(short a, int b) { +- return __builtin_loongarch_crc_w_h_w(a, b); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-int crc_w_w_w(int a, int b) { +- return __builtin_loongarch_crc_w_w_w(a, b); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-int crc_w_d_w(long int a, int b) { +- return __builtin_loongarch_crc_w_d_w(a, b); // expected-error {{this builtin requires target: loongarch64}} +-} +-int crcc_w_b_w(char a, int b) { +- return __builtin_loongarch_crcc_w_b_w(a, b); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-int crcc_w_h_w(short a, int b) { +- return __builtin_loongarch_crcc_w_h_w(a, b); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-int crcc_w_w_w(int a, int b) { +- return __builtin_loongarch_crcc_w_w_w(a, b); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-int crcc_w_d_w(long int a, int b) { +- return __builtin_loongarch_crcc_w_d_w(a, b); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-unsigned long int csrrd_d() { +- return __builtin_loongarch_csrrd_d(1); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-unsigned long int csrwr_d(unsigned long int a) { +- return __builtin_loongarch_csrwr_d(a, 1); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-unsigned long int csrxchg_d(unsigned long int a, unsigned long int b) { +- return __builtin_loongarch_csrxchg_d(a, b, 1); // expected-error {{this builtin requires target: loongarch64}} +-} +- + void csrrd_w(int a) { + __builtin_loongarch_csrrd_w(16384); // expected-error {{argument value 16384 is outside the valid range [0, 16383]}} + __builtin_loongarch_csrrd_w(-1); // expected-error {{argument value 4294967295 is outside the valid range [0, 16383]}} +@@ -108,30 +114,6 @@ void csrxchg_w(unsigned int a, unsigned int b) { + __builtin_loongarch_csrxchg_w(a, b, b); // expected-error {{argument to '__builtin_loongarch_csrxchg_w' must be a constant integer}} + } + +-unsigned long int iocsrrd_d(unsigned int a) { +- return __builtin_loongarch_iocsrrd_d(a); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-void iocsrwr_d(unsigned long int a, unsigned int b) { +- __builtin_loongarch_iocsrwr_d(a, b); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-void asrtle_d(long int a, long int b) { +- __builtin_loongarch_asrtle_d(a, b); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-void asrtgt_d(long int a, long int b) { +- __builtin_loongarch_asrtgt_d(a, b); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-void lddir_d(long int a, int b) { +- __builtin_loongarch_lddir_d(a, 1); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-void ldpte_d(long int a, int b) { +- __builtin_loongarch_ldpte_d(a, 1); // expected-error {{this builtin requires target: loongarch64}} +-} +- + void rdtime_d() { + __rdtime_d(); // expected-error {{call to undeclared function '__rdtime_d'}} + } +-- +2.20.1 + diff --git a/0002-Clang-LoongArch-Support-compiler-options-mlsx-mlasx-.patch b/0002-Clang-LoongArch-Support-compiler-options-mlsx-mlasx-.patch new file mode 100644 index 0000000..ada7df9 --- /dev/null +++ b/0002-Clang-LoongArch-Support-compiler-options-mlsx-mlasx-.patch @@ -0,0 +1,351 @@ +From c7f4dc3b39fc0519884ff3553bbdc51627211d7e Mon Sep 17 00:00:00 2001 +From: licongtian +Date: Wed, 20 Sep 2023 11:21:56 +0800 +Subject: [PATCH 02/17] [Clang][LoongArch] Support compiler options + -mlsx/-mlasx for clang + +This patch adds compiler options -mlsx/-mlasx which enables the +instruction sets of LSX and LASX, and sets related predefined macros +according to the options. + +(cherry picked from commit 8d4e35600f3ba90997a59fdb9baeb196e723eec9) + +Change-Id: Ie2ac4fb70acaaeeb246e0cc796a5a653db8fd6d6 +--- + .../clang/Basic/DiagnosticDriverKinds.td | 6 +++ + clang/include/clang/Driver/Options.td | 10 +++++ + clang/lib/Basic/Targets/LoongArch.cpp | 12 +++++- + clang/lib/Basic/Targets/LoongArch.h | 4 ++ + .../lib/Driver/ToolChains/Arch/LoongArch.cpp | 32 +++++++++++++++ + clang/test/Driver/loongarch-mlasx-error.c | 15 +++++++ + clang/test/Driver/loongarch-mlasx.c | 37 +++++++++++++++++ + clang/test/Driver/loongarch-mlsx-error.c | 12 ++++++ + clang/test/Driver/loongarch-mlsx.c | 41 +++++++++++++++++++ + clang/test/Preprocessor/init-loongarch.c | 35 ++++++++++++++++ + 10 files changed, 203 insertions(+), 1 deletion(-) + create mode 100644 clang/test/Driver/loongarch-mlasx-error.c + create mode 100644 clang/test/Driver/loongarch-mlasx.c + create mode 100644 clang/test/Driver/loongarch-mlsx-error.c + create mode 100644 clang/test/Driver/loongarch-mlsx.c + +diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td +index 1b69324d073a..8c751f2c4bda 100644 +--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td ++++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td +@@ -732,6 +732,12 @@ def warn_drv_loongarch_conflicting_implied_val : Warning< + InGroup; + def err_drv_loongarch_invalid_mfpu_EQ : Error< + "invalid argument '%0' to -mfpu=; must be one of: 64, 32, none, 0 (alias for none)">; ++def err_drv_loongarch_wrong_fpu_width_for_lsx : Error< ++ "wrong fpu width; LSX depends on 64-bit FPU.">; ++def err_drv_loongarch_wrong_fpu_width_for_lasx : Error< ++ "wrong fpu width; LASX depends on 64-bit FPU.">; ++def err_drv_loongarch_invalid_simd_option_combination : Error< ++ "invalid option combination; LASX depends on LSX.">; + + def err_drv_expand_response_file : Error< + "failed to expand response file: %0">; +diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td +index 37e8c56b2d29..c94979b4dc34 100644 +--- a/clang/include/clang/Driver/Options.td ++++ b/clang/include/clang/Driver/Options.td +@@ -192,6 +192,8 @@ def m_x86_Features_Group : OptionGroup<"">, + Group, Flags<[CoreOption]>, DocName<"X86">; + def m_riscv_Features_Group : OptionGroup<"">, + Group, DocName<"RISC-V">; ++def m_loongarch_Features_Group : OptionGroup<"">, ++ Group, DocName<"LoongArch">; + + def m_libc_Group : OptionGroup<"">, Group, + Flags<[HelpHidden]>; +@@ -4190,6 +4192,14 @@ def mstack_protector_guard_reg_EQ : Joined<["-"], "mstack-protector-guard-reg="> + def mfentry : Flag<["-"], "mfentry">, HelpText<"Insert calls to fentry at function entry (x86/SystemZ only)">, + Flags<[CC1Option]>, Group, + MarshallingInfoFlag>; ++def mlsx : Flag<["-"], "mlsx">, Group, ++ HelpText<"Enable Loongson SIMD Extension (LSX).">; ++def mno_lsx : Flag<["-"], "mno-lsx">, Group, ++ HelpText<"Disable Loongson SIMD Extension (LSX).">; ++def mlasx : Flag<["-"], "mlasx">, Group, ++ HelpText<"Enable Loongson Advanced SIMD Extension (LASX).">; ++def mno_lasx : Flag<["-"], "mno-lasx">, Group, ++ HelpText<"Disable Loongson Advanced SIMD Extension (LASX).">; + def mnop_mcount : Flag<["-"], "mnop-mcount">, HelpText<"Generate mcount/__fentry__ calls as nops. To activate they need to be patched in.">, + Flags<[CC1Option]>, Group, + MarshallingInfoFlag>; +diff --git a/clang/lib/Basic/Targets/LoongArch.cpp b/clang/lib/Basic/Targets/LoongArch.cpp +index 4448a2ae10a1..88537989a051 100644 +--- a/clang/lib/Basic/Targets/LoongArch.cpp ++++ b/clang/lib/Basic/Targets/LoongArch.cpp +@@ -208,6 +208,11 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts, + TuneCPU = ArchName; + Builder.defineMacro("__loongarch_tune", Twine('"') + TuneCPU + Twine('"')); + ++ if (HasFeatureLSX) ++ Builder.defineMacro("__loongarch_sx", Twine(1)); ++ if (HasFeatureLASX) ++ Builder.defineMacro("__loongarch_asx", Twine(1)); ++ + StringRef ABI = getABI(); + if (ABI == "lp64d" || ABI == "lp64f" || ABI == "lp64s") + Builder.defineMacro("__loongarch_lp64"); +@@ -257,6 +262,8 @@ bool LoongArchTargetInfo::hasFeature(StringRef Feature) const { + .Case("loongarch64", Is64Bit) + .Case("32bit", !Is64Bit) + .Case("64bit", Is64Bit) ++ .Case("lsx", HasFeatureLSX) ++ .Case("lasx", HasFeatureLASX) + .Default(false); + } + +@@ -274,7 +281,10 @@ bool LoongArchTargetInfo::handleTargetFeatures( + if (Feature == "+d") { + HasFeatureD = true; + } +- } ++ } else if (Feature == "+lsx") ++ HasFeatureLSX = true; ++ else if (Feature == "+lasx") ++ HasFeatureLASX = true; + } + return true; + } +diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h +index 34143f462a24..8f4150b2539d 100644 +--- a/clang/lib/Basic/Targets/LoongArch.h ++++ b/clang/lib/Basic/Targets/LoongArch.h +@@ -27,12 +27,16 @@ protected: + std::string CPU; + bool HasFeatureD; + bool HasFeatureF; ++ bool HasFeatureLSX; ++ bool HasFeatureLASX; + + public: + LoongArchTargetInfo(const llvm::Triple &Triple, const TargetOptions &) + : TargetInfo(Triple) { + HasFeatureD = false; + HasFeatureF = false; ++ HasFeatureLSX = false; ++ HasFeatureLASX = false; + LongDoubleWidth = 128; + LongDoubleAlign = 128; + LongDoubleFormat = &llvm::APFloat::IEEEquad(); +diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp +index 65925e9ed610..31153a67ad28 100644 +--- a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp ++++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp +@@ -175,6 +175,38 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D, + A->ignoreTargetSpecific(); + if (Arg *A = Args.getLastArgNoClaim(options::OPT_mfpu_EQ)) + A->ignoreTargetSpecific(); ++ ++ // Select lsx feature determined by -m[no-]lsx. ++ if (const Arg *A = Args.getLastArg(options::OPT_mlsx, options::OPT_mno_lsx)) { ++ // LSX depends on 64-bit FPU. ++ // -m*-float and -mfpu=none/0/32 conflict with -mlsx. ++ if (A->getOption().matches(options::OPT_mlsx)) { ++ if (llvm::find(Features, "-d") != Features.end()) ++ D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lsx); ++ else /*-mlsx*/ ++ Features.push_back("+lsx"); ++ } else /*-mno-lsx*/ { ++ Features.push_back("-lsx"); ++ } ++ } ++ ++ // Select lasx feature determined by -m[no-]lasx. ++ if (const Arg *A = ++ Args.getLastArg(options::OPT_mlasx, options::OPT_mno_lasx)) { ++ // LASX depends on 64-bit FPU and LSX. ++ // -mno-lsx conflicts with -mlasx. ++ if (A->getOption().matches(options::OPT_mlasx)) { ++ if (llvm::find(Features, "-d") != Features.end()) ++ D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lasx); ++ else if (llvm::find(Features, "-lsx") != Features.end()) ++ D.Diag(diag::err_drv_loongarch_invalid_simd_option_combination); ++ else { /*-mlasx*/ ++ Features.push_back("+lsx"); ++ Features.push_back("+lasx"); ++ } ++ } else /*-mno-lasx*/ ++ Features.push_back("-lasx"); ++ } + } + + std::string loongarch::postProcessTargetCPUString(const std::string &CPU, +diff --git a/clang/test/Driver/loongarch-mlasx-error.c b/clang/test/Driver/loongarch-mlasx-error.c +new file mode 100644 +index 000000000000..e66f277f7c29 +--- /dev/null ++++ b/clang/test/Driver/loongarch-mlasx-error.c +@@ -0,0 +1,15 @@ ++// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -msingle-float 2>&1 \ ++// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s ++// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -msoft-float 2>&1 \ ++// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s ++// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -mfpu=32 2>&1 \ ++// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s ++// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -mfpu=0 2>&1 \ ++// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s ++// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -mfpu=none 2>&1 \ ++// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s ++// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -mno-lsx 2>&1 \ ++// RUN: FileCheck --check-prefix=ERROR_LASX_FPU128 %s ++ ++// ERROR_LASX_FPU64: error: wrong fpu width; LASX depends on 64-bit FPU. ++// ERROR_LASX_FPU128: error: invalid option combination; LASX depends on LSX. +diff --git a/clang/test/Driver/loongarch-mlasx.c b/clang/test/Driver/loongarch-mlasx.c +new file mode 100644 +index 000000000000..0b934f125c9e +--- /dev/null ++++ b/clang/test/Driver/loongarch-mlasx.c +@@ -0,0 +1,37 @@ ++/// Test -m[no-]lasx options. ++ ++// RUN: %clang --target=loongarch64 -mlasx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-LASX ++// RUN: %clang --target=loongarch64 -mno-lasx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-NOLASX ++// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-NOLASX ++// RUN: %clang --target=loongarch64 -mno-lasx -mlasx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-LASX ++// RUN: %clang --target=loongarch64 -mlsx -mlasx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-LASX ++// RUN: %clang --target=loongarch64 -mlasx -mlsx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-LASX ++ ++// RUN: %clang --target=loongarch64 -mlasx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-LASX ++// RUN: %clang --target=loongarch64 -mno-lasx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-NOLASX ++// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-NOLASX ++// RUN: %clang --target=loongarch64 -mno-lasx -mlasx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-LASX ++// RUN: %clang --target=loongarch64 -mlsx -mlasx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-LASX ++// RUN: %clang --target=loongarch64 -mlasx -mlsx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-LASX ++ ++// CC1-LASX: "-target-feature" "+lsx" "-target-feature" "+lasx" ++// CC1-NOLASX: "-target-feature" "-lasx" ++ ++// IR-LASX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+lasx{{(,.*)?}}" ++// IR-NOLASX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-lasx{{(,.*)?}}" ++ ++int foo(void){ ++ return 3; ++} +diff --git a/clang/test/Driver/loongarch-mlsx-error.c b/clang/test/Driver/loongarch-mlsx-error.c +new file mode 100644 +index 000000000000..bd6b8e2718bf +--- /dev/null ++++ b/clang/test/Driver/loongarch-mlsx-error.c +@@ -0,0 +1,12 @@ ++// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -msingle-float 2>&1 \ ++// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s ++// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -msoft-float 2>&1 \ ++// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s ++// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -mfpu=32 2>&1 \ ++// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s ++// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -mfpu=0 2>&1 \ ++// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s ++// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -mfpu=none 2>&1 \ ++// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s ++ ++// ERROR_LSX_FPU64: error: wrong fpu width; LSX depends on 64-bit FPU. +diff --git a/clang/test/Driver/loongarch-mlsx.c b/clang/test/Driver/loongarch-mlsx.c +new file mode 100644 +index 000000000000..7d4307b078e1 +--- /dev/null ++++ b/clang/test/Driver/loongarch-mlsx.c +@@ -0,0 +1,41 @@ ++/// Test -m[no-]lsx options. ++ ++// RUN: %clang --target=loongarch64 -mlsx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-LSX ++// RUN: %clang --target=loongarch64 -mno-lsx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-NOLSX ++// RUN: %clang --target=loongarch64 -mlsx -mno-lsx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-NOLSX ++// RUN: %clang --target=loongarch64 -mno-lsx -mlsx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-LSX ++// RUN: %clang --target=loongarch64 -mlsx -mno-lasx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-LSX ++// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-LSX ++// RUN: %clang --target=loongarch64 -mno-lsx -mno-lasx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-NOLSX ++ ++// RUN: %clang --target=loongarch64 -mlsx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-LSX ++// RUN: %clang --target=loongarch64 -mno-lsx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-NOLSX ++// RUN: %clang --target=loongarch64 -mlsx -mno-lsx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-NOLSX ++// RUN: %clang --target=loongarch64 -mno-lsx -mlsx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-LSX ++// RUN: %clang --target=loongarch64 -mlsx -mno-lasx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-LSX ++// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-LSX ++// RUN: %clang --target=loongarch64 -mno-lsx -mno-lasx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-NOLSX ++ ++// CC1-LSX: "-target-feature" "+lsx" ++// CC1-NOLSX: "-target-feature" "-lsx" ++ ++// IR-LSX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+lsx{{(,.*)?}}" ++// IR-NOLSX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-lsx{{(,.*)?}}" ++ ++int foo(void){ ++ return 3; ++} +diff --git a/clang/test/Preprocessor/init-loongarch.c b/clang/test/Preprocessor/init-loongarch.c +index 4ef42a921ec0..e235a7283021 100644 +--- a/clang/test/Preprocessor/init-loongarch.c ++++ b/clang/test/Preprocessor/init-loongarch.c +@@ -807,3 +807,38 @@ + + // ARCH-TUNE: #define __loongarch_arch "[[ARCH]]" + // ARCH-TUNE: #define __loongarch_tune "[[TUNE]]" ++ ++// RUN: %clang --target=loongarch64 -mlsx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s ++// RUN: %clang --target=loongarch64 -mno-lsx -mlsx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s ++// RUN: %clang --target=loongarch64 -mlsx -mno-lasx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s ++// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s ++// MLSX-NOT: #define __loongarch_asx ++// MLSX: #define __loongarch_sx 1 ++ ++// RUN: %clang --target=loongarch64 -mlasx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s ++// RUN: %clang --target=loongarch64 -mno-lasx -mlasx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s ++// RUN: %clang --target=loongarch64 -mlsx -mlasx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s ++// RUN: %clang --target=loongarch64 -mlasx -mlsx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s ++// MLASX: #define __loongarch_asx 1 ++// MLASX: #define __loongarch_sx 1 ++ ++// RUN: %clang --target=loongarch64 -mno-lsx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s ++// RUN: %clang --target=loongarch64 -mlsx -mno-lsx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s ++// RUN: %clang --target=loongarch64 -mno-lsx -mno-lasx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s ++// RUN: %clang --target=loongarch64 -mno-lasx -mno-lsx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s ++// RUN: %clang --target=loongarch64 -mno-lasx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s ++// MNO-LSX-NOT: #define __loongarch_asx ++// MNO-LSX-NOT: #define __loongarch_sx +-- +2.20.1 + diff --git a/0003-Clang-LoongArch-Add-ABI-implementation-of-passing-ve.patch b/0003-Clang-LoongArch-Add-ABI-implementation-of-passing-ve.patch new file mode 100644 index 0000000..12ef552 --- /dev/null +++ b/0003-Clang-LoongArch-Add-ABI-implementation-of-passing-ve.patch @@ -0,0 +1,34 @@ +From 9df32c3cc1929b8d09f3e5fc07819f2fdef3ae6d Mon Sep 17 00:00:00 2001 +From: licongtian +Date: Wed, 25 Oct 2023 17:35:32 +0800 +Subject: [PATCH 03/17] [Clang][LoongArch] Add ABI implementation of passing + vectors + +(cherry picked from commit eb49b86f5a9b54b0e3c37024334a3c6f6ca88e14) + +Change-Id: I1ba073095a55433fdb9387d2ef433160ac800375 +--- + clang/lib/CodeGen/Targets/LoongArch.cpp | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/clang/lib/CodeGen/Targets/LoongArch.cpp b/clang/lib/CodeGen/Targets/LoongArch.cpp +index 7483bf6d6d1e..26c68c3583b2 100644 +--- a/clang/lib/CodeGen/Targets/LoongArch.cpp ++++ b/clang/lib/CodeGen/Targets/LoongArch.cpp +@@ -321,6 +321,13 @@ ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, + return ABIArgInfo::getDirect(); + } + ++ // Pass 128-bit/256-bit vector values via vector registers directly. ++ if (Ty->isVectorType() && (((getContext().getTypeSize(Ty) == 128) && ++ (getTarget().hasFeature("lsx"))) || ++ ((getContext().getTypeSize(Ty) == 256) && ++ getTarget().hasFeature("lasx")))) ++ return ABIArgInfo::getDirect(); ++ + // Complex types for the *f or *d ABI must be passed directly rather than + // using CoerceAndExpand. + if (IsFixed && Ty->isComplexType() && FRLen && FARsLeft >= 2) { +-- +2.20.1 + diff --git a/0004-Clang-LoongArch-Support-the-builtin-functions-for-LS.patch b/0004-Clang-LoongArch-Support-the-builtin-functions-for-LS.patch new file mode 100644 index 0000000..2a50967 --- /dev/null +++ b/0004-Clang-LoongArch-Support-the-builtin-functions-for-LS.patch @@ -0,0 +1,5094 @@ +From 91ff2e72026568ecca978506770f25633a5b536e Mon Sep 17 00:00:00 2001 +From: licongtian +Date: Wed, 25 Oct 2023 17:41:03 +0800 +Subject: [PATCH 04/17] [Clang][LoongArch] Support the builtin functions for + LSX + +This patch does the following work: +- Define the builtin functions for LSX +- Add the header file lsxintrin.h +- Add the immediate number range checking for LSX builtins + +(cherry picked from commit d6bfa3341181a80de6c8aede807fc1acc3ce8d9b) + +Change-Id: I15d1da98d6e23fa2d77655b7e2ea431d8e223a98 +--- + .../include/clang/Basic/BuiltinsLoongArch.def | 43 +- + .../clang/Basic/BuiltinsLoongArchBase.def | 53 + + .../clang/Basic/BuiltinsLoongArchLSX.def | 953 +++++ + clang/lib/Headers/CMakeLists.txt | 1 + + clang/lib/Headers/lsxintrin.h | 3726 +++++++++++++++++ + clang/lib/Sema/SemaChecking.cpp | 229 +- + 6 files changed, 4965 insertions(+), 40 deletions(-) + create mode 100644 clang/include/clang/Basic/BuiltinsLoongArchBase.def + create mode 100644 clang/include/clang/Basic/BuiltinsLoongArchLSX.def + create mode 100644 clang/lib/Headers/lsxintrin.h + +diff --git a/clang/include/clang/Basic/BuiltinsLoongArch.def b/clang/include/clang/Basic/BuiltinsLoongArch.def +index 20510e18fe58..9ec19c31095a 100644 +--- a/clang/include/clang/Basic/BuiltinsLoongArch.def ++++ b/clang/include/clang/Basic/BuiltinsLoongArch.def +@@ -15,46 +15,11 @@ + # define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS) + #endif + +-// TODO: Support more builtins. +-TARGET_BUILTIN(__builtin_loongarch_cacop_d, "vWiUWiWi", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_cacop_w, "viUii", "nc", "32bit") +-TARGET_BUILTIN(__builtin_loongarch_dbar, "vIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_ibar, "vIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_movfcsr2gr, "UiIUi", "nc", "f") +-TARGET_BUILTIN(__builtin_loongarch_movgr2fcsr, "vIUiUi", "nc", "f") +-TARGET_BUILTIN(__builtin_loongarch_break, "vIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_syscall, "vIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_cpucfg, "UiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_asrtle_d, "vWiWi", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_asrtgt_d, "vWiWi", "nc", "64bit") ++// Definition of LoongArch basic builtins. ++#include "clang/Basic/BuiltinsLoongArchBase.def" + +-TARGET_BUILTIN(__builtin_loongarch_crc_w_b_w, "iii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crc_w_h_w, "iii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crc_w_w_w, "iii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crc_w_d_w, "iWii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crcc_w_b_w, "iii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crcc_w_h_w, "iii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crcc_w_w_w, "iii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crcc_w_d_w, "iWii", "nc", "64bit") +- +-TARGET_BUILTIN(__builtin_loongarch_csrrd_w, "UiIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_csrrd_d, "UWiIUi", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_csrwr_w, "UiUiIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_csrwr_d, "UWiUWiIUi", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_csrxchg_w, "UiUiUiIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_csrxchg_d, "UWiUWiUWiIUi", "nc", "64bit") +- +-TARGET_BUILTIN(__builtin_loongarch_iocsrrd_b, "UiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_iocsrrd_h, "UiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_iocsrrd_w, "UiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_iocsrrd_d, "UWiUi", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_iocsrwr_b, "vUiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_iocsrwr_h, "vUiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_iocsrwr_w, "vUiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_iocsrwr_d, "vUWiUi", "nc", "64bit") +- +-TARGET_BUILTIN(__builtin_loongarch_lddir_d, "WiWiIUWi", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_ldpte_d, "vWiIUWi", "nc", "64bit") ++// Definition of LSX builtins. ++#include "clang/Basic/BuiltinsLoongArchLSX.def" + + #undef BUILTIN + #undef TARGET_BUILTIN +diff --git a/clang/include/clang/Basic/BuiltinsLoongArchBase.def b/clang/include/clang/Basic/BuiltinsLoongArchBase.def +new file mode 100644 +index 000000000000..cbb239223aae +--- /dev/null ++++ b/clang/include/clang/Basic/BuiltinsLoongArchBase.def +@@ -0,0 +1,53 @@ ++//============------------ BuiltinsLoongArchBase.def -------------*- C++ -*-==// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines the LoongArch-specific basic builtin function database. ++// Users of this file must define the BUILTIN macro to make use of this ++// information. ++// ++//===----------------------------------------------------------------------===// ++ ++TARGET_BUILTIN(__builtin_loongarch_cacop_d, "vWiUWiWi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_cacop_w, "viUii", "nc", "32bit") ++TARGET_BUILTIN(__builtin_loongarch_dbar, "vIUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_ibar, "vIUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_movfcsr2gr, "UiIUi", "nc", "f") ++TARGET_BUILTIN(__builtin_loongarch_movgr2fcsr, "vIUiUi", "nc", "f") ++TARGET_BUILTIN(__builtin_loongarch_break, "vIUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_syscall, "vIUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_cpucfg, "UiUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_asrtle_d, "vWiWi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_asrtgt_d, "vWiWi", "nc", "64bit") ++ ++TARGET_BUILTIN(__builtin_loongarch_crc_w_b_w, "iii", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_crc_w_h_w, "iii", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_crc_w_w_w, "iii", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_crc_w_d_w, "iWii", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_crcc_w_b_w, "iii", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_crcc_w_h_w, "iii", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_crcc_w_w_w, "iii", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_crcc_w_d_w, "iWii", "nc", "64bit") ++ ++TARGET_BUILTIN(__builtin_loongarch_csrrd_w, "UiIUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_csrrd_d, "UWiIUi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_csrwr_w, "UiUiIUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_csrwr_d, "UWiUWiIUi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_csrxchg_w, "UiUiUiIUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_csrxchg_d, "UWiUWiUWiIUi", "nc", "64bit") ++ ++TARGET_BUILTIN(__builtin_loongarch_iocsrrd_b, "UiUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_iocsrrd_h, "UiUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_iocsrrd_w, "UiUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_iocsrrd_d, "UWiUi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_iocsrwr_b, "vUiUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_iocsrwr_h, "vUiUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_iocsrwr_w, "vUiUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_iocsrwr_d, "vUWiUi", "nc", "64bit") ++ ++TARGET_BUILTIN(__builtin_loongarch_lddir_d, "WiWiIUWi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_ldpte_d, "vWiIUWi", "nc", "64bit") +diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLSX.def b/clang/include/clang/Basic/BuiltinsLoongArchLSX.def +new file mode 100644 +index 000000000000..8e6aec886c50 +--- /dev/null ++++ b/clang/include/clang/Basic/BuiltinsLoongArchLSX.def +@@ -0,0 +1,953 @@ ++//=============------------- BuiltinsLoongArchLSX.def --------------- C++ -*-=// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines the LoongArch-specific LSX builtin function database. ++// Users of this file must define the BUILTIN macro to make use of this ++// information. ++// ++//===----------------------------------------------------------------------===// ++ ++TARGET_BUILTIN(__builtin_lsx_vadd_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vadd_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vadd_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vadd_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vadd_q, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsub_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsub_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsub_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsub_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsub_q, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vaddi_bu, "V16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddi_hu, "V8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddi_wu, "V4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddi_du, "V2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsubi_bu, "V16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubi_hu, "V8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubi_wu, "V4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubi_du, "V2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vneg_b, "V16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vneg_h, "V8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vneg_w, "V4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vneg_d, "V2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsadd_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsadd_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsadd_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsadd_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsadd_bu, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsadd_hu, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsadd_wu, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsadd_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssub_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssub_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssub_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssub_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssub_bu, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssub_hu, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssub_wu, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssub_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vhaddw_h_b, "V8SsV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhaddw_w_h, "V4SiV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhaddw_d_w, "V2SLLiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhaddw_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vhaddw_hu_bu, "V8UsV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhaddw_wu_hu, "V4UiV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhaddw_du_wu, "V2ULLiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhaddw_qu_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vhsubw_h_b, "V8SsV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhsubw_w_h, "V4SiV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhsubw_d_w, "V2SLLiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhsubw_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vhsubw_hu_bu, "V8UsV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhsubw_wu_hu, "V4UiV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhsubw_du_wu, "V2ULLiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhsubw_qu_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vaddwev_h_b, "V8sV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwev_w_h, "V4SiV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwev_d_w, "V2LLiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwev_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vaddwod_h_b, "V8sV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwod_w_h, "V4SiV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwod_d_w, "V2LLiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwod_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsubwev_h_b, "V8sV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwev_w_h, "V4SiV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwev_d_w, "V2LLiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwev_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsubwod_h_b, "V8sV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwod_w_h, "V4SiV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwod_d_w, "V2LLiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwod_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vaddwev_h_bu, "V8sV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwev_w_hu, "V4SiV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwev_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwev_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vaddwod_h_bu, "V8sV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwod_w_hu, "V4SiV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwod_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwod_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsubwev_h_bu, "V8sV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwev_w_hu, "V4SiV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwev_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwev_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsubwod_h_bu, "V8sV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwod_w_hu, "V4SiV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwod_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwod_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vaddwev_h_bu_b, "V8sV16UcV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwev_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwev_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vaddwod_h_bu_b, "V8sV16UcV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwod_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwod_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vavg_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavg_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavg_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavg_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vavg_bu, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavg_hu, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavg_wu, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavg_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vavgr_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavgr_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavgr_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavgr_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vavgr_bu, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavgr_hu, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavgr_wu, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavgr_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vabsd_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vabsd_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vabsd_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vabsd_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vabsd_bu, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vabsd_hu, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vabsd_wu, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vabsd_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vadda_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vadda_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vadda_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vadda_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmax_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmax_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmax_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmax_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmaxi_b, "V16ScV16ScIi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaxi_h, "V8SsV8SsIi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaxi_w, "V4SiV4SiIi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaxi_d, "V2SLLiV2SLLiIi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmax_bu, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmax_hu, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmax_wu, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmax_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmaxi_bu, "V16UcV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaxi_hu, "V8UsV8UsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaxi_wu, "V4UiV4UiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaxi_du, "V2ULLiV2ULLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmin_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmin_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmin_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmin_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmini_b, "V16ScV16ScIi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmini_h, "V8SsV8SsIi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmini_w, "V4SiV4SiIi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmini_d, "V2SLLiV2SLLiIi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmin_bu, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmin_hu, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmin_wu, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmin_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmini_bu, "V16UcV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmini_hu, "V8UsV8UsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmini_wu, "V4UiV4UiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmini_du, "V2ULLiV2ULLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmul_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmul_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmul_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmul_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmuh_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmuh_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmuh_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmuh_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmuh_bu, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmuh_hu, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmuh_wu, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmuh_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmulwev_h_b, "V8sV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwev_w_h, "V4SiV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwev_d_w, "V2LLiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwev_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmulwod_h_b, "V8sV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwod_w_h, "V4SiV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwod_d_w, "V2LLiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwod_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmulwev_h_bu, "V8sV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwev_w_hu, "V4SiV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwev_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwev_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmulwod_h_bu, "V8sV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwod_w_hu, "V4SiV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwod_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwod_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmulwev_h_bu_b, "V8sV16UcV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwev_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwev_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmulwod_h_bu_b, "V8sV16UcV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwod_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwod_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmadd_b, "V16ScV16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmadd_h, "V8SsV8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmadd_w, "V4SiV4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmadd_d, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmsub_b, "V16ScV16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmsub_h, "V8SsV8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmsub_w, "V4SiV4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmsub_d, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_h_b, "V8sV8sV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_w_h, "V4SiV4SiV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_d_w, "V2LLiV2LLiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_h_b, "V8sV8sV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_w_h, "V4SiV4SiV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_d_w, "V2LLiV2LLiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_h_bu, "V8UsV8UsV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_w_hu, "V4UiV4UiV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_d_wu, "V2ULLiV2ULLiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_q_du, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_h_bu, "V8UsV8UsV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_w_hu, "V4UiV4UiV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_d_wu, "V2ULLiV2ULLiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_q_du, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_h_bu_b, "V8sV8sV16UcV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_w_hu_h, "V4SiV4SiV8UsV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_h_bu_b, "V8sV8sV16UcV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_w_hu_h, "V4SiV4SiV8UsV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vdiv_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vdiv_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vdiv_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vdiv_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vdiv_bu, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vdiv_hu, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vdiv_wu, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vdiv_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmod_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmod_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmod_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmod_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++ ++TARGET_BUILTIN(__builtin_lsx_vmod_bu, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmod_hu, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmod_wu, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmod_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsat_b, "V16ScV16ScIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsat_h, "V8SsV8SsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsat_w, "V4SiV4SiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsat_d, "V2SLLiV2SLLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsat_bu, "V16UcV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsat_hu, "V8UsV8UsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsat_wu, "V4UiV4UiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsat_du, "V2ULLiV2ULLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vexth_h_b, "V8sV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vexth_w_h, "V4SiV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vexth_d_w, "V2LLiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vexth_q_d, "V2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vexth_hu_bu, "V8UsV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vexth_wu_hu, "V4UiV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vexth_du_wu, "V2ULLiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vexth_qu_du, "V2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsigncov_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsigncov_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsigncov_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsigncov_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmskltz_b, "V16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmskltz_h, "V8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmskltz_w, "V4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmskltz_d, "V2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmskgez_b, "V16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmsknz_b, "V8sV8s", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vldi, "V2LLiIi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vrepli_b, "V16cIi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vrepli_h, "V8sIi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vrepli_w, "V4iIi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vrepli_d, "V2LLiIi", "nc", "lsx") ++ ++ ++TARGET_BUILTIN(__builtin_lsx_vand_v, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vor_v, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vxor_v, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vnor_v, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vandn_v, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vorn_v, "V16ScV16ScV16Sc", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vandi_b, "V16UcV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vori_b, "V16UcV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vxori_b, "V16UcV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vnori_b, "V16UcV16UcIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsll_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsll_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsll_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsll_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vslli_b, "V16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslli_h, "V8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslli_w, "V4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslli_d, "V2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrl_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrl_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrl_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrl_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrli_b, "V16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrli_h, "V8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrli_w, "V4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrli_d, "V2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsra_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsra_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsra_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsra_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrai_b, "V16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrai_h, "V8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrai_w, "V4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrai_d, "V2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vrotr_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vrotr_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vrotr_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vrotr_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vrotri_b, "V16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vrotri_h, "V8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vrotri_w, "V4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vrotri_d, "V2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsllwil_h_b, "V8sV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsllwil_w_h, "V4SiV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsllwil_d_w, "V2LLiV4SiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vextl_q_d, "V2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsllwil_hu_bu, "V8UsV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsllwil_wu_hu, "V4UiV8UsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsllwil_du_wu, "V2ULLiV4UiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vextl_qu_du, "V2LLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrlr_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlr_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlr_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlr_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrlri_b, "V16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlri_h, "V8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlri_w, "V4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlri_d, "V2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrar_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrar_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrar_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrar_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrari_b, "V16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrari_h, "V8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrari_w, "V4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrari_d, "V2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrln_b_h, "V16ScV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrln_h_w, "V8sV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrln_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsran_b_h, "V16ScV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsran_h_w, "V8sV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsran_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrlni_b_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlni_h_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlni_w_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrani_b_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrani_h_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrani_w_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrani_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrlrn_b_h, "V16ScV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlrn_h_w, "V8sV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlrn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrarn_b_h, "V16ScV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrarn_h_w, "V8sV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrarn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrlrni_b_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlrni_h_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlrni_w_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlrni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrarni_b_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrarni_h_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrarni_w_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrarni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrln_b_h, "V16ScV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrln_h_w, "V8sV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrln_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssran_b_h, "V16ScV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssran_h_w, "V8sV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssran_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrln_bu_h, "V16UcV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrln_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrln_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssran_bu_h, "V16UcV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssran_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssran_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrlni_b_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlni_h_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlni_w_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrani_b_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrani_h_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrani_w_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrani_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrlrni_bu_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlrni_hu_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlrni_wu_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlrni_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrani_bu_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrani_hu_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrani_wu_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrani_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrlrn_b_h, "V16ScV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlrn_h_w, "V8sV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlrn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrarn_b_h, "V16ScV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrarn_h_w, "V8sV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrarn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrlrn_bu_h, "V16UcV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlrn_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlrn_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrarn_bu_h, "V16UcV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrarn_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrarn_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrlrni_b_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlrni_h_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlrni_w_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlrni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrarni_b_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrarni_h_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrarni_w_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrarni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrlni_bu_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlni_hu_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlni_wu_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlni_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrarni_bu_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrarni_hu_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrarni_wu_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrarni_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vclo_b, "V16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vclo_h, "V8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vclo_w, "V4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vclo_d, "V2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vclz_b, "V16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vclz_h, "V8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vclz_w, "V4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vclz_d, "V2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vpcnt_b, "V16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpcnt_h, "V8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpcnt_w, "V4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpcnt_d, "V2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vbitclr_b, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitclr_h, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitclr_w, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitclr_d, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vbitclri_b, "V16UcV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitclri_h, "V8UsV8UsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitclri_w, "V4UiV4UiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitclri_d, "V2ULLiV2ULLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vbitset_b, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitset_h, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitset_w, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitset_d, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vbitseti_b, "V16UcV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitseti_h, "V8UsV8UsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitseti_w, "V4UiV4UiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitseti_d, "V2ULLiV2ULLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vbitrev_b, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitrev_h, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitrev_w, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitrev_d, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vbitrevi_b, "V16UcV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitrevi_h, "V8UsV8UsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitrevi_w, "V4UiV4UiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitrevi_d, "V2ULLiV2ULLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfrstp_b, "V16ScV16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfrstp_h, "V8SsV8SsV8SsV8Ss", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfrstpi_b, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfrstpi_h, "V8sV8sV8sIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfadd_s, "V4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfadd_d, "V2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfsub_s, "V4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfsub_d, "V2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfmul_s, "V4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfmul_d, "V2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfdiv_s, "V4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfdiv_d, "V2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfmadd_s, "V4fV4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfmadd_d, "V2dV2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfmsub_s, "V4fV4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfmsub_d, "V2dV2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfnmadd_s, "V4fV4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfnmadd_d, "V2dV2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfnmsub_s, "V4fV4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfnmsub_d, "V2dV2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfmax_s, "V4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfmax_d, "V2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfmin_s, "V4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfmin_d, "V2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfmaxa_s, "V4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfmaxa_d, "V2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfmina_s, "V4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfmina_d, "V2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vflogb_s, "V4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vflogb_d, "V2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfclass_s, "V4iV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfclass_d, "V2LLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfsqrt_s, "V4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfsqrt_d, "V2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfrecip_s, "V4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfrecip_d, "V2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfrsqrt_s, "V4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfrsqrt_d, "V2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcvtl_s_h, "V4fV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcvtl_d_s, "V2dV4f", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcvth_s_h, "V4fV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcvth_d_s, "V2dV4f", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcvt_h_s, "V8sV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcvt_s_d, "V4fV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfrintrne_s, "V4SiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfrintrne_d, "V2LLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfrintrz_s, "V4SiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfrintrz_d, "V2LLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfrintrp_s, "V4SiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfrintrp_d, "V2LLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfrintrm_s, "V4SiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfrintrm_d, "V2LLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfrint_s, "V4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfrint_d, "V2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftintrne_w_s, "V4SiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrne_l_d, "V2LLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftintrz_w_s, "V4SiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrz_l_d, "V2LLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftintrp_w_s, "V4SiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrp_l_d, "V2LLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftintrm_w_s, "V4SiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrm_l_d, "V2LLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftint_w_s, "V4SiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftint_l_d, "V2SLLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftintrz_wu_s, "V4UiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrz_lu_d, "V2ULLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftint_wu_s, "V4UiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftint_lu_d, "V2ULLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftintrne_w_d, "V4SiV2dV2d", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrz_w_d, "V4SiV2dV2d", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrp_w_d, "V4SiV2dV2d", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrm_w_d, "V4SiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftint_w_d, "V4SiV2dV2d", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrnel_l_s, "V2LLiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrneh_l_s, "V2LLiV4f", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftintrzl_l_s, "V2LLiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrzh_l_s, "V2LLiV4f", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftintrpl_l_s, "V2LLiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrph_l_s, "V2LLiV4f", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftintrml_l_s, "V2LLiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrmh_l_s, "V2LLiV4f", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftintl_l_s, "V2LLiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftinth_l_s, "V2LLiV4f", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vffint_s_w, "V4fV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vffint_d_l, "V2dV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vffint_s_wu, "V4fV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vffint_d_lu, "V2dV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vffintl_d_w, "V2dV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vffinth_d_w, "V2dV4Si", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vffint_s_l, "V4fV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vseq_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vseq_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vseq_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vseq_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vseqi_b, "V16ScV16ScISi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vseqi_h, "V8SsV8SsISi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vseqi_w, "V4SiV4SiISi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vseqi_d, "V2SLLiV2SLLiISi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsle_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsle_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsle_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsle_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vslei_b, "V16ScV16ScISi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslei_h, "V8SsV8SsISi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslei_w, "V4SiV4SiISi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslei_d, "V2SLLiV2SLLiISi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsle_bu, "V16ScV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsle_hu, "V8SsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsle_wu, "V4SiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsle_du, "V2SLLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vslei_bu, "V16ScV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslei_hu, "V8SsV8UsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslei_wu, "V4SiV4UiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslei_du, "V2SLLiV2ULLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vslt_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslt_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslt_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslt_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vslti_b, "V16ScV16ScISi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslti_h, "V8SsV8SsISi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslti_w, "V4SiV4SiISi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslti_d, "V2SLLiV2SLLiISi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vslt_bu, "V16ScV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslt_hu, "V8SsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslt_wu, "V4SiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslt_du, "V2SLLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vslti_bu, "V16ScV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslti_hu, "V8SsV8UsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslti_wu, "V4SiV4UiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslti_du, "V2SLLiV2ULLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_caf_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_caf_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cun_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cun_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_ceq_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_ceq_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cueq_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cueq_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_clt_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_clt_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cult_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cult_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cle_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cle_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cule_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cule_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cne_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cne_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cor_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cor_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cune_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cune_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_saf_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_saf_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sun_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sun_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_seq_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_seq_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sueq_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sueq_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_slt_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_slt_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sult_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sult_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sle_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sle_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sule_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sule_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sne_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sne_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sor_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sor_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sune_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sune_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vbitsel_v, "V16UcV16UcV16UcV16Uc", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vbitseli_b, "V16UcV16UcV16UcIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_b, "V16Sci", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_h, "V8Ssi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_w, "V4Sii", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_d, "V2SLLiLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_b, "V16ScV16SciIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_h, "V8SsV8SsiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_w, "V4SiV4SiiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_d, "V2SLLiV2SLLiLLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vpickve2gr_b, "iV16ScIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickve2gr_h, "iV8SsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickve2gr_w, "iV4SiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickve2gr_d, "LLiV2SLLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vpickve2gr_bu, "iV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickve2gr_hu, "iV8UsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickve2gr_wu, "iV4UiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickve2gr_du, "LLiV2ULLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vreplve_b, "V16cV16cUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vreplve_h, "V8sV8sUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vreplve_w, "V4iV4iUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vreplve_d, "V2LLiV2LLiUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vreplvei_b, "V16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vreplvei_h, "V8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vreplvei_w, "V4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vreplvei_d, "V2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vbsll_v, "V16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbsrl_v, "V16cV16cIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vpackev_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpackev_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpackev_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpackev_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vpackod_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpackod_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpackod_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpackod_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vpickev_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickev_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickev_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickev_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vpickod_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickod_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickod_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickod_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vilvl_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vilvl_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vilvl_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vilvl_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vilvh_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vilvh_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vilvh_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vilvh_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vshuf_b, "V16UcV16UcV16UcV16Uc", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vshuf_h, "V8sV8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vshuf_w, "V4iV4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vshuf_d, "V2LLiV2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vshuf4i_b, "V16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vshuf4i_h, "V8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vshuf4i_w, "V4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vshuf4i_d, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vpermi_w, "V4iV4iV4iIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vextrins_b, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vextrins_h, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vextrins_w, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vextrins_d, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vld, "V16ScvC*Ii", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vst, "vV16Scv*Ii", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vldx, "V16ScvC*LLi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vstx, "vV16Scv*LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vldrepl_b, "V16cvC*Ii", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vldrepl_h, "V8svC*Ii", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vldrepl_w, "V4ivC*Ii", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vldrepl_d, "V2LLivC*Ii", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vstelm_b, "vV16Scv*IiUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vstelm_h, "vV8Ssv*IiUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vstelm_w, "vV4Siv*IiUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vstelm_d, "vV2SLLiv*IiUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_bz_v, "iV16Uc", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_bnz_v, "iV16Uc", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_bz_b, "iV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_bz_h, "iV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_bz_w, "iV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_bz_d, "iV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_bnz_b, "iV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_bnz_h, "iV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_bnz_w, "iV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_bnz_d, "iV2ULLi", "nc", "lsx") +diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt +index db47de2ad965..1d5573b71e6d 100644 +--- a/clang/lib/Headers/CMakeLists.txt ++++ b/clang/lib/Headers/CMakeLists.txt +@@ -78,6 +78,7 @@ set(hlsl_files + + set(loongarch_files + larchintrin.h ++ lsxintrin.h + ) + + set(mips_msa_files +diff --git a/clang/lib/Headers/lsxintrin.h b/clang/lib/Headers/lsxintrin.h +new file mode 100644 +index 000000000000..a29bc7757ab5 +--- /dev/null ++++ b/clang/lib/Headers/lsxintrin.h +@@ -0,0 +1,3726 @@ ++/*===------------- lsxintrin.h - LoongArch LSX intrinsics ------------------=== ++ * ++ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++ * See https://llvm.org/LICENSE.txt for license information. ++ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++ * ++ *===-----------------------------------------------------------------------=== ++ */ ++ ++#ifndef _LOONGSON_SXINTRIN_H ++#define _LOONGSON_SXINTRIN_H 1 ++ ++#if defined(__loongarch_sx) ++typedef signed char v16i8 __attribute__((vector_size(16), aligned(16))); ++typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1))); ++typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1))); ++typedef short v8i16 __attribute__((vector_size(16), aligned(16))); ++typedef short v8i16_h __attribute__((vector_size(16), aligned(2))); ++typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2))); ++typedef int v4i32 __attribute__((vector_size(16), aligned(16))); ++typedef int v4i32_w __attribute__((vector_size(16), aligned(4))); ++typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4))); ++typedef long long v2i64 __attribute__((vector_size(16), aligned(16))); ++typedef long long v2i64_d __attribute__((vector_size(16), aligned(8))); ++typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8))); ++typedef float v4f32 __attribute__((vector_size(16), aligned(16))); ++typedef float v4f32_w __attribute__((vector_size(16), aligned(4))); ++typedef double v2f64 __attribute__((vector_size(16), aligned(16))); ++typedef double v2f64_d __attribute__((vector_size(16), aligned(8))); ++ ++typedef long long __m128i __attribute__((__vector_size__(16), __may_alias__)); ++typedef float __m128 __attribute__((__vector_size__(16), __may_alias__)); ++typedef double __m128d __attribute__((__vector_size__(16), __may_alias__)); ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsll_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsll_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsll_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsll_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsll_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsll_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsll_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsll_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vslli_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vslli_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vslli_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vslli_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vslli_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslli_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vslli_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vslli_d((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsra_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsra_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsra_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsra_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsra_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsra_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsra_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsra_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vsrai_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrai_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vsrai_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrai_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vsrai_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrai_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vsrai_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrai_d((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrar_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrar_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrar_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrar_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrar_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrar_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrar_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrar_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vsrari_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrari_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vsrari_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrari_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vsrari_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrari_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vsrari_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrari_d((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrl_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrl_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrl_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrl_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrl_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrl_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrl_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrl_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vsrli_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrli_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vsrli_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrli_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vsrli_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrli_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vsrli_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrli_d((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlr_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlr_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlr_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlr_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlr_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlr_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlr_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlr_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vsrlri_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrlri_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vsrlri_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrlri_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vsrlri_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrlri_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vsrlri_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrlri_d((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitclr_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitclr_b((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitclr_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitclr_h((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitclr_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitclr_w((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitclr_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitclr_d((v2u64)_1, (v2u64)_2); ++} ++ ++#define __lsx_vbitclri_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitclri_b((v16u8)(_1), (_2))) ++ ++#define __lsx_vbitclri_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitclri_h((v8u16)(_1), (_2))) ++ ++#define __lsx_vbitclri_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitclri_w((v4u32)(_1), (_2))) ++ ++#define __lsx_vbitclri_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitclri_d((v2u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitset_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitset_b((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitset_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitset_h((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitset_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitset_w((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitset_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitset_d((v2u64)_1, (v2u64)_2); ++} ++ ++#define __lsx_vbitseti_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitseti_b((v16u8)(_1), (_2))) ++ ++#define __lsx_vbitseti_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitseti_h((v8u16)(_1), (_2))) ++ ++#define __lsx_vbitseti_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitseti_w((v4u32)(_1), (_2))) ++ ++#define __lsx_vbitseti_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitseti_d((v2u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitrev_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitrev_b((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitrev_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitrev_h((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitrev_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitrev_w((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitrev_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitrev_d((v2u64)_1, (v2u64)_2); ++} ++ ++#define __lsx_vbitrevi_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitrevi_b((v16u8)(_1), (_2))) ++ ++#define __lsx_vbitrevi_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitrevi_h((v8u16)(_1), (_2))) ++ ++#define __lsx_vbitrevi_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitrevi_w((v4u32)(_1), (_2))) ++ ++#define __lsx_vbitrevi_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitrevi_d((v2u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vaddi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vaddi_bu((v16i8)(_1), (_2))) ++ ++#define __lsx_vaddi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vaddi_hu((v8i16)(_1), (_2))) ++ ++#define __lsx_vaddi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vaddi_wu((v4i32)(_1), (_2))) ++ ++#define __lsx_vaddi_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vaddi_du((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vsubi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsubi_bu((v16i8)(_1), (_2))) ++ ++#define __lsx_vsubi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsubi_hu((v8i16)(_1), (_2))) ++ ++#define __lsx_vsubi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsubi_wu((v4i32)(_1), (_2))) ++ ++#define __lsx_vsubi_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsubi_du((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vmaxi_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vmaxi_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vmaxi_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vmaxi_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_d((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_du((v2u64)_1, (v2u64)_2); ++} ++ ++#define __lsx_vmaxi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_bu((v16u8)(_1), (_2))) ++ ++#define __lsx_vmaxi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_hu((v8u16)(_1), (_2))) ++ ++#define __lsx_vmaxi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_wu((v4u32)(_1), (_2))) ++ ++#define __lsx_vmaxi_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_du((v2u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vmini_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vmini_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vmini_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vmini_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_d((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_du((v2u64)_1, (v2u64)_2); ++} ++ ++#define __lsx_vmini_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_bu((v16u8)(_1), (_2))) ++ ++#define __lsx_vmini_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_hu((v8u16)(_1), (_2))) ++ ++#define __lsx_vmini_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_wu((v4u32)(_1), (_2))) ++ ++#define __lsx_vmini_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_du((v2u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vseq_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vseq_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vseq_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vseq_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vseq_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vseq_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vseq_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vseq_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vseqi_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vseqi_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vseqi_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vseqi_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vseqi_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vseqi_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vseqi_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vseqi_d((v2i64)(_1), (_2))) ++ ++#define __lsx_vslti_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_b((v16i8)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vslti_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vslti_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vslti_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_d((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_du((v2u64)_1, (v2u64)_2); ++} ++ ++#define __lsx_vslti_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_bu((v16u8)(_1), (_2))) ++ ++#define __lsx_vslti_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_hu((v8u16)(_1), (_2))) ++ ++#define __lsx_vslti_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_wu((v4u32)(_1), (_2))) ++ ++#define __lsx_vslti_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_du((v2u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vslei_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vslei_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vslei_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vslei_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_d((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_du((v2u64)_1, (v2u64)_2); ++} ++ ++#define __lsx_vslei_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_bu((v16u8)(_1), (_2))) ++ ++#define __lsx_vslei_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_hu((v8u16)(_1), (_2))) ++ ++#define __lsx_vslei_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_wu((v4u32)(_1), (_2))) ++ ++#define __lsx_vslei_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_du((v2u64)(_1), (_2))) ++ ++#define __lsx_vsat_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vsat_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vsat_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vsat_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_d((v2i64)(_1), (_2))) ++ ++#define __lsx_vsat_bu(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_bu((v16u8)(_1), (_2))) ++ ++#define __lsx_vsat_hu(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_hu((v8u16)(_1), (_2))) ++ ++#define __lsx_vsat_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_wu((v4u32)(_1), (_2))) ++ ++#define __lsx_vsat_du(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_du((v2u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadda_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadda_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadda_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadda_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadda_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadda_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadda_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadda_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmul_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmul_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmul_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmul_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmul_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmul_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmul_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmul_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmadd_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmadd_b((v16i8)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmadd_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmadd_h((v8i16)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmadd_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmadd_w((v4i32)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmadd_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmadd_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsub_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmsub_b((v16i8)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsub_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmsub_h((v8i16)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsub_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmsub_w((v4i32)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsub_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmsub_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_hu_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_hu_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_wu_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_wu_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_du_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_du_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_hu_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_hu_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_wu_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_wu_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_du_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_du_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplve_b(__m128i _1, int _2) { ++ return (__m128i)__builtin_lsx_vreplve_b((v16i8)_1, (int)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplve_h(__m128i _1, int _2) { ++ return (__m128i)__builtin_lsx_vreplve_h((v8i16)_1, (int)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplve_w(__m128i _1, int _2) { ++ return (__m128i)__builtin_lsx_vreplve_w((v4i32)_1, (int)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplve_d(__m128i _1, int _2) { ++ return (__m128i)__builtin_lsx_vreplve_d((v2i64)_1, (int)_2); ++} ++ ++#define __lsx_vreplvei_b(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vreplvei_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vreplvei_h(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vreplvei_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vreplvei_w(/*__m128i*/ _1, /*ui2*/ _2) \ ++ ((__m128i)__builtin_lsx_vreplvei_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vreplvei_d(/*__m128i*/ _1, /*ui1*/ _2) \ ++ ((__m128i)__builtin_lsx_vreplvei_d((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickev_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickev_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickev_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickev_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickev_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickev_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickev_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickev_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickod_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickod_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickod_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickod_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickod_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickod_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickod_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickod_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvh_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvh_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvh_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvh_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvh_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvh_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvh_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvh_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvl_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvl_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvl_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvl_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvl_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvl_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvl_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvl_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackev_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackev_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackev_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackev_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackev_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackev_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackev_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackev_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackod_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackod_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackod_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackod_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackod_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackod_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackod_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackod_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vshuf_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vshuf_h((v8i16)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vshuf_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vshuf_w((v4i32)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vshuf_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vshuf_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vand_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vand_v((v16u8)_1, (v16u8)_2); ++} ++ ++#define __lsx_vandi_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vandi_b((v16u8)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vor_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vor_v((v16u8)_1, (v16u8)_2); ++} ++ ++#define __lsx_vori_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vori_b((v16u8)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vnor_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vnor_v((v16u8)_1, (v16u8)_2); ++} ++ ++#define __lsx_vnori_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vnori_b((v16u8)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vxor_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vxor_v((v16u8)_1, (v16u8)_2); ++} ++ ++#define __lsx_vxori_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vxori_b((v16u8)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitsel_v(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vbitsel_v((v16u8)_1, (v16u8)_2, (v16u8)_3); ++} ++ ++#define __lsx_vbitseli_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vbitseli_b((v16u8)(_1), (v16u8)(_2), (_3))) ++ ++#define __lsx_vshuf4i_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vshuf4i_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vshuf4i_h(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vshuf4i_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vshuf4i_w(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vshuf4i_w((v4i32)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplgr2vr_b(int _1) { ++ return (__m128i)__builtin_lsx_vreplgr2vr_b((int)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplgr2vr_h(int _1) { ++ return (__m128i)__builtin_lsx_vreplgr2vr_h((int)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplgr2vr_w(int _1) { ++ return (__m128i)__builtin_lsx_vreplgr2vr_w((int)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplgr2vr_d(long int _1) { ++ return (__m128i)__builtin_lsx_vreplgr2vr_d((long int)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpcnt_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vpcnt_b((v16i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpcnt_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vpcnt_h((v8i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpcnt_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vpcnt_w((v4i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpcnt_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vpcnt_d((v2i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclo_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclo_b((v16i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclo_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclo_h((v8i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclo_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclo_w((v4i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclo_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclo_d((v2i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclz_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclz_b((v16i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclz_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclz_h((v8i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclz_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclz_w((v4i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclz_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclz_d((v2i64)_1); ++} ++ ++#define __lsx_vpickve2gr_b(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((int)__builtin_lsx_vpickve2gr_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vpickve2gr_h(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((int)__builtin_lsx_vpickve2gr_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vpickve2gr_w(/*__m128i*/ _1, /*ui2*/ _2) \ ++ ((int)__builtin_lsx_vpickve2gr_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vpickve2gr_d(/*__m128i*/ _1, /*ui1*/ _2) \ ++ ((long int)__builtin_lsx_vpickve2gr_d((v2i64)(_1), (_2))) ++ ++#define __lsx_vpickve2gr_bu(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((unsigned int)__builtin_lsx_vpickve2gr_bu((v16i8)(_1), (_2))) ++ ++#define __lsx_vpickve2gr_hu(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((unsigned int)__builtin_lsx_vpickve2gr_hu((v8i16)(_1), (_2))) ++ ++#define __lsx_vpickve2gr_wu(/*__m128i*/ _1, /*ui2*/ _2) \ ++ ((unsigned int)__builtin_lsx_vpickve2gr_wu((v4i32)(_1), (_2))) ++ ++#define __lsx_vpickve2gr_du(/*__m128i*/ _1, /*ui1*/ _2) \ ++ ((unsigned long int)__builtin_lsx_vpickve2gr_du((v2i64)(_1), (_2))) ++ ++#define __lsx_vinsgr2vr_b(/*__m128i*/ _1, /*int*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vinsgr2vr_b((v16i8)(_1), (int)(_2), (_3))) ++ ++#define __lsx_vinsgr2vr_h(/*__m128i*/ _1, /*int*/ _2, /*ui3*/ _3) \ ++ ((__m128i)__builtin_lsx_vinsgr2vr_h((v8i16)(_1), (int)(_2), (_3))) ++ ++#define __lsx_vinsgr2vr_w(/*__m128i*/ _1, /*int*/ _2, /*ui2*/ _3) \ ++ ((__m128i)__builtin_lsx_vinsgr2vr_w((v4i32)(_1), (int)(_2), (_3))) ++ ++#define __lsx_vinsgr2vr_d(/*__m128i*/ _1, /*long int*/ _2, /*ui1*/ _3) \ ++ ((__m128i)__builtin_lsx_vinsgr2vr_d((v2i64)(_1), (long int)(_2), (_3))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfadd_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfadd_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfadd_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfadd_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfsub_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfsub_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfsub_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfsub_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmul_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmul_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmul_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmul_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfdiv_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfdiv_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfdiv_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfdiv_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcvt_h_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcvt_h_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfcvt_s_d(__m128d _1, __m128d _2) { ++ return (__m128)__builtin_lsx_vfcvt_s_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmin_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmin_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmin_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmin_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmina_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmina_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmina_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmina_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmax_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmax_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmax_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmax_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmaxa_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmaxa_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmaxa_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmaxa_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfclass_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vfclass_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfclass_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vfclass_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfsqrt_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfsqrt_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfsqrt_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfsqrt_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrecip_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrecip_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrecip_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrecip_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrint_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrint_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrint_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrint_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrsqrt_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrsqrt_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrsqrt_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrsqrt_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vflogb_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vflogb_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vflogb_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vflogb_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfcvth_s_h(__m128i _1) { ++ return (__m128)__builtin_lsx_vfcvth_s_h((v8i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfcvth_d_s(__m128 _1) { ++ return (__m128d)__builtin_lsx_vfcvth_d_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfcvtl_s_h(__m128i _1) { ++ return (__m128)__builtin_lsx_vfcvtl_s_h((v8i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfcvtl_d_s(__m128 _1) { ++ return (__m128d)__builtin_lsx_vfcvtl_d_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftint_w_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftint_l_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_wu_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftint_wu_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_lu_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftint_lu_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrz_w_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrz_l_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_wu_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrz_wu_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_lu_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrz_lu_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vffint_s_w(__m128i _1) { ++ return (__m128)__builtin_lsx_vffint_s_w((v4i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vffint_d_l(__m128i _1) { ++ return (__m128d)__builtin_lsx_vffint_d_l((v2i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vffint_s_wu(__m128i _1) { ++ return (__m128)__builtin_lsx_vffint_s_wu((v4u32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vffint_d_lu(__m128i _1) { ++ return (__m128d)__builtin_lsx_vffint_d_lu((v2u64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vandn_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vandn_v((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vneg_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vneg_b((v16i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vneg_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vneg_h((v8i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vneg_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vneg_w((v4i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vneg_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vneg_d((v2i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_du((v2u64)_1, (v2u64)_2); ++} ++ ++#define __lsx_vsllwil_h_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_h_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vsllwil_w_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_w_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vsllwil_d_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_d_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vsllwil_hu_bu(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_hu_bu((v16u8)(_1), (_2))) ++ ++#define __lsx_vsllwil_wu_hu(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_wu_hu((v8u16)(_1), (_2))) ++ ++#define __lsx_vsllwil_du_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_du_wu((v4u32)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsran_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsran_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsran_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsran_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsran_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsran_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_bu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_bu_h((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_hu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_hu_w((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_wu_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_wu_d((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrarn_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrarn_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrarn_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrarn_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrarn_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrarn_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_bu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_bu_h((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_hu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_hu_w((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_wu_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_wu_d((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrln_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrln_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrln_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrln_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrln_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrln_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_bu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_bu_h((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_hu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_hu_w((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_wu_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_wu_d((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlrn_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlrn_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlrn_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlrn_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlrn_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlrn_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_bu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_bu_h((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_hu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_hu_w((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_wu_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_wu_d((v2u64)_1, (v2u64)_2); ++} ++ ++#define __lsx_vfrstpi_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vfrstpi_b((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vfrstpi_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vfrstpi_h((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfrstp_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vfrstp_b((v16i8)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfrstp_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vfrstp_h((v8i16)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++#define __lsx_vshuf4i_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vshuf4i_d((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vbsrl_v(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbsrl_v((v16i8)(_1), (_2))) ++ ++#define __lsx_vbsll_v(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbsll_v((v16i8)(_1), (_2))) ++ ++#define __lsx_vextrins_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vextrins_b((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vextrins_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vextrins_h((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vextrins_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vextrins_w((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vextrins_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vextrins_d((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskltz_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskltz_b((v16i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskltz_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskltz_h((v8i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskltz_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskltz_w((v4i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskltz_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskltz_d((v2i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsigncov_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsigncov_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsigncov_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsigncov_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsigncov_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsigncov_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsigncov_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsigncov_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmadd_s(__m128 _1, __m128 _2, __m128 _3) { ++ return (__m128)__builtin_lsx_vfmadd_s((v4f32)_1, (v4f32)_2, (v4f32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmadd_d(__m128d _1, __m128d _2, __m128d _3) { ++ return (__m128d)__builtin_lsx_vfmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmsub_s(__m128 _1, __m128 _2, __m128 _3) { ++ return (__m128)__builtin_lsx_vfmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmsub_d(__m128d _1, __m128d _2, __m128d _3) { ++ return (__m128d)__builtin_lsx_vfmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfnmadd_s(__m128 _1, __m128 _2, __m128 _3) { ++ return (__m128)__builtin_lsx_vfnmadd_s((v4f32)_1, (v4f32)_2, (v4f32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfnmadd_d(__m128d _1, __m128d _2, __m128d _3) { ++ return (__m128d)__builtin_lsx_vfnmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfnmsub_s(__m128 _1, __m128 _2, __m128 _3) { ++ return (__m128)__builtin_lsx_vfnmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfnmsub_d(__m128d _1, __m128d _2, __m128d _3) { ++ return (__m128d)__builtin_lsx_vfnmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrne_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrne_w_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrne_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrne_l_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrp_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrp_w_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrp_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrp_l_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrm_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrm_w_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrm_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrm_l_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftint_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vffint_s_l(__m128i _1, __m128i _2) { ++ return (__m128)__builtin_lsx_vffint_s_l((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftintrz_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrp_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftintrp_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrm_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftintrm_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrne_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftintrne_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintl_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintl_l_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftinth_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftinth_l_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vffinth_d_w(__m128i _1) { ++ return (__m128d)__builtin_lsx_vffinth_d_w((v4i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vffintl_d_w(__m128i _1) { ++ return (__m128d)__builtin_lsx_vffintl_d_w((v4i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrzl_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrzl_l_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrzh_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrzh_l_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrpl_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrpl_l_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrph_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrph_l_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrml_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrml_l_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrmh_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrmh_l_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrnel_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrnel_l_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrneh_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrneh_l_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrintrne_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrintrne_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrintrne_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrintrne_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrintrz_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrintrz_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrintrz_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrintrz_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrintrp_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrintrp_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrintrp_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrintrp_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrintrm_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrintrm_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrintrm_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrintrm_d((v2f64)_1); ++} ++ ++#define __lsx_vstelm_b(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ++ ((void)__builtin_lsx_vstelm_b((v16i8)(_1), (void *)(_2), (_3), (_4))) ++ ++#define __lsx_vstelm_h(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ++ ((void)__builtin_lsx_vstelm_h((v8i16)(_1), (void *)(_2), (_3), (_4))) ++ ++#define __lsx_vstelm_w(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ++ ((void)__builtin_lsx_vstelm_w((v4i32)(_1), (void *)(_2), (_3), (_4))) ++ ++#define __lsx_vstelm_d(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ++ ((void)__builtin_lsx_vstelm_d((v2i64)(_1), (void *)(_2), (_3), (_4))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_d_wu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_d_wu_w((v4u32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_w_hu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_w_hu_h((v8u16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_h_bu_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_h_bu_b((v16u8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_d_wu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_d_wu_w((v4u32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_w_hu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_w_hu_h((v8u16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_h_bu_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_h_bu_b((v16u8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_q_du_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_q_du_d((v2u64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_q_du_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_q_du_d((v2u64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_d_wu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_d_wu_w((v4u32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_w_hu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_w_hu_h((v8u16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_h_bu_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_h_bu_b((v16u8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_d_wu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_d_wu_w((v4u32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_w_hu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_w_hu_h((v8u16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_h_bu_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_h_bu_b((v16u8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_q_du_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_q_du_d((v2u64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_q_du_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_q_du_d((v2u64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_qu_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_qu_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_qu_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_qu_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_d_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_w_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_h_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_d_wu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_w_hu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_h_bu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_d_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_w_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_h_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_d_wu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_w_hu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_h_bu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_d_wu_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_d_wu_w((v2i64)_1, (v4u32)_2, ++ (v4i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_w_hu_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_w_hu_h((v4i32)_1, (v8u16)_2, ++ (v8i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_h_bu_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_h_bu_b((v8i16)_1, (v16u8)_2, ++ (v16i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_d_wu_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_d_wu_w((v2i64)_1, (v4u32)_2, ++ (v4i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_w_hu_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_w_hu_h((v4i32)_1, (v8u16)_2, ++ (v8i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_h_bu_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_h_bu_b((v8i16)_1, (v16u8)_2, ++ (v16i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_q_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_q_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_q_du(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_q_du(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_q_du_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_q_du_d((v2i64)_1, (v2u64)_2, ++ (v2i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_q_du_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_q_du_d((v2i64)_1, (v2u64)_2, ++ (v2i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vrotr_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vrotr_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vrotr_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vrotr_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vrotr_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vrotr_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vrotr_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vrotr_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_q(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_q((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_q(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_q((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vldrepl_b(/*void **/ _1, /*si12*/ _2) \ ++ ((__m128i)__builtin_lsx_vldrepl_b((void const *)(_1), (_2))) ++ ++#define __lsx_vldrepl_h(/*void **/ _1, /*si11*/ _2) \ ++ ((__m128i)__builtin_lsx_vldrepl_h((void const *)(_1), (_2))) ++ ++#define __lsx_vldrepl_w(/*void **/ _1, /*si10*/ _2) \ ++ ((__m128i)__builtin_lsx_vldrepl_w((void const *)(_1), (_2))) ++ ++#define __lsx_vldrepl_d(/*void **/ _1, /*si9*/ _2) \ ++ ((__m128i)__builtin_lsx_vldrepl_d((void const *)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskgez_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskgez_b((v16i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsknz_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmsknz_b((v16i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_h_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_h_b((v16i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_w_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_w_h((v8i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_d_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_d_w((v4i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_q_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_q_d((v2i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_hu_bu(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_hu_bu((v16u8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_wu_hu(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_wu_hu((v8u16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_du_wu(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_du_wu((v4u32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_qu_du(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_qu_du((v2u64)_1); ++} ++ ++#define __lsx_vrotri_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vrotri_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vrotri_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vrotri_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vrotri_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vrotri_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vrotri_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vrotri_d((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vextl_q_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vextl_q_d((v2i64)_1); ++} ++ ++#define __lsx_vsrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vsrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vsrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vsrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vsrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlrni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vsrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlrni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vsrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlrni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vsrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlrni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vssrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vssrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vssrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vssrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vssrlni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vssrlni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vssrlni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vssrlni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vssrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vssrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vssrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vssrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vssrlrni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vssrlrni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vssrlrni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vssrlrni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vsrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrani_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vsrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrani_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vsrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrani_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vsrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrani_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vsrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vsrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vsrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vsrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vssrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vssrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vssrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vssrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vssrani_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vssrani_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vssrani_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vssrani_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_du_q((v2u64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vssrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vssrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vssrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vssrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vssrarni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vssrarni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vssrarni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vssrarni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vpermi_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vpermi_w((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vld(/*void **/ _1, /*si12*/ _2) \ ++ ((__m128i)__builtin_lsx_vld((void const *)(_1), (_2))) ++ ++#define __lsx_vst(/*__m128i*/ _1, /*void **/ _2, /*si12*/ _3) \ ++ ((void)__builtin_lsx_vst((v16i8)(_1), (void *)(_2), (_3))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vorn_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vorn_v((v16i8)_1, (v16i8)_2); ++} ++ ++#define __lsx_vldi(/*i13*/ _1) ((__m128i)__builtin_lsx_vldi((_1))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vshuf_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vshuf_b((v16i8)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vldx(void const *_1, long int _2) { ++ return (__m128i)__builtin_lsx_vldx((void const *)_1, (long int)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void ++ __lsx_vstx(__m128i _1, void *_2, long int _3) { ++ return (void)__builtin_lsx_vstx((v16i8)_1, (void *)_2, (long int)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vextl_qu_du(__m128i _1) { ++ return (__m128i)__builtin_lsx_vextl_qu_du((v2u64)_1); ++} ++ ++#define __lsx_bnz_b(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_b((v16u8)(_1))) ++ ++#define __lsx_bnz_d(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_d((v2u64)(_1))) ++ ++#define __lsx_bnz_h(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_h((v8u16)(_1))) ++ ++#define __lsx_bnz_v(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_v((v16u8)(_1))) ++ ++#define __lsx_bnz_w(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_w((v4u32)(_1))) ++ ++#define __lsx_bz_b(/*__m128i*/ _1) ((int)__builtin_lsx_bz_b((v16u8)(_1))) ++ ++#define __lsx_bz_d(/*__m128i*/ _1) ((int)__builtin_lsx_bz_d((v2u64)(_1))) ++ ++#define __lsx_bz_h(/*__m128i*/ _1) ((int)__builtin_lsx_bz_h((v8u16)(_1))) ++ ++#define __lsx_bz_v(/*__m128i*/ _1) ((int)__builtin_lsx_bz_v((v16u8)(_1))) ++ ++#define __lsx_bz_w(/*__m128i*/ _1) ((int)__builtin_lsx_bz_w((v4u32)(_1))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_caf_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_caf_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_caf_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_caf_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_ceq_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_ceq_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_ceq_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_ceq_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cle_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cle_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cle_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cle_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_clt_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_clt_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_clt_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_clt_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cne_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cne_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cne_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cne_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cor_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cor_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cor_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cor_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cueq_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cueq_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cueq_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cueq_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cule_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cule_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cule_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cule_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cult_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cult_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cult_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cult_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cun_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cun_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cune_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cune_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cune_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cune_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cun_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cun_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_saf_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_saf_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_saf_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_saf_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_seq_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_seq_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_seq_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_seq_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sle_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sle_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sle_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sle_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_slt_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_slt_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_slt_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_slt_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sne_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sne_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sne_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sne_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sor_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sor_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sor_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sor_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sueq_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sueq_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sueq_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sueq_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sule_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sule_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sule_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sule_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sult_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sult_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sult_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sult_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sun_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sun_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sune_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sune_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sune_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sune_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sun_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sun_s((v4f32)_1, (v4f32)_2); ++} ++ ++#define __lsx_vrepli_b(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_b((_1))) ++ ++#define __lsx_vrepli_d(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_d((_1))) ++ ++#define __lsx_vrepli_h(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_h((_1))) ++ ++#define __lsx_vrepli_w(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_w((_1))) ++ ++#endif /* defined(__loongarch_sx) */ ++#endif /* _LOONGSON_SXINTRIN_H */ +diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp +index a8416bf4de92..d1b015502725 100644 +--- a/clang/lib/Sema/SemaChecking.cpp ++++ b/clang/lib/Sema/SemaChecking.cpp +@@ -3826,6 +3826,7 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, + switch (BuiltinID) { + default: + break; ++ // Basic intrinsics. + case LoongArch::BI__builtin_loongarch_cacop_d: + case LoongArch::BI__builtin_loongarch_cacop_w: { + SemaBuiltinConstantArgRange(TheCall, 0, 0, llvm::maxUIntN(5)); +@@ -3854,8 +3855,234 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, + case LoongArch::BI__builtin_loongarch_movfcsr2gr: + case LoongArch::BI__builtin_loongarch_movgr2fcsr: + return SemaBuiltinConstantArgRange(TheCall, 0, 0, llvm::maxUIntN(2)); +- } + ++ // LSX intrinsics. ++ case LoongArch::BI__builtin_lsx_vbitclri_b: ++ case LoongArch::BI__builtin_lsx_vbitrevi_b: ++ case LoongArch::BI__builtin_lsx_vbitseti_b: ++ case LoongArch::BI__builtin_lsx_vsat_b: ++ case LoongArch::BI__builtin_lsx_vsat_bu: ++ case LoongArch::BI__builtin_lsx_vslli_b: ++ case LoongArch::BI__builtin_lsx_vsrai_b: ++ case LoongArch::BI__builtin_lsx_vsrari_b: ++ case LoongArch::BI__builtin_lsx_vsrli_b: ++ case LoongArch::BI__builtin_lsx_vsllwil_h_b: ++ case LoongArch::BI__builtin_lsx_vsllwil_hu_bu: ++ case LoongArch::BI__builtin_lsx_vrotri_b: ++ case LoongArch::BI__builtin_lsx_vsrlri_b: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); ++ case LoongArch::BI__builtin_lsx_vbitclri_h: ++ case LoongArch::BI__builtin_lsx_vbitrevi_h: ++ case LoongArch::BI__builtin_lsx_vbitseti_h: ++ case LoongArch::BI__builtin_lsx_vsat_h: ++ case LoongArch::BI__builtin_lsx_vsat_hu: ++ case LoongArch::BI__builtin_lsx_vslli_h: ++ case LoongArch::BI__builtin_lsx_vsrai_h: ++ case LoongArch::BI__builtin_lsx_vsrari_h: ++ case LoongArch::BI__builtin_lsx_vsrli_h: ++ case LoongArch::BI__builtin_lsx_vsllwil_w_h: ++ case LoongArch::BI__builtin_lsx_vsllwil_wu_hu: ++ case LoongArch::BI__builtin_lsx_vrotri_h: ++ case LoongArch::BI__builtin_lsx_vsrlri_h: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); ++ case LoongArch::BI__builtin_lsx_vssrarni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrarni_bu_h: ++ case LoongArch::BI__builtin_lsx_vssrani_b_h: ++ case LoongArch::BI__builtin_lsx_vssrani_bu_h: ++ case LoongArch::BI__builtin_lsx_vsrarni_b_h: ++ case LoongArch::BI__builtin_lsx_vsrlni_b_h: ++ case LoongArch::BI__builtin_lsx_vsrlrni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrlni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrlni_bu_h: ++ case LoongArch::BI__builtin_lsx_vssrlrni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrlrni_bu_h: ++ case LoongArch::BI__builtin_lsx_vsrani_b_h: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 15); ++ case LoongArch::BI__builtin_lsx_vslei_bu: ++ case LoongArch::BI__builtin_lsx_vslei_hu: ++ case LoongArch::BI__builtin_lsx_vslei_wu: ++ case LoongArch::BI__builtin_lsx_vslei_du: ++ case LoongArch::BI__builtin_lsx_vslti_bu: ++ case LoongArch::BI__builtin_lsx_vslti_hu: ++ case LoongArch::BI__builtin_lsx_vslti_wu: ++ case LoongArch::BI__builtin_lsx_vslti_du: ++ case LoongArch::BI__builtin_lsx_vmaxi_bu: ++ case LoongArch::BI__builtin_lsx_vmaxi_hu: ++ case LoongArch::BI__builtin_lsx_vmaxi_wu: ++ case LoongArch::BI__builtin_lsx_vmaxi_du: ++ case LoongArch::BI__builtin_lsx_vmini_bu: ++ case LoongArch::BI__builtin_lsx_vmini_hu: ++ case LoongArch::BI__builtin_lsx_vmini_wu: ++ case LoongArch::BI__builtin_lsx_vmini_du: ++ case LoongArch::BI__builtin_lsx_vaddi_bu: ++ case LoongArch::BI__builtin_lsx_vaddi_hu: ++ case LoongArch::BI__builtin_lsx_vaddi_wu: ++ case LoongArch::BI__builtin_lsx_vaddi_du: ++ case LoongArch::BI__builtin_lsx_vbitclri_w: ++ case LoongArch::BI__builtin_lsx_vbitrevi_w: ++ case LoongArch::BI__builtin_lsx_vbitseti_w: ++ case LoongArch::BI__builtin_lsx_vsat_w: ++ case LoongArch::BI__builtin_lsx_vsat_wu: ++ case LoongArch::BI__builtin_lsx_vslli_w: ++ case LoongArch::BI__builtin_lsx_vsrai_w: ++ case LoongArch::BI__builtin_lsx_vsrari_w: ++ case LoongArch::BI__builtin_lsx_vsrli_w: ++ case LoongArch::BI__builtin_lsx_vsllwil_d_w: ++ case LoongArch::BI__builtin_lsx_vsllwil_du_wu: ++ case LoongArch::BI__builtin_lsx_vsrlri_w: ++ case LoongArch::BI__builtin_lsx_vrotri_w: ++ case LoongArch::BI__builtin_lsx_vsubi_bu: ++ case LoongArch::BI__builtin_lsx_vsubi_hu: ++ case LoongArch::BI__builtin_lsx_vbsrl_v: ++ case LoongArch::BI__builtin_lsx_vbsll_v: ++ case LoongArch::BI__builtin_lsx_vsubi_wu: ++ case LoongArch::BI__builtin_lsx_vsubi_du: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 31); ++ case LoongArch::BI__builtin_lsx_vssrarni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrarni_hu_w: ++ case LoongArch::BI__builtin_lsx_vssrani_h_w: ++ case LoongArch::BI__builtin_lsx_vssrani_hu_w: ++ case LoongArch::BI__builtin_lsx_vsrarni_h_w: ++ case LoongArch::BI__builtin_lsx_vsrani_h_w: ++ case LoongArch::BI__builtin_lsx_vfrstpi_b: ++ case LoongArch::BI__builtin_lsx_vfrstpi_h: ++ case LoongArch::BI__builtin_lsx_vsrlni_h_w: ++ case LoongArch::BI__builtin_lsx_vsrlrni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrlni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrlni_hu_w: ++ case LoongArch::BI__builtin_lsx_vssrlrni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrlrni_hu_w: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 31); ++ case LoongArch::BI__builtin_lsx_vbitclri_d: ++ case LoongArch::BI__builtin_lsx_vbitrevi_d: ++ case LoongArch::BI__builtin_lsx_vbitseti_d: ++ case LoongArch::BI__builtin_lsx_vsat_d: ++ case LoongArch::BI__builtin_lsx_vsat_du: ++ case LoongArch::BI__builtin_lsx_vslli_d: ++ case LoongArch::BI__builtin_lsx_vsrai_d: ++ case LoongArch::BI__builtin_lsx_vsrli_d: ++ case LoongArch::BI__builtin_lsx_vsrari_d: ++ case LoongArch::BI__builtin_lsx_vrotri_d: ++ case LoongArch::BI__builtin_lsx_vsrlri_d: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 63); ++ case LoongArch::BI__builtin_lsx_vssrarni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrarni_wu_d: ++ case LoongArch::BI__builtin_lsx_vssrani_w_d: ++ case LoongArch::BI__builtin_lsx_vssrani_wu_d: ++ case LoongArch::BI__builtin_lsx_vsrarni_w_d: ++ case LoongArch::BI__builtin_lsx_vsrlni_w_d: ++ case LoongArch::BI__builtin_lsx_vsrlrni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrlni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrlni_wu_d: ++ case LoongArch::BI__builtin_lsx_vssrlrni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrlrni_wu_d: ++ case LoongArch::BI__builtin_lsx_vsrani_w_d: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 63); ++ case LoongArch::BI__builtin_lsx_vssrarni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrarni_du_q: ++ case LoongArch::BI__builtin_lsx_vssrani_d_q: ++ case LoongArch::BI__builtin_lsx_vssrani_du_q: ++ case LoongArch::BI__builtin_lsx_vsrarni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrlni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrlni_du_q: ++ case LoongArch::BI__builtin_lsx_vssrlrni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrlrni_du_q: ++ case LoongArch::BI__builtin_lsx_vsrani_d_q: ++ case LoongArch::BI__builtin_lsx_vsrlrni_d_q: ++ case LoongArch::BI__builtin_lsx_vsrlni_d_q: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 127); ++ case LoongArch::BI__builtin_lsx_vseqi_b: ++ case LoongArch::BI__builtin_lsx_vseqi_h: ++ case LoongArch::BI__builtin_lsx_vseqi_w: ++ case LoongArch::BI__builtin_lsx_vseqi_d: ++ case LoongArch::BI__builtin_lsx_vslti_b: ++ case LoongArch::BI__builtin_lsx_vslti_h: ++ case LoongArch::BI__builtin_lsx_vslti_w: ++ case LoongArch::BI__builtin_lsx_vslti_d: ++ case LoongArch::BI__builtin_lsx_vslei_b: ++ case LoongArch::BI__builtin_lsx_vslei_h: ++ case LoongArch::BI__builtin_lsx_vslei_w: ++ case LoongArch::BI__builtin_lsx_vslei_d: ++ case LoongArch::BI__builtin_lsx_vmaxi_b: ++ case LoongArch::BI__builtin_lsx_vmaxi_h: ++ case LoongArch::BI__builtin_lsx_vmaxi_w: ++ case LoongArch::BI__builtin_lsx_vmaxi_d: ++ case LoongArch::BI__builtin_lsx_vmini_b: ++ case LoongArch::BI__builtin_lsx_vmini_h: ++ case LoongArch::BI__builtin_lsx_vmini_w: ++ case LoongArch::BI__builtin_lsx_vmini_d: ++ return SemaBuiltinConstantArgRange(TheCall, 1, -16, 15); ++ case LoongArch::BI__builtin_lsx_vandi_b: ++ case LoongArch::BI__builtin_lsx_vnori_b: ++ case LoongArch::BI__builtin_lsx_vori_b: ++ case LoongArch::BI__builtin_lsx_vshuf4i_b: ++ case LoongArch::BI__builtin_lsx_vshuf4i_h: ++ case LoongArch::BI__builtin_lsx_vshuf4i_w: ++ case LoongArch::BI__builtin_lsx_vxori_b: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 255); ++ case LoongArch::BI__builtin_lsx_vbitseli_b: ++ case LoongArch::BI__builtin_lsx_vshuf4i_d: ++ case LoongArch::BI__builtin_lsx_vextrins_b: ++ case LoongArch::BI__builtin_lsx_vextrins_h: ++ case LoongArch::BI__builtin_lsx_vextrins_w: ++ case LoongArch::BI__builtin_lsx_vextrins_d: ++ case LoongArch::BI__builtin_lsx_vpermi_w: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 255); ++ case LoongArch::BI__builtin_lsx_vpickve2gr_b: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_bu: ++ case LoongArch::BI__builtin_lsx_vreplvei_b: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_b: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 15); ++ case LoongArch::BI__builtin_lsx_vpickve2gr_h: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_hu: ++ case LoongArch::BI__builtin_lsx_vreplvei_h: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_h: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 7); ++ case LoongArch::BI__builtin_lsx_vpickve2gr_w: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_wu: ++ case LoongArch::BI__builtin_lsx_vreplvei_w: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 3); ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_w: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 3); ++ case LoongArch::BI__builtin_lsx_vpickve2gr_d: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_du: ++ case LoongArch::BI__builtin_lsx_vreplvei_d: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1); ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_d: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 1); ++ case LoongArch::BI__builtin_lsx_vstelm_b: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 15); ++ case LoongArch::BI__builtin_lsx_vstelm_h: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -256, 254) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 7); ++ case LoongArch::BI__builtin_lsx_vstelm_w: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -512, 508) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 3); ++ case LoongArch::BI__builtin_lsx_vstelm_d: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -1024, 1016) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 1); ++ case LoongArch::BI__builtin_lsx_vldrepl_b: ++ case LoongArch::BI__builtin_lsx_vld: ++ return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2047); ++ case LoongArch::BI__builtin_lsx_vldrepl_h: ++ return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2046); ++ case LoongArch::BI__builtin_lsx_vldrepl_w: ++ return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2044); ++ case LoongArch::BI__builtin_lsx_vldrepl_d: ++ return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2040); ++ case LoongArch::BI__builtin_lsx_vst: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -2048, 2047); ++ case LoongArch::BI__builtin_lsx_vldi: ++ return SemaBuiltinConstantArgRange(TheCall, 0, -4096, 4095); ++ case LoongArch::BI__builtin_lsx_vrepli_b: ++ case LoongArch::BI__builtin_lsx_vrepli_h: ++ case LoongArch::BI__builtin_lsx_vrepli_w: ++ case LoongArch::BI__builtin_lsx_vrepli_d: ++ return SemaBuiltinConstantArgRange(TheCall, 0, -512, 511); ++ } + return false; + } + +-- +2.20.1 + diff --git a/0005-Clang-LoongArch-Support-the-builtin-functions-for-LA.patch b/0005-Clang-LoongArch-Support-the-builtin-functions-for-LA.patch new file mode 100644 index 0000000..781298e --- /dev/null +++ b/0005-Clang-LoongArch-Support-the-builtin-functions-for-LA.patch @@ -0,0 +1,5143 @@ +From 55045298ccb1676f5534c94d9cfa35f09b83fd4b Mon Sep 17 00:00:00 2001 +From: licongtian +Date: Wed, 25 Oct 2023 17:44:06 +0800 +Subject: [PATCH 05/17] [Clang][LoongArch] Support the builtin functions for + LASX + +This patch does the following work: +- Define the builtin functions for LASX +- Add the header files lasxintrin.h + +(cherry picked from commit a4005e729c8d9dba9ba19f3ce4ad5b60e64dc467) + +Change-Id: I6a7b6b7a0e0de09eb6e07438b089f13d452948f9 +--- + .../include/clang/Basic/BuiltinsLoongArch.def | 3 + + .../clang/Basic/BuiltinsLoongArchLASX.def | 982 +++++ + clang/lib/Headers/CMakeLists.txt | 1 + + clang/lib/Headers/lasxintrin.h | 3860 +++++++++++++++++ + clang/lib/Sema/SemaChecking.cpp | 227 + + 5 files changed, 5073 insertions(+) + create mode 100644 clang/include/clang/Basic/BuiltinsLoongArchLASX.def + create mode 100644 clang/lib/Headers/lasxintrin.h + +diff --git a/clang/include/clang/Basic/BuiltinsLoongArch.def b/clang/include/clang/Basic/BuiltinsLoongArch.def +index 9ec19c31095a..95359a3fdc71 100644 +--- a/clang/include/clang/Basic/BuiltinsLoongArch.def ++++ b/clang/include/clang/Basic/BuiltinsLoongArch.def +@@ -21,5 +21,8 @@ + // Definition of LSX builtins. + #include "clang/Basic/BuiltinsLoongArchLSX.def" + ++// Definition of LASX builtins. ++#include "clang/Basic/BuiltinsLoongArchLASX.def" ++ + #undef BUILTIN + #undef TARGET_BUILTIN +diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def +new file mode 100644 +index 000000000000..3de200f665b6 +--- /dev/null ++++ b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def +@@ -0,0 +1,982 @@ ++//=BuiltinsLoongArchLASX.def - LoongArch Builtin function database -- C++ -*-=// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines the LoongArch-specific LASX builtin function database. ++// Users of this file must define the BUILTIN macro to make use of this ++// information. ++// ++//===----------------------------------------------------------------------===// ++ ++TARGET_BUILTIN(__builtin_lasx_xvadd_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvadd_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvadd_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvadd_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvadd_q, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsub_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsub_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsub_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsub_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsub_q, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvaddi_bu, "V32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddi_hu, "V16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddi_wu, "V8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddi_du, "V4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsubi_bu, "V32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubi_hu, "V16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubi_wu, "V8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubi_du, "V4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvneg_b, "V32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvneg_h, "V16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvneg_w, "V8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvneg_d, "V4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsadd_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsadd_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsadd_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsadd_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsadd_bu, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsadd_hu, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsadd_wu, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsadd_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssub_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssub_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssub_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssub_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssub_bu, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssub_hu, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssub_wu, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssub_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvhaddw_h_b, "V16SsV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhaddw_w_h, "V8SiV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhaddw_d_w, "V4SLLiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhaddw_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvhaddw_hu_bu, "V16UsV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhaddw_wu_hu, "V8UiV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhaddw_du_wu, "V4ULLiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhaddw_qu_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvhsubw_h_b, "V16SsV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhsubw_w_h, "V8SiV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhsubw_d_w, "V4SLLiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhsubw_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvhsubw_hu_bu, "V16UsV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhsubw_wu_hu, "V8UiV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhsubw_du_wu, "V4ULLiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhsubw_qu_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_h_b, "V16sV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_w_h, "V8SiV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_d_w, "V4LLiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_h_b, "V16sV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_w_h, "V8SiV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_d_w, "V4LLiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsubwev_h_b, "V16sV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwev_w_h, "V8SiV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwev_d_w, "V4LLiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwev_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsubwod_h_b, "V16sV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwod_w_h, "V8SiV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwod_d_w, "V4LLiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwod_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_h_bu, "V16sV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_w_hu, "V8SiV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_h_bu, "V16sV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_w_hu, "V8SiV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsubwev_h_bu, "V16sV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwev_w_hu, "V8SiV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwev_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwev_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsubwod_h_bu, "V16sV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwod_w_hu, "V8SiV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwod_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwod_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_h_bu_b, "V16sV32UcV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_w_hu_h, "V8SiV16UsV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_h_bu_b, "V16sV32UcV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_w_hu_h, "V8SiV16UsV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvavg_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavg_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavg_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavg_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvavg_bu, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavg_hu, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavg_wu, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavg_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvavgr_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavgr_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavgr_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavgr_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvavgr_bu, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavgr_hu, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavgr_wu, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavgr_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvabsd_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvabsd_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvabsd_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvabsd_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvabsd_bu, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvabsd_hu, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvabsd_wu, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvabsd_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvadda_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvadda_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvadda_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvadda_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmax_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmax_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmax_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmax_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmaxi_b, "V32ScV32ScIi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaxi_h, "V16SsV16SsIi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaxi_w, "V8SiV8SiIi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaxi_d, "V4SLLiV4SLLiIi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmax_bu, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmax_hu, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmax_wu, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmax_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmaxi_bu, "V32UcV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaxi_hu, "V16UsV16UsIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaxi_wu, "V8UiV8UiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaxi_du, "V4ULLiV4ULLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmin_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmin_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmin_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmin_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmini_b, "V32ScV32ScIi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmini_h, "V16SsV16SsIi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmini_w, "V8SiV8SiIi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmini_d, "V4SLLiV4SLLiIi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmin_bu, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmin_hu, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmin_wu, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmin_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmini_bu, "V32UcV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmini_hu, "V16UsV16UsIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmini_wu, "V8UiV8UiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmini_du, "V4ULLiV4ULLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmul_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmul_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmul_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmul_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmuh_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmuh_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmuh_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmuh_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmuh_bu, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmuh_hu, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmuh_wu, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmuh_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_h_b, "V16sV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_w_h, "V8SiV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_d_w, "V4LLiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_h_b, "V16sV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_w_h, "V8SiV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_d_w, "V4LLiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_h_bu, "V16sV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_w_hu, "V8SiV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_h_bu, "V16sV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_w_hu, "V8SiV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_h_bu_b, "V16sV32UcV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_w_hu_h, "V8SiV16UsV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_h_bu_b, "V16sV32UcV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_w_hu_h, "V8SiV16UsV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmadd_b, "V32ScV32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmadd_h, "V16SsV16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmadd_w, "V8SiV8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmadd_d, "V4SLLiV4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmsub_b, "V32ScV32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmsub_h, "V16SsV16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmsub_w, "V8SiV8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmsub_d, "V4SLLiV4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_h_b, "V16sV16sV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_w_h, "V8SiV8SiV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_d_w, "V4LLiV4LLiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_h_b, "V16sV16sV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_w_h, "V8SiV8SiV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_d_w, "V4LLiV4LLiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_h_bu, "V16UsV16UsV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_w_hu, "V8UiV8UiV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_d_wu, "V4ULLiV4ULLiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_q_du, "V4ULLiV4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_h_bu, "V16UsV16UsV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_w_hu, "V8UiV8UiV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_d_wu, "V4ULLiV4ULLiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_q_du, "V4ULLiV4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_h_bu_b, "V16sV16sV32UcV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_w_hu_h, "V8SiV8SiV16UsV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_h_bu_b, "V16sV16sV32UcV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_w_hu_h, "V8SiV8SiV16UsV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc", "lasx") ++ ++ ++TARGET_BUILTIN(__builtin_lasx_xvdiv_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvdiv_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvdiv_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvdiv_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvdiv_bu, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvdiv_hu, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvdiv_wu, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvdiv_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmod_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmod_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmod_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmod_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmod_bu, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmod_hu, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmod_wu, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmod_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsat_b, "V32ScV32ScIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsat_h, "V16SsV16SsIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsat_w, "V8SiV8SiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsat_d, "V4SLLiV4SLLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsat_bu, "V32UcV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsat_hu, "V16UsV16UsIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsat_wu, "V8UiV8UiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsat_du, "V4ULLiV4ULLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvexth_h_b, "V16sV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvexth_w_h, "V8SiV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvexth_d_w, "V4LLiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvexth_q_d, "V4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvexth_hu_bu, "V16UsV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvexth_wu_hu, "V8UiV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvexth_du_wu, "V4ULLiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvexth_qu_du, "V4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_vext2xv_h_b, "V16sV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_vext2xv_w_b, "V8SiV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_vext2xv_d_b, "V4LLiV32c", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_vext2xv_w_h, "V8SiV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_vext2xv_d_h, "V4LLiV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_vext2xv_d_w, "V4LLiV8Si", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_vext2xv_hu_bu, "V16sV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_vext2xv_wu_bu, "V8SiV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_vext2xv_du_bu, "V4LLiV32c", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_vext2xv_wu_hu, "V8SiV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_vext2xv_du_hu, "V4LLiV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_vext2xv_du_wu, "V4LLiV8Si", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsigncov_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsigncov_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsigncov_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsigncov_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmskltz_b, "V32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmskltz_h, "V16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmskltz_w, "V8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmskltz_d, "V4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmskgez_b, "V32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmsknz_b, "V16sV16s", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvldi, "V4LLiIi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrepli_b, "V32cIi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrepli_h, "V16sIi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrepli_w, "V8iIi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrepli_d, "V4LLiIi", "nc", "lasx") ++ ++ ++TARGET_BUILTIN(__builtin_lasx_xvand_v, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvor_v, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvxor_v, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvnor_v, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvandn_v, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvorn_v, "V32ScV32ScV32Sc", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvandi_b, "V32UcV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvori_b, "V32UcV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvxori_b, "V32UcV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvnori_b, "V32UcV32UcIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsll_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsll_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsll_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsll_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvslli_b, "V32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslli_h, "V16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslli_w, "V8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslli_d, "V4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrl_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrl_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrl_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrl_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrli_b, "V32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrli_h, "V16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrli_w, "V8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrli_d, "V4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsra_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsra_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsra_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsra_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrai_b, "V32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrai_h, "V16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrai_w, "V8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrai_d, "V4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvrotr_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrotr_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrotr_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrotr_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvrotri_b, "V32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrotri_h, "V16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrotri_w, "V8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrotri_d, "V4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsllwil_h_b, "V16sV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsllwil_w_h, "V8SiV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsllwil_d_w, "V4LLiV8SiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvextl_q_d, "V4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsllwil_hu_bu, "V16UsV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsllwil_wu_hu, "V8UiV16UsIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsllwil_du_wu, "V4ULLiV8UiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvextl_qu_du, "V4LLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrlr_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlr_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlr_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlr_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrlri_b, "V32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlri_h, "V16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlri_w, "V8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlri_d, "V4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrar_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrar_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrar_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrar_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrari_b, "V32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrari_h, "V16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrari_w, "V8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrari_d, "V4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrln_b_h, "V32ScV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrln_h_w, "V16sV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrln_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsran_b_h, "V32ScV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsran_h_w, "V16sV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsran_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrlni_b_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlni_h_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlni_w_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrani_b_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrani_h_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrani_w_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrani_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrlrn_b_h, "V32ScV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlrn_h_w, "V16sV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlrn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrarn_b_h, "V32ScV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrarn_h_w, "V16sV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrarn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrlrni_b_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlrni_h_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlrni_w_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlrni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrarni_b_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrarni_h_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrarni_w_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrarni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrln_b_h, "V32ScV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrln_h_w, "V16sV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrln_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssran_b_h, "V32ScV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssran_h_w, "V16sV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssran_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrln_bu_h, "V32UcV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrln_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrln_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssran_bu_h, "V32UcV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssran_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssran_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrlni_b_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlni_h_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlni_w_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrani_b_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrani_h_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrani_w_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrani_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrlrni_bu_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlrni_hu_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlrni_wu_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlrni_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrani_bu_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrani_hu_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrani_wu_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrani_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrlrn_b_h, "V32ScV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlrn_h_w, "V16sV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlrn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrarn_b_h, "V32ScV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrarn_h_w, "V16sV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrarn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrlrn_bu_h, "V32UcV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlrn_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlrn_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrarn_bu_h, "V32UcV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrarn_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrarn_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrlrni_b_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlrni_h_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlrni_w_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlrni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrarni_b_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrarni_h_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrarni_w_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrarni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrlni_bu_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlni_hu_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlni_wu_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlni_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrarni_bu_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrarni_hu_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrarni_wu_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrarni_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvclo_b, "V32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvclo_h, "V16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvclo_w, "V8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvclo_d, "V4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvclz_b, "V32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvclz_h, "V16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvclz_w, "V8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvclz_d, "V4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvpcnt_b, "V32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpcnt_h, "V16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpcnt_w, "V8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpcnt_d, "V4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvbitclr_b, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitclr_h, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitclr_w, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitclr_d, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvbitclri_b, "V32UcV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitclri_h, "V16UsV16UsIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitclri_w, "V8UiV8UiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitclri_d, "V4ULLiV4ULLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvbitset_b, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitset_h, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitset_w, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitset_d, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvbitseti_b, "V32UcV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitseti_h, "V16UsV16UsIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitseti_w, "V8UiV8UiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitseti_d, "V4ULLiV4ULLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvbitrev_b, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitrev_h, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitrev_w, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitrev_d, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvbitrevi_b, "V32UcV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitrevi_h, "V16UsV16UsIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitrevi_w, "V8UiV8UiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitrevi_d, "V4ULLiV4ULLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfrstp_b, "V32ScV32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfrstp_h, "V16SsV16SsV16SsV16Ss", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfrstpi_b, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfrstpi_h, "V16sV16sV16sIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfadd_s, "V8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfadd_d, "V4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfsub_s, "V8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfsub_d, "V4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfmul_s, "V8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfmul_d, "V4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfdiv_s, "V8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfdiv_d, "V4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfmadd_s, "V8fV8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfmadd_d, "V4dV4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfmsub_s, "V8fV8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfmsub_d, "V4dV4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfnmadd_s, "V8fV8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfnmadd_d, "V4dV4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfnmsub_s, "V8fV8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfnmsub_d, "V4dV4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfmax_s, "V8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfmax_d, "V4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfmin_s, "V8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfmin_d, "V4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfmaxa_s, "V8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfmaxa_d, "V4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfmina_s, "V8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfmina_d, "V4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvflogb_s, "V8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvflogb_d, "V4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfclass_s, "V8iV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfclass_d, "V4LLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfsqrt_s, "V8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfsqrt_d, "V4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfrecip_s, "V8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfrecip_d, "V4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfrsqrt_s, "V8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfrsqrt_d, "V4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcvtl_s_h, "V8fV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcvth_s_h, "V8fV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcvtl_d_s, "V4dV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcvth_d_s, "V4dV8f", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcvt_h_s, "V16sV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcvt_s_d, "V8fV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfrintrne_s, "V8SiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfrintrne_d, "V4LLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfrintrz_s, "V8SiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfrintrz_d, "V4LLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfrintrp_s, "V8SiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfrintrp_d, "V4LLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfrintrm_s, "V8SiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfrintrm_d, "V4LLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfrint_s, "V8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfrint_d, "V4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftintrne_w_s, "V8SiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrne_l_d, "V4LLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftintrz_w_s, "V8SiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrz_l_d, "V4LLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftintrp_w_s, "V8SiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrp_l_d, "V4LLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftintrm_w_s, "V8SiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrm_l_d, "V4LLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftint_w_s, "V8SiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftint_l_d, "V4SLLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftintrz_wu_s, "V8UiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrz_lu_d, "V4ULLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftint_wu_s, "V8UiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftint_lu_d, "V4ULLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftintrne_w_d, "V8SiV4dV4d", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrz_w_d, "V8SiV4dV4d", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrp_w_d, "V8SiV4dV4d", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrm_w_d, "V8SiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftint_w_d, "V8SiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftintrnel_l_s, "V4LLiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrneh_l_s, "V4LLiV8f", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftintrzl_l_s, "V4LLiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrzh_l_s, "V4LLiV8f", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftintrpl_l_s, "V4LLiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrph_l_s, "V4LLiV8f", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftintrml_l_s, "V4LLiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrmh_l_s, "V4LLiV8f", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftintl_l_s, "V4LLiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftinth_l_s, "V4LLiV8f", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvffint_s_w, "V8fV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvffint_d_l, "V4dV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvffint_s_wu, "V8fV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvffint_d_lu, "V4dV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvffintl_d_w, "V4dV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvffinth_d_w, "V4dV8Si", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvffint_s_l, "V8fV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvseq_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvseq_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvseq_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvseq_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvseqi_b, "V32ScV32ScISi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvseqi_h, "V16SsV16SsISi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvseqi_w, "V8SiV8SiISi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvseqi_d, "V4SLLiV4SLLiISi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsle_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsle_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsle_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsle_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvslei_b, "V32ScV32ScISi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslei_h, "V16SsV16SsISi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslei_w, "V8SiV8SiISi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslei_d, "V4SLLiV4SLLiISi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsle_bu, "V32ScV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsle_hu, "V16SsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsle_wu, "V8SiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsle_du, "V4SLLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvslei_bu, "V32ScV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslei_hu, "V16SsV16UsIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslei_wu, "V8SiV8UiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslei_du, "V4SLLiV4ULLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvslt_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslt_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslt_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslt_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvslti_b, "V32ScV32ScISi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslti_h, "V16SsV16SsISi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslti_w, "V8SiV8SiISi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslti_d, "V4SLLiV4SLLiISi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvslt_bu, "V32ScV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslt_hu, "V16SsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslt_wu, "V8SiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslt_du, "V4SLLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvslti_bu, "V32ScV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslti_hu, "V16SsV16UsIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslti_wu, "V8SiV8UiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslti_du, "V4SLLiV4ULLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_caf_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_caf_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cun_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cun_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_ceq_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_ceq_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cueq_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cueq_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_clt_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_clt_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cult_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cult_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cle_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cle_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cule_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cule_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cne_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cne_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cor_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cor_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cune_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cune_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_saf_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_saf_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sun_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sun_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_seq_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_seq_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sueq_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sueq_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_slt_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_slt_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sult_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sult_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sle_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sle_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sule_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sule_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sne_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sne_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sor_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sor_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sune_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sune_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvbitsel_v, "V32UcV32UcV32UcV32Uc", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvbitseli_b, "V32UcV32UcV32UcIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_b, "V32Sci", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_h, "V16Ssi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_w, "V8Sii", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_d, "V4SLLiLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvinsgr2vr_w, "V8SiV8SiiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvinsgr2vr_d, "V4SLLiV4SLLiLLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_w, "iV8SiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_d, "LLiV4SLLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_wu, "iV8UiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_du, "LLiV4ULLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvreplve_b, "V32cV32cUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvreplve_h, "V16sV16sUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvreplve_w, "V8iV8iUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvreplve_d, "V4LLiV4LLiUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_b, "V32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_h, "V16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_w, "V8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_d, "V4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvreplve0_b, "V32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvreplve0_h, "V16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvreplve0_w, "V8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvreplve0_d, "V4SLLiV4SLLi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvreplve0_q, "V32ScV32Sc", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvinsve0_w, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvinsve0_d, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvpickve_w, "V8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpickve_d, "V4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvpickve_w_f, "V8fV8fIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpickve_d_f, "V4dV4dIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvbsll_v, "V32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbsrl_v, "V32cV32cIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvpackev_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpackev_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpackev_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpackev_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvpackod_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpackod_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpackod_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpackod_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvpickev_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpickev_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpickev_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpickev_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvpickod_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpickod_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpickod_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpickod_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvilvl_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvilvl_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvilvl_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvilvl_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvilvh_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvilvh_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvilvh_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvilvh_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvshuf_b, "V32UcV32UcV32UcV32Uc", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvshuf_h, "V16sV16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvshuf_w, "V8iV8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvshuf_d, "V4LLiV4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvperm_w, "V8iV8iV8i", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvshuf4i_b, "V32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvshuf4i_h, "V16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvshuf4i_w, "V8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvshuf4i_d, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvpermi_w, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpermi_d, "V4LLiV4LLiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpermi_q, "V32cV32cV32cIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvextrins_b, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvextrins_h, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvextrins_w, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvextrins_d, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvld, "V32ScvC*Ii", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvst, "vV32Scv*Ii", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvldx, "V32ScvC*LLi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvstx, "vV32Scv*LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvldrepl_b, "V32cvC*Ii", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvldrepl_h, "V16svC*Ii", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvldrepl_w, "V8ivC*Ii", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvldrepl_d, "V4LLivC*Ii", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvstelm_b, "vV32Scv*IiUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvstelm_h, "vV16Ssv*IiUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvstelm_w, "vV8Siv*IiUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvstelm_d, "vV4SLLiv*IiUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xbz_v, "iV32Uc", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xbnz_v, "iV32Uc", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xbz_b, "iV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xbz_h, "iV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xbz_w, "iV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xbz_d, "iV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xbnz_b, "iV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xbnz_h, "iV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xbnz_w, "iV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xbnz_d, "iV4ULLi", "nc", "lasx") +diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt +index 1d5573b71e6d..356009ae9157 100644 +--- a/clang/lib/Headers/CMakeLists.txt ++++ b/clang/lib/Headers/CMakeLists.txt +@@ -78,6 +78,7 @@ set(hlsl_files + + set(loongarch_files + larchintrin.h ++ lasxintrin.h + lsxintrin.h + ) + +diff --git a/clang/lib/Headers/lasxintrin.h b/clang/lib/Headers/lasxintrin.h +new file mode 100644 +index 000000000000..6b4d5012a24b +--- /dev/null ++++ b/clang/lib/Headers/lasxintrin.h +@@ -0,0 +1,3860 @@ ++/*===------------ lasxintrin.h - LoongArch LASX intrinsics -----------------=== ++ * ++ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++ * See https://llvm.org/LICENSE.txt for license information. ++ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++ * ++ *===-----------------------------------------------------------------------=== ++ */ ++ ++#ifndef _LOONGSON_ASXINTRIN_H ++#define _LOONGSON_ASXINTRIN_H 1 ++ ++#if defined(__loongarch_asx) ++ ++typedef signed char v32i8 __attribute__((vector_size(32), aligned(32))); ++typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1))); ++typedef unsigned char v32u8 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned char v32u8_b __attribute__((vector_size(32), aligned(1))); ++typedef short v16i16 __attribute__((vector_size(32), aligned(32))); ++typedef short v16i16_h __attribute__((vector_size(32), aligned(2))); ++typedef unsigned short v16u16 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned short v16u16_h __attribute__((vector_size(32), aligned(2))); ++typedef int v8i32 __attribute__((vector_size(32), aligned(32))); ++typedef int v8i32_w __attribute__((vector_size(32), aligned(4))); ++typedef unsigned int v8u32 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4))); ++typedef long long v4i64 __attribute__((vector_size(32), aligned(32))); ++typedef long long v4i64_d __attribute__((vector_size(32), aligned(8))); ++typedef unsigned long long v4u64 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned long long v4u64_d __attribute__((vector_size(32), aligned(8))); ++typedef float v8f32 __attribute__((vector_size(32), aligned(32))); ++typedef float v8f32_w __attribute__((vector_size(32), aligned(4))); ++typedef double v4f64 __attribute__((vector_size(32), aligned(32))); ++typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); ++ ++typedef double v4f64 __attribute__((vector_size(32), aligned(32))); ++typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); ++ ++typedef float __m256 __attribute__((__vector_size__(32), __may_alias__)); ++typedef long long __m256i __attribute__((__vector_size__(32), __may_alias__)); ++typedef double __m256d __attribute__((__vector_size__(32), __may_alias__)); ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsll_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsll_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsll_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsll_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsll_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsll_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsll_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsll_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvslli_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslli_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvslli_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslli_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvslli_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslli_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvslli_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslli_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsra_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsra_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsra_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsra_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsra_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsra_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsra_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsra_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvsrai_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrai_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvsrai_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrai_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvsrai_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrai_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvsrai_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrai_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrar_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrar_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrar_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrar_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrar_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrar_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrar_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrar_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvsrari_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrari_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvsrari_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrari_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvsrari_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrari_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvsrari_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrari_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrl_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrl_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrl_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrl_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrl_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrl_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrl_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrl_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvsrli_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrli_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvsrli_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrli_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvsrli_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrli_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvsrli_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrli_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlr_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlr_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlr_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlr_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlr_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlr_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlr_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlr_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvsrlri_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrlri_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvsrlri_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrlri_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvsrlri_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrlri_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvsrlri_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrlri_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitclr_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitclr_b((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitclr_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitclr_h((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitclr_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitclr_w((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitclr_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitclr_d((v4u64)_1, (v4u64)_2); ++} ++ ++#define __lasx_xvbitclri_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitclri_b((v32u8)(_1), (_2))) ++ ++#define __lasx_xvbitclri_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitclri_h((v16u16)(_1), (_2))) ++ ++#define __lasx_xvbitclri_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitclri_w((v8u32)(_1), (_2))) ++ ++#define __lasx_xvbitclri_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitclri_d((v4u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitset_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitset_b((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitset_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitset_h((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitset_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitset_w((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitset_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitset_d((v4u64)_1, (v4u64)_2); ++} ++ ++#define __lasx_xvbitseti_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitseti_b((v32u8)(_1), (_2))) ++ ++#define __lasx_xvbitseti_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitseti_h((v16u16)(_1), (_2))) ++ ++#define __lasx_xvbitseti_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitseti_w((v8u32)(_1), (_2))) ++ ++#define __lasx_xvbitseti_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitseti_d((v4u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitrev_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitrev_b((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitrev_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitrev_h((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitrev_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitrev_w((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitrev_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitrev_d((v4u64)_1, (v4u64)_2); ++} ++ ++#define __lasx_xvbitrevi_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitrevi_b((v32u8)(_1), (_2))) ++ ++#define __lasx_xvbitrevi_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitrevi_h((v16u16)(_1), (_2))) ++ ++#define __lasx_xvbitrevi_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitrevi_w((v8u32)(_1), (_2))) ++ ++#define __lasx_xvbitrevi_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitrevi_d((v4u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvaddi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvaddi_bu((v32i8)(_1), (_2))) ++ ++#define __lasx_xvaddi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvaddi_hu((v16i16)(_1), (_2))) ++ ++#define __lasx_xvaddi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvaddi_wu((v8i32)(_1), (_2))) ++ ++#define __lasx_xvaddi_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvaddi_du((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvsubi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsubi_bu((v32i8)(_1), (_2))) ++ ++#define __lasx_xvsubi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsubi_hu((v16i16)(_1), (_2))) ++ ++#define __lasx_xvsubi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsubi_wu((v8i32)(_1), (_2))) ++ ++#define __lasx_xvsubi_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsubi_du((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvmaxi_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvmaxi_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvmaxi_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvmaxi_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_du((v4u64)_1, (v4u64)_2); ++} ++ ++#define __lasx_xvmaxi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_bu((v32u8)(_1), (_2))) ++ ++#define __lasx_xvmaxi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_hu((v16u16)(_1), (_2))) ++ ++#define __lasx_xvmaxi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_wu((v8u32)(_1), (_2))) ++ ++#define __lasx_xvmaxi_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_du((v4u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvmini_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvmini_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvmini_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvmini_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_du((v4u64)_1, (v4u64)_2); ++} ++ ++#define __lasx_xvmini_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_bu((v32u8)(_1), (_2))) ++ ++#define __lasx_xvmini_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_hu((v16u16)(_1), (_2))) ++ ++#define __lasx_xvmini_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_wu((v8u32)(_1), (_2))) ++ ++#define __lasx_xvmini_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_du((v4u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvseq_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvseq_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvseq_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvseq_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvseq_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvseq_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvseq_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvseq_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvseqi_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvseqi_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvseqi_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvseqi_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvseqi_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvseqi_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvseqi_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvseqi_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvslti_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvslti_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvslti_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvslti_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_du((v4u64)_1, (v4u64)_2); ++} ++ ++#define __lasx_xvslti_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_bu((v32u8)(_1), (_2))) ++ ++#define __lasx_xvslti_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_hu((v16u16)(_1), (_2))) ++ ++#define __lasx_xvslti_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_wu((v8u32)(_1), (_2))) ++ ++#define __lasx_xvslti_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_du((v4u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvslei_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvslei_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvslei_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvslei_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_du((v4u64)_1, (v4u64)_2); ++} ++ ++#define __lasx_xvslei_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_bu((v32u8)(_1), (_2))) ++ ++#define __lasx_xvslei_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_hu((v16u16)(_1), (_2))) ++ ++#define __lasx_xvslei_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_wu((v8u32)(_1), (_2))) ++ ++#define __lasx_xvslei_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_du((v4u64)(_1), (_2))) ++ ++#define __lasx_xvsat_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvsat_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvsat_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvsat_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_d((v4i64)(_1), (_2))) ++ ++#define __lasx_xvsat_bu(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_bu((v32u8)(_1), (_2))) ++ ++#define __lasx_xvsat_hu(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_hu((v16u16)(_1), (_2))) ++ ++#define __lasx_xvsat_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_wu((v8u32)(_1), (_2))) ++ ++#define __lasx_xvsat_du(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_du((v4u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadda_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadda_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadda_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadda_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadda_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadda_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadda_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadda_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmul_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmul_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmul_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmul_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmul_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmul_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmul_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmul_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmadd_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmadd_b((v32i8)_1, (v32i8)_2, (v32i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmadd_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmadd_h((v16i16)_1, (v16i16)_2, (v16i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmadd_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmadd_w((v8i32)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmadd_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmadd_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsub_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmsub_b((v32i8)_1, (v32i8)_2, (v32i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsub_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmsub_h((v16i16)_1, (v16i16)_2, (v16i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsub_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmsub_w((v8i32)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsub_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmsub_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_hu_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_hu_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_wu_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_wu_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_du_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_du_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_hu_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_hu_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_wu_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_wu_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_du_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_du_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_du((v4u64)_1, (v4u64)_2); ++} ++ ++#define __lasx_xvrepl128vei_b(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrepl128vei_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvrepl128vei_h(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrepl128vei_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvrepl128vei_w(/*__m256i*/ _1, /*ui2*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrepl128vei_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvrepl128vei_d(/*__m256i*/ _1, /*ui1*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrepl128vei_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickev_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickev_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickev_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickev_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickev_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickev_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickev_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickev_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickod_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickod_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickod_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickod_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickod_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickod_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickod_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickod_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvh_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvh_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvh_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvh_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvh_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvh_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvh_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvh_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvl_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvl_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvl_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvl_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvl_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvl_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvl_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvl_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackev_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackev_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackev_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackev_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackev_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackev_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackev_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackev_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackod_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackod_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackod_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackod_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackod_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackod_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackod_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackod_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvshuf_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvshuf_b((v32i8)_1, (v32i8)_2, (v32i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvshuf_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvshuf_h((v16i16)_1, (v16i16)_2, (v16i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvshuf_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvshuf_w((v8i32)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvshuf_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvshuf_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvand_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvand_v((v32u8)_1, (v32u8)_2); ++} ++ ++#define __lasx_xvandi_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvandi_b((v32u8)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvor_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvor_v((v32u8)_1, (v32u8)_2); ++} ++ ++#define __lasx_xvori_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvori_b((v32u8)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvnor_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvnor_v((v32u8)_1, (v32u8)_2); ++} ++ ++#define __lasx_xvnori_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvnori_b((v32u8)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvxor_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvxor_v((v32u8)_1, (v32u8)_2); ++} ++ ++#define __lasx_xvxori_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvxori_b((v32u8)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitsel_v(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvbitsel_v((v32u8)_1, (v32u8)_2, (v32u8)_3); ++} ++ ++#define __lasx_xvbitseli_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvbitseli_b((v32u8)(_1), (v32u8)(_2), (_3))) ++ ++#define __lasx_xvshuf4i_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvshuf4i_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvshuf4i_h(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvshuf4i_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvshuf4i_w(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvshuf4i_w((v8i32)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplgr2vr_b(int _1) { ++ return (__m256i)__builtin_lasx_xvreplgr2vr_b((int)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplgr2vr_h(int _1) { ++ return (__m256i)__builtin_lasx_xvreplgr2vr_h((int)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplgr2vr_w(int _1) { ++ return (__m256i)__builtin_lasx_xvreplgr2vr_w((int)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplgr2vr_d(long int _1) { ++ return (__m256i)__builtin_lasx_xvreplgr2vr_d((long int)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpcnt_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvpcnt_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpcnt_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvpcnt_h((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpcnt_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvpcnt_w((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpcnt_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvpcnt_d((v4i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclo_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclo_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclo_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclo_h((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclo_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclo_w((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclo_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclo_d((v4i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclz_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclz_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclz_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclz_h((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclz_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclz_w((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclz_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclz_d((v4i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfadd_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfadd_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfadd_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfadd_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfsub_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfsub_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfsub_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfsub_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmul_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmul_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmul_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmul_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfdiv_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfdiv_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfdiv_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfdiv_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcvt_h_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcvt_h_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfcvt_s_d(__m256d _1, __m256d _2) { ++ return (__m256)__builtin_lasx_xvfcvt_s_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmin_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmin_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmin_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmin_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmina_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmina_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmina_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmina_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmax_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmax_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmax_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmax_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmaxa_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmaxa_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmaxa_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmaxa_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfclass_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvfclass_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfclass_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvfclass_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfsqrt_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfsqrt_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfsqrt_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfsqrt_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrecip_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrecip_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrecip_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrecip_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrint_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrint_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrint_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrint_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrsqrt_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrsqrt_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrsqrt_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrsqrt_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvflogb_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvflogb_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvflogb_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvflogb_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfcvth_s_h(__m256i _1) { ++ return (__m256)__builtin_lasx_xvfcvth_s_h((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfcvth_d_s(__m256 _1) { ++ return (__m256d)__builtin_lasx_xvfcvth_d_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfcvtl_s_h(__m256i _1) { ++ return (__m256)__builtin_lasx_xvfcvtl_s_h((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfcvtl_d_s(__m256 _1) { ++ return (__m256d)__builtin_lasx_xvfcvtl_d_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftint_w_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftint_l_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_wu_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftint_wu_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_lu_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftint_lu_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrz_w_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrz_l_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_wu_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrz_wu_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_lu_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrz_lu_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvffint_s_w(__m256i _1) { ++ return (__m256)__builtin_lasx_xvffint_s_w((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvffint_d_l(__m256i _1) { ++ return (__m256d)__builtin_lasx_xvffint_d_l((v4i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvffint_s_wu(__m256i _1) { ++ return (__m256)__builtin_lasx_xvffint_s_wu((v8u32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvffint_d_lu(__m256i _1) { ++ return (__m256d)__builtin_lasx_xvffint_d_lu((v4u64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve_b(__m256i _1, int _2) { ++ return (__m256i)__builtin_lasx_xvreplve_b((v32i8)_1, (int)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve_h(__m256i _1, int _2) { ++ return (__m256i)__builtin_lasx_xvreplve_h((v16i16)_1, (int)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve_w(__m256i _1, int _2) { ++ return (__m256i)__builtin_lasx_xvreplve_w((v8i32)_1, (int)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve_d(__m256i _1, int _2) { ++ return (__m256i)__builtin_lasx_xvreplve_d((v4i64)_1, (int)_2); ++} ++ ++#define __lasx_xvpermi_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvpermi_w((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvandn_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvandn_v((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvneg_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvneg_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvneg_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvneg_h((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvneg_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvneg_w((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvneg_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvneg_d((v4i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_du((v4u64)_1, (v4u64)_2); ++} ++ ++#define __lasx_xvsllwil_h_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_h_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvsllwil_w_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_w_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvsllwil_d_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_d_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvsllwil_hu_bu(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_hu_bu((v32u8)(_1), (_2))) ++ ++#define __lasx_xvsllwil_wu_hu(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_wu_hu((v16u16)(_1), (_2))) ++ ++#define __lasx_xvsllwil_du_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_du_wu((v8u32)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsran_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsran_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsran_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsran_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsran_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsran_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_bu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_bu_h((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_hu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_hu_w((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_wu_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_wu_d((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrarn_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrarn_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrarn_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrarn_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrarn_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrarn_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_bu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_bu_h((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_hu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_hu_w((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_wu_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_wu_d((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrln_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrln_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrln_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrln_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrln_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrln_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_bu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_bu_h((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_hu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_hu_w((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_wu_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_wu_d((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlrn_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlrn_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlrn_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlrn_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlrn_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlrn_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_bu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_bu_h((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_hu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_hu_w((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_wu_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_wu_d((v4u64)_1, (v4u64)_2); ++} ++ ++#define __lasx_xvfrstpi_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvfrstpi_b((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvfrstpi_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvfrstpi_h((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfrstp_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvfrstp_b((v32i8)_1, (v32i8)_2, (v32i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfrstp_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvfrstp_h((v16i16)_1, (v16i16)_2, (v16i16)_3); ++} ++ ++#define __lasx_xvshuf4i_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvshuf4i_d((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvbsrl_v(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbsrl_v((v32i8)(_1), (_2))) ++ ++#define __lasx_xvbsll_v(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbsll_v((v32i8)(_1), (_2))) ++ ++#define __lasx_xvextrins_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvextrins_b((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvextrins_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvextrins_h((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvextrins_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvextrins_w((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvextrins_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvextrins_d((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskltz_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskltz_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskltz_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskltz_h((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskltz_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskltz_w((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskltz_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskltz_d((v4i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsigncov_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsigncov_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsigncov_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsigncov_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsigncov_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsigncov_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsigncov_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsigncov_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmadd_s(__m256 _1, __m256 _2, __m256 _3) { ++ return (__m256)__builtin_lasx_xvfmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmadd_d(__m256d _1, __m256d _2, __m256d _3) { ++ return (__m256d)__builtin_lasx_xvfmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmsub_s(__m256 _1, __m256 _2, __m256 _3) { ++ return (__m256)__builtin_lasx_xvfmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmsub_d(__m256d _1, __m256d _2, __m256d _3) { ++ return (__m256d)__builtin_lasx_xvfmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfnmadd_s(__m256 _1, __m256 _2, __m256 _3) { ++ return (__m256)__builtin_lasx_xvfnmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfnmadd_d(__m256d _1, __m256d _2, __m256d _3) { ++ return (__m256d)__builtin_lasx_xvfnmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfnmsub_s(__m256 _1, __m256 _2, __m256 _3) { ++ return (__m256)__builtin_lasx_xvfnmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfnmsub_d(__m256d _1, __m256d _2, __m256d _3) { ++ return (__m256d)__builtin_lasx_xvfnmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrne_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrne_w_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrne_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrne_l_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrp_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrp_w_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrp_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrp_l_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrm_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrm_w_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrm_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrm_l_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftint_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvffint_s_l(__m256i _1, __m256i _2) { ++ return (__m256)__builtin_lasx_xvffint_s_l((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftintrz_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrp_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftintrp_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrm_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftintrm_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrne_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftintrne_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftinth_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftinth_l_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintl_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintl_l_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvffinth_d_w(__m256i _1) { ++ return (__m256d)__builtin_lasx_xvffinth_d_w((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvffintl_d_w(__m256i _1) { ++ return (__m256d)__builtin_lasx_xvffintl_d_w((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrzh_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrzh_l_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrzl_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrzl_l_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrph_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrph_l_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrpl_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrpl_l_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrmh_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrmh_l_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrml_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrml_l_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrneh_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrneh_l_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrnel_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrnel_l_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrintrne_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrintrne_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrintrne_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrintrne_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrintrz_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrintrz_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrintrz_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrintrz_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrintrp_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrintrp_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrintrp_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrintrp_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrintrm_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrintrm_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrintrm_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrintrm_d((v4f64)_1); ++} ++ ++#define __lasx_xvld(/*void **/ _1, /*si12*/ _2) \ ++ ((__m256i)__builtin_lasx_xvld((void const *)(_1), (_2))) ++ ++#define __lasx_xvst(/*__m256i*/ _1, /*void **/ _2, /*si12*/ _3) \ ++ ((void)__builtin_lasx_xvst((v32i8)(_1), (void *)(_2), (_3))) ++ ++#define __lasx_xvstelm_b(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ ++ /*idx*/ _4) \ ++ ((void)__builtin_lasx_xvstelm_b((v32i8)(_1), (void *)(_2), (_3), (_4))) ++ ++#define __lasx_xvstelm_h(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ ++ /*idx*/ _4) \ ++ ((void)__builtin_lasx_xvstelm_h((v16i16)(_1), (void *)(_2), (_3), (_4))) ++ ++#define __lasx_xvstelm_w(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ ++ /*idx*/ _4) \ ++ ((void)__builtin_lasx_xvstelm_w((v8i32)(_1), (void *)(_2), (_3), (_4))) ++ ++#define __lasx_xvstelm_d(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ ++ /*idx*/ _4) \ ++ ((void)__builtin_lasx_xvstelm_d((v4i64)(_1), (void *)(_2), (_3), (_4))) ++ ++#define __lasx_xvinsve0_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui3*/ _3) \ ++ ((__m256i)__builtin_lasx_xvinsve0_w((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvinsve0_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui2*/ _3) \ ++ ((__m256i)__builtin_lasx_xvinsve0_d((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvpickve_w(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvpickve_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvpickve_d(/*__m256i*/ _1, /*ui2*/ _2) \ ++ ((__m256i)__builtin_lasx_xvpickve_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvorn_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvorn_v((v32i8)_1, (v32i8)_2); ++} ++ ++#define __lasx_xvldi(/*i13*/ _1) ((__m256i)__builtin_lasx_xvldi((_1))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvldx(void const *_1, long int _2) { ++ return (__m256i)__builtin_lasx_xvldx((void const *)_1, (long int)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void ++ __lasx_xvstx(__m256i _1, void *_2, long int _3) { ++ return (void)__builtin_lasx_xvstx((v32i8)_1, (void *)_2, (long int)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvextl_qu_du(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvextl_qu_du((v4u64)_1); ++} ++ ++#define __lasx_xvinsgr2vr_w(/*__m256i*/ _1, /*int*/ _2, /*ui3*/ _3) \ ++ ((__m256i)__builtin_lasx_xvinsgr2vr_w((v8i32)(_1), (int)(_2), (_3))) ++ ++#define __lasx_xvinsgr2vr_d(/*__m256i*/ _1, /*long int*/ _2, /*ui2*/ _3) \ ++ ((__m256i)__builtin_lasx_xvinsgr2vr_d((v4i64)(_1), (long int)(_2), (_3))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_h((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_w((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_d((v4i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_q(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_q((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_h_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_h_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_w_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_w_h((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_d_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_d_w((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_w_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_w_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_d_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_d_h((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_d_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_d_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_hu_bu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_hu_bu((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_wu_hu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_wu_hu((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_du_wu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_du_wu((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_wu_bu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_wu_bu((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_du_hu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_du_hu((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_du_bu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_du_bu((v32i8)_1); ++} ++ ++#define __lasx_xvpermi_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvpermi_q((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvpermi_d(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvpermi_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvperm_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvperm_w((v8i32)_1, (v8i32)_2); ++} ++ ++#define __lasx_xvldrepl_b(/*void **/ _1, /*si12*/ _2) \ ++ ((__m256i)__builtin_lasx_xvldrepl_b((void const *)(_1), (_2))) ++ ++#define __lasx_xvldrepl_h(/*void **/ _1, /*si11*/ _2) \ ++ ((__m256i)__builtin_lasx_xvldrepl_h((void const *)(_1), (_2))) ++ ++#define __lasx_xvldrepl_w(/*void **/ _1, /*si10*/ _2) \ ++ ((__m256i)__builtin_lasx_xvldrepl_w((void const *)(_1), (_2))) ++ ++#define __lasx_xvldrepl_d(/*void **/ _1, /*si9*/ _2) \ ++ ((__m256i)__builtin_lasx_xvldrepl_d((void const *)(_1), (_2))) ++ ++#define __lasx_xvpickve2gr_w(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((int)__builtin_lasx_xvpickve2gr_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvpickve2gr_wu(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((unsigned int)__builtin_lasx_xvpickve2gr_wu((v8i32)(_1), (_2))) ++ ++#define __lasx_xvpickve2gr_d(/*__m256i*/ _1, /*ui2*/ _2) \ ++ ((long int)__builtin_lasx_xvpickve2gr_d((v4i64)(_1), (_2))) ++ ++#define __lasx_xvpickve2gr_du(/*__m256i*/ _1, /*ui2*/ _2) \ ++ ((unsigned long int)__builtin_lasx_xvpickve2gr_du((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_d_wu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_d_wu_w((v8u32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_w_hu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_w_hu_h((v16u16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_h_bu_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_h_bu_b((v32u8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_d_wu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_d_wu_w((v8u32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_w_hu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_w_hu_h((v16u16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_h_bu_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_h_bu_b((v32u8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_d_wu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_d_wu_w((v8u32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_w_hu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_w_hu_h((v16u16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_h_bu_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_h_bu_b((v32u8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_d_wu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_d_wu_w((v8u32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_w_hu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_w_hu_h((v16u16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_h_bu_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_h_bu_b((v32u8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_qu_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_qu_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_qu_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_qu_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_q_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_q_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_d_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_w_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_w_h((v8i32)_1, (v16i16)_2, ++ (v16i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_h_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_h_b((v16i16)_1, (v32i8)_2, ++ (v32i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_q_du(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_q_du((v4u64)_1, (v4u64)_2, ++ (v4u64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_d_wu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_d_wu((v4u64)_1, (v8u32)_2, ++ (v8u32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_w_hu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_w_hu((v8u32)_1, (v16u16)_2, ++ (v16u16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_h_bu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_h_bu((v16u16)_1, (v32u8)_2, ++ (v32u8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_q_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_q_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_d_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_w_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_w_h((v8i32)_1, (v16i16)_2, ++ (v16i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_h_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_h_b((v16i16)_1, (v32i8)_2, ++ (v32i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_q_du(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_q_du((v4u64)_1, (v4u64)_2, ++ (v4u64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_d_wu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_d_wu((v4u64)_1, (v8u32)_2, ++ (v8u32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_w_hu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_w_hu((v8u32)_1, (v16u16)_2, ++ (v16u16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_h_bu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_h_bu((v16u16)_1, (v32u8)_2, ++ (v32u8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_q_du_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_q_du_d((v4i64)_1, (v4u64)_2, ++ (v4i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_d_wu_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_d_wu_w((v4i64)_1, (v8u32)_2, ++ (v8i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_w_hu_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_w_hu_h((v8i32)_1, (v16u16)_2, ++ (v16i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_h_bu_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_h_bu_b((v16i16)_1, (v32u8)_2, ++ (v32i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_q_du_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_q_du_d((v4i64)_1, (v4u64)_2, ++ (v4i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_d_wu_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_d_wu_w((v4i64)_1, (v8u32)_2, ++ (v8i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_w_hu_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_w_hu_h((v8i32)_1, (v16u16)_2, ++ (v16i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_h_bu_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_h_bu_b((v16i16)_1, (v32u8)_2, ++ (v32i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvrotr_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvrotr_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvrotr_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvrotr_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvrotr_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvrotr_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvrotr_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvrotr_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_q(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_q((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_q(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_q((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_q_du_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_q_du_d((v4u64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_q_du_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_q_du_d((v4u64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_q_du_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_q_du_d((v4u64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_q_du_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_q_du_d((v4u64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskgez_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskgez_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsknz_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmsknz_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_h_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_h_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_w_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_w_h((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_d_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_d_w((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_q_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_q_d((v4i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_hu_bu(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_hu_bu((v32u8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_wu_hu(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_wu_hu((v16u16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_du_wu(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_du_wu((v8u32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_qu_du(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_qu_du((v4u64)_1); ++} ++ ++#define __lasx_xvrotri_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrotri_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvrotri_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrotri_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvrotri_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrotri_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvrotri_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrotri_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvextl_q_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvextl_q_d((v4i64)_1); ++} ++ ++#define __lasx_xvsrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvsrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvsrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvsrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvsrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvsrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvsrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvsrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlrni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvssrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvssrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvssrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvssrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvssrlni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvssrlni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvssrlni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvssrlni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvssrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvssrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvssrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvssrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvssrlrni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvssrlrni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvssrlrni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvssrlrni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvsrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrani_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvsrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrani_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvsrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrani_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvsrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrani_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvsrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvsrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvsrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvsrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvssrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvssrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvssrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvssrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvssrani_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvssrani_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvssrani_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvssrani_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_du_q((v4u64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvssrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvssrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvssrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvssrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvssrarni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvssrarni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvssrarni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvssrarni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xbnz_b(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_b((v32u8)(_1))) ++ ++#define __lasx_xbnz_d(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_d((v4u64)(_1))) ++ ++#define __lasx_xbnz_h(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_h((v16u16)(_1))) ++ ++#define __lasx_xbnz_v(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_v((v32u8)(_1))) ++ ++#define __lasx_xbnz_w(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_w((v8u32)(_1))) ++ ++#define __lasx_xbz_b(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_b((v32u8)(_1))) ++ ++#define __lasx_xbz_d(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_d((v4u64)(_1))) ++ ++#define __lasx_xbz_h(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_h((v16u16)(_1))) ++ ++#define __lasx_xbz_v(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_v((v32u8)(_1))) ++ ++#define __lasx_xbz_w(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_w((v8u32)(_1))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_caf_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_caf_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_caf_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_caf_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_ceq_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_ceq_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_ceq_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_ceq_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cle_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cle_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cle_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cle_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_clt_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_clt_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_clt_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_clt_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cne_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cne_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cne_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cne_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cor_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cor_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cor_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cor_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cueq_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cueq_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cueq_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cueq_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cule_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cule_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cule_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cule_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cult_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cult_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cult_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cult_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cun_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cun_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cune_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cune_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cune_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cune_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cun_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cun_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_saf_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_saf_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_saf_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_saf_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_seq_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_seq_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_seq_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_seq_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sle_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sle_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sle_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sle_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_slt_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_slt_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_slt_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_slt_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sne_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sne_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sne_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sne_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sor_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sor_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sor_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sor_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sueq_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sueq_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sueq_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sueq_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sule_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sule_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sule_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sule_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sult_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sult_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sult_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sult_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sun_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sun_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sune_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sune_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sune_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sune_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sun_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sun_s((v8f32)_1, (v8f32)_2); ++} ++ ++#define __lasx_xvpickve_d_f(/*__m256d*/ _1, /*ui2*/ _2) \ ++ ((__m256d)__builtin_lasx_xvpickve_d_f((v4f64)(_1), (_2))) ++ ++#define __lasx_xvpickve_w_f(/*__m256*/ _1, /*ui3*/ _2) \ ++ ((__m256)__builtin_lasx_xvpickve_w_f((v8f32)(_1), (_2))) ++ ++#define __lasx_xvrepli_b(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_b((_1))) ++ ++#define __lasx_xvrepli_d(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_d((_1))) ++ ++#define __lasx_xvrepli_h(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_h((_1))) ++ ++#define __lasx_xvrepli_w(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_w((_1))) ++ ++#endif /* defined(__loongarch_asx). */ ++#endif /* _LOONGSON_ASXINTRIN_H. */ +diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp +index d1b015502725..5ee20554c4cf 100644 +--- a/clang/lib/Sema/SemaChecking.cpp ++++ b/clang/lib/Sema/SemaChecking.cpp +@@ -4082,6 +4082,233 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, + case LoongArch::BI__builtin_lsx_vrepli_w: + case LoongArch::BI__builtin_lsx_vrepli_d: + return SemaBuiltinConstantArgRange(TheCall, 0, -512, 511); ++ ++ // LASX intrinsics. ++ case LoongArch::BI__builtin_lasx_xvbitclri_b: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_b: ++ case LoongArch::BI__builtin_lasx_xvbitseti_b: ++ case LoongArch::BI__builtin_lasx_xvsat_b: ++ case LoongArch::BI__builtin_lasx_xvsat_bu: ++ case LoongArch::BI__builtin_lasx_xvslli_b: ++ case LoongArch::BI__builtin_lasx_xvsrai_b: ++ case LoongArch::BI__builtin_lasx_xvsrari_b: ++ case LoongArch::BI__builtin_lasx_xvsrli_b: ++ case LoongArch::BI__builtin_lasx_xvsllwil_h_b: ++ case LoongArch::BI__builtin_lasx_xvsllwil_hu_bu: ++ case LoongArch::BI__builtin_lasx_xvrotri_b: ++ case LoongArch::BI__builtin_lasx_xvsrlri_b: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); ++ case LoongArch::BI__builtin_lasx_xvbitclri_h: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_h: ++ case LoongArch::BI__builtin_lasx_xvbitseti_h: ++ case LoongArch::BI__builtin_lasx_xvsat_h: ++ case LoongArch::BI__builtin_lasx_xvsat_hu: ++ case LoongArch::BI__builtin_lasx_xvslli_h: ++ case LoongArch::BI__builtin_lasx_xvsrai_h: ++ case LoongArch::BI__builtin_lasx_xvsrari_h: ++ case LoongArch::BI__builtin_lasx_xvsrli_h: ++ case LoongArch::BI__builtin_lasx_xvsllwil_w_h: ++ case LoongArch::BI__builtin_lasx_xvsllwil_wu_hu: ++ case LoongArch::BI__builtin_lasx_xvrotri_h: ++ case LoongArch::BI__builtin_lasx_xvsrlri_h: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); ++ case LoongArch::BI__builtin_lasx_xvssrarni_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrarni_bu_h: ++ case LoongArch::BI__builtin_lasx_xvssrani_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrani_bu_h: ++ case LoongArch::BI__builtin_lasx_xvsrarni_b_h: ++ case LoongArch::BI__builtin_lasx_xvsrlni_b_h: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrlni_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrlni_bu_h: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_bu_h: ++ case LoongArch::BI__builtin_lasx_xvsrani_b_h: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 15); ++ case LoongArch::BI__builtin_lasx_xvslei_bu: ++ case LoongArch::BI__builtin_lasx_xvslei_hu: ++ case LoongArch::BI__builtin_lasx_xvslei_wu: ++ case LoongArch::BI__builtin_lasx_xvslei_du: ++ case LoongArch::BI__builtin_lasx_xvslti_bu: ++ case LoongArch::BI__builtin_lasx_xvslti_hu: ++ case LoongArch::BI__builtin_lasx_xvslti_wu: ++ case LoongArch::BI__builtin_lasx_xvslti_du: ++ case LoongArch::BI__builtin_lasx_xvmaxi_bu: ++ case LoongArch::BI__builtin_lasx_xvmaxi_hu: ++ case LoongArch::BI__builtin_lasx_xvmaxi_wu: ++ case LoongArch::BI__builtin_lasx_xvmaxi_du: ++ case LoongArch::BI__builtin_lasx_xvmini_bu: ++ case LoongArch::BI__builtin_lasx_xvmini_hu: ++ case LoongArch::BI__builtin_lasx_xvmini_wu: ++ case LoongArch::BI__builtin_lasx_xvmini_du: ++ case LoongArch::BI__builtin_lasx_xvaddi_bu: ++ case LoongArch::BI__builtin_lasx_xvaddi_hu: ++ case LoongArch::BI__builtin_lasx_xvaddi_wu: ++ case LoongArch::BI__builtin_lasx_xvaddi_du: ++ case LoongArch::BI__builtin_lasx_xvbitclri_w: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_w: ++ case LoongArch::BI__builtin_lasx_xvbitseti_w: ++ case LoongArch::BI__builtin_lasx_xvsat_w: ++ case LoongArch::BI__builtin_lasx_xvsat_wu: ++ case LoongArch::BI__builtin_lasx_xvslli_w: ++ case LoongArch::BI__builtin_lasx_xvsrai_w: ++ case LoongArch::BI__builtin_lasx_xvsrari_w: ++ case LoongArch::BI__builtin_lasx_xvsrli_w: ++ case LoongArch::BI__builtin_lasx_xvsllwil_d_w: ++ case LoongArch::BI__builtin_lasx_xvsllwil_du_wu: ++ case LoongArch::BI__builtin_lasx_xvsrlri_w: ++ case LoongArch::BI__builtin_lasx_xvrotri_w: ++ case LoongArch::BI__builtin_lasx_xvsubi_bu: ++ case LoongArch::BI__builtin_lasx_xvsubi_hu: ++ case LoongArch::BI__builtin_lasx_xvsubi_wu: ++ case LoongArch::BI__builtin_lasx_xvsubi_du: ++ case LoongArch::BI__builtin_lasx_xvbsrl_v: ++ case LoongArch::BI__builtin_lasx_xvbsll_v: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 31); ++ case LoongArch::BI__builtin_lasx_xvssrarni_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrarni_hu_w: ++ case LoongArch::BI__builtin_lasx_xvssrani_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrani_hu_w: ++ case LoongArch::BI__builtin_lasx_xvsrarni_h_w: ++ case LoongArch::BI__builtin_lasx_xvsrani_h_w: ++ case LoongArch::BI__builtin_lasx_xvfrstpi_b: ++ case LoongArch::BI__builtin_lasx_xvfrstpi_h: ++ case LoongArch::BI__builtin_lasx_xvsrlni_h_w: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrlni_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrlni_hu_w: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_hu_w: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 31); ++ case LoongArch::BI__builtin_lasx_xvbitclri_d: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_d: ++ case LoongArch::BI__builtin_lasx_xvbitseti_d: ++ case LoongArch::BI__builtin_lasx_xvsat_d: ++ case LoongArch::BI__builtin_lasx_xvsat_du: ++ case LoongArch::BI__builtin_lasx_xvslli_d: ++ case LoongArch::BI__builtin_lasx_xvsrai_d: ++ case LoongArch::BI__builtin_lasx_xvsrli_d: ++ case LoongArch::BI__builtin_lasx_xvsrari_d: ++ case LoongArch::BI__builtin_lasx_xvrotri_d: ++ case LoongArch::BI__builtin_lasx_xvsrlri_d: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 63); ++ case LoongArch::BI__builtin_lasx_xvssrarni_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrarni_wu_d: ++ case LoongArch::BI__builtin_lasx_xvssrani_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrani_wu_d: ++ case LoongArch::BI__builtin_lasx_xvsrarni_w_d: ++ case LoongArch::BI__builtin_lasx_xvsrlni_w_d: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrlni_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrlni_wu_d: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_wu_d: ++ case LoongArch::BI__builtin_lasx_xvsrani_w_d: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 63); ++ case LoongArch::BI__builtin_lasx_xvssrarni_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrarni_du_q: ++ case LoongArch::BI__builtin_lasx_xvssrani_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrani_du_q: ++ case LoongArch::BI__builtin_lasx_xvsrarni_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrlni_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrlni_du_q: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_du_q: ++ case LoongArch::BI__builtin_lasx_xvsrani_d_q: ++ case LoongArch::BI__builtin_lasx_xvsrlni_d_q: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_d_q: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 127); ++ case LoongArch::BI__builtin_lasx_xvseqi_b: ++ case LoongArch::BI__builtin_lasx_xvseqi_h: ++ case LoongArch::BI__builtin_lasx_xvseqi_w: ++ case LoongArch::BI__builtin_lasx_xvseqi_d: ++ case LoongArch::BI__builtin_lasx_xvslti_b: ++ case LoongArch::BI__builtin_lasx_xvslti_h: ++ case LoongArch::BI__builtin_lasx_xvslti_w: ++ case LoongArch::BI__builtin_lasx_xvslti_d: ++ case LoongArch::BI__builtin_lasx_xvslei_b: ++ case LoongArch::BI__builtin_lasx_xvslei_h: ++ case LoongArch::BI__builtin_lasx_xvslei_w: ++ case LoongArch::BI__builtin_lasx_xvslei_d: ++ case LoongArch::BI__builtin_lasx_xvmaxi_b: ++ case LoongArch::BI__builtin_lasx_xvmaxi_h: ++ case LoongArch::BI__builtin_lasx_xvmaxi_w: ++ case LoongArch::BI__builtin_lasx_xvmaxi_d: ++ case LoongArch::BI__builtin_lasx_xvmini_b: ++ case LoongArch::BI__builtin_lasx_xvmini_h: ++ case LoongArch::BI__builtin_lasx_xvmini_w: ++ case LoongArch::BI__builtin_lasx_xvmini_d: ++ return SemaBuiltinConstantArgRange(TheCall, 1, -16, 15); ++ case LoongArch::BI__builtin_lasx_xvandi_b: ++ case LoongArch::BI__builtin_lasx_xvnori_b: ++ case LoongArch::BI__builtin_lasx_xvori_b: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_b: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_h: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_w: ++ case LoongArch::BI__builtin_lasx_xvxori_b: ++ case LoongArch::BI__builtin_lasx_xvpermi_d: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 255); ++ case LoongArch::BI__builtin_lasx_xvbitseli_b: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_d: ++ case LoongArch::BI__builtin_lasx_xvextrins_b: ++ case LoongArch::BI__builtin_lasx_xvextrins_h: ++ case LoongArch::BI__builtin_lasx_xvextrins_w: ++ case LoongArch::BI__builtin_lasx_xvextrins_d: ++ case LoongArch::BI__builtin_lasx_xvpermi_q: ++ case LoongArch::BI__builtin_lasx_xvpermi_w: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 255); ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_b: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_h: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_w: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_wu: ++ case LoongArch::BI__builtin_lasx_xvpickve_w_f: ++ case LoongArch::BI__builtin_lasx_xvpickve_w: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); ++ case LoongArch::BI__builtin_lasx_xvinsgr2vr_w: ++ case LoongArch::BI__builtin_lasx_xvinsve0_w: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 7); ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_w: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_d: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_du: ++ case LoongArch::BI__builtin_lasx_xvpickve_d_f: ++ case LoongArch::BI__builtin_lasx_xvpickve_d: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 3); ++ case LoongArch::BI__builtin_lasx_xvinsve0_d: ++ case LoongArch::BI__builtin_lasx_xvinsgr2vr_d: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 3); ++ case LoongArch::BI__builtin_lasx_xvstelm_b: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 31); ++ case LoongArch::BI__builtin_lasx_xvstelm_h: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -256, 254) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 15); ++ case LoongArch::BI__builtin_lasx_xvstelm_w: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -512, 508) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 7); ++ case LoongArch::BI__builtin_lasx_xvstelm_d: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -1024, 1016) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 3); ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_d: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1); ++ case LoongArch::BI__builtin_lasx_xvldrepl_b: ++ case LoongArch::BI__builtin_lasx_xvld: ++ return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2047); ++ case LoongArch::BI__builtin_lasx_xvldrepl_h: ++ return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2046); ++ case LoongArch::BI__builtin_lasx_xvldrepl_w: ++ return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2044); ++ case LoongArch::BI__builtin_lasx_xvldrepl_d: ++ return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2040); ++ case LoongArch::BI__builtin_lasx_xvst: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -2048, 2047); ++ case LoongArch::BI__builtin_lasx_xvldi: ++ return SemaBuiltinConstantArgRange(TheCall, 0, -4096, 4095); ++ case LoongArch::BI__builtin_lasx_xvrepli_b: ++ case LoongArch::BI__builtin_lasx_xvrepli_h: ++ case LoongArch::BI__builtin_lasx_xvrepli_w: ++ case LoongArch::BI__builtin_lasx_xvrepli_d: ++ return SemaBuiltinConstantArgRange(TheCall, 0, -512, 511); + } + return false; + } +-- +2.20.1 + diff --git a/0006-LoongArch-CodeGen-Add-LSX-builtin-testcases.patch b/0006-LoongArch-CodeGen-Add-LSX-builtin-testcases.patch new file mode 100644 index 0000000..07b4ba9 --- /dev/null +++ b/0006-LoongArch-CodeGen-Add-LSX-builtin-testcases.patch @@ -0,0 +1,12431 @@ +From 26cf77602203853f56675ae6831a1d2837d03ba1 Mon Sep 17 00:00:00 2001 +From: chenli +Date: Fri, 27 Oct 2023 15:57:30 +0800 +Subject: [PATCH 06/17] [LoongArch][CodeGen] Add LSX builtin testcases + +(cherry picked from commit 673c530837faa5ddb45769ddee01d09e1f73d406) + +Change-Id: I394448e6cffcb99fdfe452ee46e076c00c1b576b +--- + .../LoongArch/lsx/builtin-alias-error.c | 1359 +++++ + .../CodeGen/LoongArch/lsx/builtin-alias.c | 4451 ++++++++++++++ + .../CodeGen/LoongArch/lsx/builtin-error.c | 1382 +++++ + clang/test/CodeGen/LoongArch/lsx/builtin.c | 5193 +++++++++++++++++ + 4 files changed, 12385 insertions(+) + create mode 100644 clang/test/CodeGen/LoongArch/lsx/builtin-alias-error.c + create mode 100644 clang/test/CodeGen/LoongArch/lsx/builtin-alias.c + create mode 100644 clang/test/CodeGen/LoongArch/lsx/builtin-error.c + create mode 100644 clang/test/CodeGen/LoongArch/lsx/builtin.c + +diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-alias-error.c b/clang/test/CodeGen/LoongArch/lsx/builtin-alias-error.c +new file mode 100644 +index 000000000000..69cf2254fdd7 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/lsx/builtin-alias-error.c +@@ -0,0 +1,1359 @@ ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -verify %s ++ ++#include ++ ++v16i8 vslli_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vslli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lsx_vslli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vslli_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vslli_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vslli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vslli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vslli_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vslli_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vslli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vslli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vslli_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vslli_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vslli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vslli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vslli_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrai_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vsrai_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lsx_vsrai_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vsrai_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrai_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vsrai_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrai_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrai_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrai_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vsrai_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vsrai_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vsrai_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrai_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vsrai_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrai_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrai_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrari_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vsrari_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lsx_vsrari_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vsrari_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrari_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vsrari_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrari_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrari_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrari_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vsrari_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vsrari_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vsrari_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrari_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vsrari_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrari_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrari_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrli_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vsrli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lsx_vsrli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vsrli_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrli_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vsrli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrli_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrli_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vsrli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vsrli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vsrli_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrli_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vsrli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrli_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrlri_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vsrlri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lsx_vsrlri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vsrlri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrlri_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vsrlri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrlri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrlri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrlri_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vsrlri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vsrlri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vsrlri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrlri_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vsrlri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrlri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrlri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vbitclri_b(v16u8 _1, int var) { ++ v16u8 res = __lsx_vbitclri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lsx_vbitclri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vbitclri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_b' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vbitclri_h(v8u16 _1, int var) { ++ v8u16 res = __lsx_vbitclri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vbitclri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vbitclri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_h' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vbitclri_w(v4u32 _1, int var) { ++ v4u32 res = __lsx_vbitclri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vbitclri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vbitclri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_w' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vbitclri_d(v2u64 _1, int var) { ++ v2u64 res = __lsx_vbitclri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vbitclri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vbitclri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vbitseti_b(v16u8 _1, int var) { ++ v16u8 res = __lsx_vbitseti_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lsx_vbitseti_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vbitseti_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_b' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vbitseti_h(v8u16 _1, int var) { ++ v8u16 res = __lsx_vbitseti_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vbitseti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vbitseti_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_h' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vbitseti_w(v4u32 _1, int var) { ++ v4u32 res = __lsx_vbitseti_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vbitseti_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vbitseti_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_w' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vbitseti_d(v2u64 _1, int var) { ++ v2u64 res = __lsx_vbitseti_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vbitseti_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vbitseti_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vbitrevi_b(v16u8 _1, int var) { ++ v16u8 res = __lsx_vbitrevi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lsx_vbitrevi_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vbitrevi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_b' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vbitrevi_h(v8u16 _1, int var) { ++ v8u16 res = __lsx_vbitrevi_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vbitrevi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vbitrevi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_h' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vbitrevi_w(v4u32 _1, int var) { ++ v4u32 res = __lsx_vbitrevi_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vbitrevi_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vbitrevi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_w' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vbitrevi_d(v2u64 _1, int var) { ++ v2u64 res = __lsx_vbitrevi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vbitrevi_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vbitrevi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vaddi_bu(v16i8 _1, int var) { ++ v16i8 res = __lsx_vaddi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vaddi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vaddi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_bu' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vaddi_hu(v8i16 _1, int var) { ++ v8i16 res = __lsx_vaddi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vaddi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vaddi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_hu' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vaddi_wu(v4i32 _1, int var) { ++ v4i32 res = __lsx_vaddi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vaddi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vaddi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_wu' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vaddi_du(v2i64 _1, int var) { ++ v2i64 res = __lsx_vaddi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vaddi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vaddi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsubi_bu(v16i8 _1, int var) { ++ v16i8 res = __lsx_vsubi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vsubi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vsubi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_bu' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsubi_hu(v8i16 _1, int var) { ++ v8i16 res = __lsx_vsubi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vsubi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vsubi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_hu' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsubi_wu(v4i32 _1, int var) { ++ v4i32 res = __lsx_vsubi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vsubi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vsubi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_wu' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsubi_du(v2i64 _1, int var) { ++ v2i64 res = __lsx_vsubi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vsubi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vsubi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vmaxi_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vmaxi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vmaxi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vmaxi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vmaxi_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vmaxi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vmaxi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vmaxi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vmaxi_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vmaxi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vmaxi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vmaxi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vmaxi_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vmaxi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vmaxi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vmaxi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vmaxi_bu(v16u8 _1, int var) { ++ v16u8 res = __lsx_vmaxi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vmaxi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vmaxi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_bu' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vmaxi_hu(v8u16 _1, int var) { ++ v8u16 res = __lsx_vmaxi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vmaxi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vmaxi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_hu' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vmaxi_wu(v4u32 _1, int var) { ++ v4u32 res = __lsx_vmaxi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vmaxi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vmaxi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_wu' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vmaxi_du(v2u64 _1, int var) { ++ v2u64 res = __lsx_vmaxi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vmaxi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vmaxi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vmini_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vmini_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vmini_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vmini_b(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vmini_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vmini_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vmini_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vmini_h(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_h' must be a constant integer}}} ++ return res; ++} ++ ++v4i32 vmini_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vmini_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vmini_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vmini_w(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vmini_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vmini_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vmini_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vmini_d(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vmini_bu(v16u8 _1, int var) { ++ v16u8 res = __lsx_vmini_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vmini_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vmini_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_bu' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vmini_hu(v8u16 _1, int var) { ++ v8u16 res = __lsx_vmini_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vmini_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vmini_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_hu' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vmini_wu(v4u32 _1, int var) { ++ v4u32 res = __lsx_vmini_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vmini_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vmini_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_wu' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vmini_du(v2u64 _1, int var) { ++ v2u64 res = __lsx_vmini_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vmini_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vmini_du(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vseqi_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vseqi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vseqi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vseqi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vseqi_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vseqi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vseqi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vseqi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vseqi_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vseqi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vseqi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vseqi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vseqi_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vseqi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vseqi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vseqi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vslti_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vslti_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vslti_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vslti_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vslti_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vslti_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vslti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vslti_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vslti_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vslti_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vslti_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vslti_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vslti_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vslti_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vslti_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vslti_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vslti_bu(v16u8 _1, int var) { ++ v16i8 res = __lsx_vslti_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vslti_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vslti_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_bu' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vslti_hu(v8u16 _1, int var) { ++ v8i16 res = __lsx_vslti_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vslti_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vslti_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_hu' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vslti_wu(v4u32 _1, int var) { ++ v4i32 res = __lsx_vslti_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vslti_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vslti_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_wu' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vslti_du(v2u64 _1, int var) { ++ v2i64 res = __lsx_vslti_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vslti_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vslti_du(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vslei_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vslei_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vslei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vslei_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vslei_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vslei_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vslei_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vslei_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vslei_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vslei_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vslei_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vslei_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vslei_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vslei_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lsx_vslei_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lsx_vslei_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vslei_bu(v16u8 _1, int var) { ++ v16i8 res = __lsx_vslei_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vslei_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vslei_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_bu' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vslei_hu(v8u16 _1, int var) { ++ v8i16 res = __lsx_vslei_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vslei_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vslei_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_hu' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vslei_wu(v4u32 _1, int var) { ++ v4i32 res = __lsx_vslei_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vslei_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vslei_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_wu' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vslei_du(v2u64 _1, int var) { ++ v2i64 res = __lsx_vslei_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vslei_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vslei_du(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsat_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vsat_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lsx_vsat_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vsat_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsat_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vsat_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vsat_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vsat_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsat_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vsat_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vsat_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vsat_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsat_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vsat_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vsat_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vsat_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vsat_bu(v16u8 _1, int var) { ++ v16u8 res = __lsx_vsat_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lsx_vsat_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vsat_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_bu' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vsat_hu(v8u16 _1, int var) { ++ v8u16 res = __lsx_vsat_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vsat_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vsat_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_hu' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vsat_wu(v4u32 _1, int var) { ++ v4u32 res = __lsx_vsat_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vsat_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vsat_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_wu' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vsat_du(v2u64 _1, int var) { ++ v2u64 res = __lsx_vsat_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vsat_du(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vsat_du(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vreplvei_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vreplvei_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vreplvei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vreplvei_b(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vreplvei_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vreplvei_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lsx_vreplvei_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vreplvei_h(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vreplvei_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vreplvei_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __lsx_vreplvei_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __lsx_vreplvei_w(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vreplvei_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vreplvei_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} ++ res |= __lsx_vreplvei_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ res |= __lsx_vreplvei_d(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vandi_b(v16u8 _1, int var) { ++ v16u8 res = __lsx_vandi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lsx_vandi_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lsx_vandi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vandi_b' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vori_b(v16u8 _1, int var) { ++ v16u8 res = __lsx_vori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lsx_vori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lsx_vori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vori_b' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vnori_b(v16u8 _1, int var) { ++ v16u8 res = __lsx_vnori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lsx_vnori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lsx_vnori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vnori_b' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vxori_b(v16u8 _1, int var) { ++ v16u8 res = __lsx_vxori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lsx_vxori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lsx_vxori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vxori_b' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vbitseli_b(v16u8 _1, v16u8 _2, int var) { ++ v16u8 res = __lsx_vbitseli_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lsx_vbitseli_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lsx_vbitseli_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vbitseli_b' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vshuf4i_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vshuf4i_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lsx_vshuf4i_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lsx_vshuf4i_b(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vshuf4i_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vshuf4i_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lsx_vshuf4i_h(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lsx_vshuf4i_h(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vshuf4i_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vshuf4i_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lsx_vshuf4i_w(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lsx_vshuf4i_w(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_w' must be a constant integer}} ++ return res; ++} ++ ++int vpickve2gr_b(v16i8 _1, int var) { ++ int res = __lsx_vpickve2gr_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vpickve2gr_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vpickve2gr_b(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_b' must be a constant integer}} ++ return res; ++} ++ ++int vpickve2gr_h(v8i16 _1, int var) { ++ int res = __lsx_vpickve2gr_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lsx_vpickve2gr_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vpickve2gr_h(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_h' must be a constant integer}} ++ return res; ++} ++ ++int vpickve2gr_w(v4i32 _1, int var) { ++ int res = __lsx_vpickve2gr_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __lsx_vpickve2gr_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __lsx_vpickve2gr_w(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_w' must be a constant integer}} ++ return res; ++} ++ ++long vpickve2gr_d(v2i64 _1, int var) { ++ long res = __lsx_vpickve2gr_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} ++ res |= __lsx_vpickve2gr_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ res |= __lsx_vpickve2gr_d(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_d' must be a constant integer}} ++ return res; ++} ++ ++unsigned int vpickve2gr_bu(v16i8 _1, int var) { ++ unsigned int res = __lsx_vpickve2gr_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vpickve2gr_bu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vpickve2gr_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_bu' must be a constant integer}} ++ return res; ++} ++ ++unsigned int vpickve2gr_hu(v8i16 _1, int var) { ++ unsigned int res = __lsx_vpickve2gr_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lsx_vpickve2gr_hu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vpickve2gr_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_hu' must be a constant integer}} ++ return res; ++} ++ ++unsigned int vpickve2gr_wu(v4i32 _1, int var) { ++ unsigned int res = __lsx_vpickve2gr_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __lsx_vpickve2gr_wu(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __lsx_vpickve2gr_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_wu' must be a constant integer}} ++ return res; ++} ++ ++unsigned long int vpickve2gr_du(v2i64 _1, int var) { ++ unsigned long int res = __lsx_vpickve2gr_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} ++ res |= __lsx_vpickve2gr_du(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ res |= __lsx_vpickve2gr_du(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vinsgr2vr_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vinsgr2vr_b(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vinsgr2vr_b(_1, 1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vinsgr2vr_b(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vinsgr2vr_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vinsgr2vr_h(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lsx_vinsgr2vr_h(_1, 1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vinsgr2vr_h(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vinsgr2vr_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vinsgr2vr_w(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __lsx_vinsgr2vr_w(_1, 1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __lsx_vinsgr2vr_w(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vinsgr2vr_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vinsgr2vr_d(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} ++ res |= __lsx_vinsgr2vr_d(_1, 1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ res |= __lsx_vinsgr2vr_d(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_d' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsllwil_h_b(v16i8 _1, int var) { ++ v8i16 res = __lsx_vsllwil_h_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lsx_vsllwil_h_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vsllwil_h_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_h_b' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsllwil_w_h(v8i16 _1, int var) { ++ v4i32 res = __lsx_vsllwil_w_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vsllwil_w_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vsllwil_w_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_w_h' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsllwil_d_w(v4i32 _1, int var) { ++ v2i64 res = __lsx_vsllwil_d_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vsllwil_d_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vsllwil_d_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_d_w' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vsllwil_hu_bu(v16u8 _1, int var) { ++ v8u16 res = __lsx_vsllwil_hu_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lsx_vsllwil_hu_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vsllwil_hu_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_hu_bu' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vsllwil_wu_hu(v8u16 _1, int var) { ++ v4u32 res = __lsx_vsllwil_wu_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vsllwil_wu_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vsllwil_wu_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_wu_hu' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vsllwil_du_wu(v4u32 _1, int var) { ++ v2u64 res = __lsx_vsllwil_du_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vsllwil_du_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vsllwil_du_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_du_wu' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vfrstpi_b(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __lsx_vfrstpi_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vfrstpi_b(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vfrstpi_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vfrstpi_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vfrstpi_h(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __lsx_vfrstpi_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vfrstpi_h(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vfrstpi_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vfrstpi_h' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vshuf4i_d(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __lsx_vshuf4i_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lsx_vshuf4i_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lsx_vshuf4i_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vbsrl_v(v16i8 _1, int var) { ++ v16i8 res = __lsx_vbsrl_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vbsrl_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vbsrl_v(_1, var); // expected-error {{argument to '__builtin_lsx_vbsrl_v' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vbsll_v(v16i8 _1, int var) { ++ v16i8 res = __lsx_vbsll_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vbsll_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vbsll_v(_1, var); // expected-error {{argument to '__builtin_lsx_vbsll_v' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vextrins_b(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __lsx_vextrins_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lsx_vextrins_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lsx_vextrins_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vextrins_h(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __lsx_vextrins_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lsx_vextrins_h(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lsx_vextrins_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vextrins_w(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __lsx_vextrins_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lsx_vextrins_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lsx_vextrins_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vextrins_d(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __lsx_vextrins_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lsx_vextrins_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lsx_vextrins_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_d' must be a constant integer}} ++ return res; ++} ++ ++void vstelm_b_idx(v16i8 _1, void *_2, int var) { ++ __lsx_vstelm_b(_1, _2, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ __lsx_vstelm_b(_1, _2, 1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ __lsx_vstelm_b(_1, _2, 1, var); // expected-error {{argument to '__builtin_lsx_vstelm_b' must be a constant integer}} ++} ++ ++void vstelm_h_idx(v8i16 _1, void *_2, int var) { ++ __lsx_vstelm_h(_1, _2, 2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ __lsx_vstelm_h(_1, _2, 2, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ __lsx_vstelm_h(_1, _2, 2, var); // expected-error {{argument to '__builtin_lsx_vstelm_h' must be a constant integer}} ++} ++ ++void vstelm_w_idx(v4i32 _1, void *_2, int var) { ++ __lsx_vstelm_w(_1, _2, 4, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ __lsx_vstelm_w(_1, _2, 4, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ __lsx_vstelm_w(_1, _2, 4, var); // expected-error {{argument to '__builtin_lsx_vstelm_w' must be a constant integer}} ++} ++ ++void vstelm_d_idx(v2i64 _1, void *_2, int var) { ++ __lsx_vstelm_d(_1, _2, 8, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} ++ __lsx_vstelm_d(_1, _2, 8, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ __lsx_vstelm_d(_1, _2, 8, var); // expected-error {{argument to '__builtin_lsx_vstelm_d' must be a constant integer}} ++} ++ ++void vstelm_b(v16i8 _1, void *_2, int var) { ++ __lsx_vstelm_b(_1, _2, -129, 1); // expected-error {{argument value -129 is outside the valid range [-128, 127]}} ++ __lsx_vstelm_b(_1, _2, 128, 1); // expected-error {{argument value 128 is outside the valid range [-128, 127]}} ++ __lsx_vstelm_b(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_b' must be a constant integer}} ++} ++ ++void vstelm_h(v8i16 _1, void *_2, int var) { ++ __lsx_vstelm_h(_1, _2, -258, 1); // expected-error {{argument value -258 is outside the valid range [-256, 254]}} ++ __lsx_vstelm_h(_1, _2, 256, 1); // expected-error {{argument value 256 is outside the valid range [-256, 254]}} ++ __lsx_vstelm_h(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_h' must be a constant integer}} ++} ++ ++void vstelm_w(v4i32 _1, void *_2, int var) { ++ __lsx_vstelm_w(_1, _2, -516, 1); // expected-error {{argument value -516 is outside the valid range [-512, 508]}} ++ __lsx_vstelm_w(_1, _2, 512, 1); // expected-error {{argument value 512 is outside the valid range [-512, 508]}} ++ __lsx_vstelm_w(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_w' must be a constant integer}} ++} ++ ++void vstelm_d(v2i64 _1, void *_2, int var) { ++ __lsx_vstelm_d(_1, _2, -1032, 1); // expected-error {{argument value -1032 is outside the valid range [-1024, 1016]}} ++ __lsx_vstelm_d(_1, _2, 1024, 1); // expected-error {{argument value 1024 is outside the valid range [-1024, 1016]}} ++ __lsx_vstelm_d(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_d' must be a constant integer}} ++} ++ ++v16i8 vldrepl_b(void *_1, int var) { ++ v16i8 res = __lsx_vldrepl_b(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ res |= __lsx_vldrepl_b(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} ++ res |= __lsx_vldrepl_b(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vldrepl_h(void *_1, int var) { ++ v8i16 res = __lsx_vldrepl_h(_1, -2050); // expected-error {{argument value -2050 is outside the valid range [-2048, 2046]}} ++ res |= __lsx_vldrepl_h(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2046]}} ++ res |= __lsx_vldrepl_h(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vldrepl_w(void *_1, int var) { ++ v4i32 res = __lsx_vldrepl_w(_1, -2052); // expected-error {{argument value -2052 is outside the valid range [-2048, 2044]}} ++ res |= __lsx_vldrepl_w(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2044]}} ++ res |= __lsx_vldrepl_w(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vldrepl_d(void *_1, int var) { ++ v2i64 res = __lsx_vldrepl_d(_1, -2056); // expected-error {{argument value -2056 is outside the valid range [-2048, 2040]}} ++ res |= __lsx_vldrepl_d(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2040]}} ++ res |= __lsx_vldrepl_d(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vrotri_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vrotri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lsx_vrotri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lsx_vrotri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vrotri_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vrotri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vrotri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vrotri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vrotri_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vrotri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vrotri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vrotri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vrotri_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vrotri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vrotri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vrotri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __lsx_vsrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __lsx_vsrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vsrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vsrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __lsx_vsrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __lsx_vsrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lsx_vsrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lsx_vsrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __lsx_vsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __lsx_vsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __lsx_vsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __lsx_vsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lsx_vsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lsx_vsrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __lsx_vssrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vssrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vssrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __lsx_vssrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vssrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vssrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __lsx_vssrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vssrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vssrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __lsx_vssrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lsx_vssrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lsx_vssrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2, int var) { ++ v16u8 res = __lsx_vssrlni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vssrlni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vssrlni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2, int var) { ++ v8u16 res = __lsx_vssrlni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vssrlni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vssrlni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2, int var) { ++ v4u32 res = __lsx_vssrlni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vssrlni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vssrlni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2, int var) { ++ v2u64 res = __lsx_vssrlni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lsx_vssrlni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lsx_vssrlni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __lsx_vssrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vssrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vssrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __lsx_vssrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vssrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vssrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __lsx_vssrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vssrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vssrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __lsx_vssrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lsx_vssrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lsx_vssrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2, int var) { ++ v16u8 res = __lsx_vssrlrni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vssrlrni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vssrlrni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2, int var) { ++ v8u16 res = __lsx_vssrlrni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vssrlrni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vssrlrni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2, int var) { ++ v4u32 res = __lsx_vssrlrni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vssrlrni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vssrlrni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2, int var) { ++ v2u64 res = __lsx_vssrlrni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lsx_vssrlrni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lsx_vssrlrni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrani_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __lsx_vsrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrani_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __lsx_vsrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vsrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vsrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrani_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __lsx_vsrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrani_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __lsx_vsrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lsx_vsrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lsx_vsrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __lsx_vsrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __lsx_vsrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vsrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vsrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __lsx_vsrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __lsx_vsrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lsx_vsrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lsx_vsrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vssrani_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __lsx_vssrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vssrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vssrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vssrani_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __lsx_vssrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vssrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vssrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vssrani_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __lsx_vssrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vssrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vssrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vssrani_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __lsx_vssrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lsx_vssrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lsx_vssrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2, int var) { ++ v16u8 res = __lsx_vssrani_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vssrani_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vssrani_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2, int var) { ++ v8u16 res = __lsx_vssrani_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vssrani_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vssrani_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2, int var) { ++ v4u32 res = __lsx_vssrani_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vssrani_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vssrani_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vssrani_du_q(v2u64 _1, v2i64 _2, int var) { ++ v2u64 res = __lsx_vssrani_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lsx_vssrani_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lsx_vssrani_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_du_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __lsx_vssrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vssrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vssrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __lsx_vssrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vssrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vssrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __lsx_vssrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vssrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vssrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __lsx_vssrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lsx_vssrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lsx_vssrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2, int var) { ++ v16u8 res = __lsx_vssrarni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vssrarni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vssrarni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2, int var) { ++ v8u16 res = __lsx_vssrarni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vssrarni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vssrarni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2, int var) { ++ v4u32 res = __lsx_vssrarni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vssrarni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vssrarni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2, int var) { ++ v2u64 res = __lsx_vssrarni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lsx_vssrarni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lsx_vssrarni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vpermi_w(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __lsx_vpermi_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lsx_vpermi_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lsx_vpermi_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vpermi_w' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vld(void *_1, int var) { ++ v16i8 res = __lsx_vld(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ res |= __lsx_vld(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} ++ res |= __lsx_vld(_1, var); // expected-error {{argument to '__builtin_lsx_vld' must be a constant integer}} ++ return res; ++} ++ ++void vst(v16i8 _1, void *_2, int var) { ++ __lsx_vst(_1, _2, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ __lsx_vst(_1, _2, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} ++ __lsx_vst(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vst' must be a constant integer}} ++} ++ ++v2i64 vldi(int var) { ++ v2i64 res = __lsx_vldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}} ++ res |= __lsx_vldi(4096); // expected-error {{argument value 4096 is outside the valid range [-4096, 4095]}} ++ res |= __lsx_vldi(var); // expected-error {{argument to '__builtin_lsx_vldi' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vrepli_b(int var) { ++ v16i8 res = __lsx_vrepli_b(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __lsx_vrepli_b(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __lsx_vrepli_b(var); // expected-error {{argument to '__builtin_lsx_vrepli_b' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vrepli_d(int var) { ++ v2i64 res = __lsx_vrepli_d(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __lsx_vrepli_d(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __lsx_vrepli_d(var); // expected-error {{argument to '__builtin_lsx_vrepli_d' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vrepli_h(int var) { ++ v8i16 res = __lsx_vrepli_h(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __lsx_vrepli_h(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __lsx_vrepli_h(var); // expected-error {{argument to '__builtin_lsx_vrepli_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vrepli_w(int var) { ++ v4i32 res = __lsx_vrepli_w(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __lsx_vrepli_w(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __lsx_vrepli_w(var); // expected-error {{argument to '__builtin_lsx_vrepli_w' must be a constant integer}} ++ return res; ++} +diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c b/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c +new file mode 100644 +index 000000000000..331e29fb7d17 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c +@@ -0,0 +1,4451 @@ ++// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -O2 -emit-llvm %s -o - | FileCheck %s ++ ++#include ++ ++// CHECK-LABEL: @vsll_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsll_b(v16i8 _1, v16i8 _2) { return __lsx_vsll_b(_1, _2); } ++// CHECK-LABEL: @vsll_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsll_h(v8i16 _1, v8i16 _2) { return __lsx_vsll_h(_1, _2); } ++// CHECK-LABEL: @vsll_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsll_w(v4i32 _1, v4i32 _2) { return __lsx_vsll_w(_1, _2); } ++// CHECK-LABEL: @vsll_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsll_d(v2i64 _1, v2i64 _2) { return __lsx_vsll_d(_1, _2); } ++// CHECK-LABEL: @vslli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vslli_b(v16i8 _1) { return __lsx_vslli_b(_1, 1); } ++// CHECK-LABEL: @vslli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vslli_h(v8i16 _1) { return __lsx_vslli_h(_1, 1); } ++// CHECK-LABEL: @vslli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vslli_w(v4i32 _1) { return __lsx_vslli_w(_1, 1); } ++// CHECK-LABEL: @vslli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vslli_d(v2i64 _1) { return __lsx_vslli_d(_1, 1); } ++// CHECK-LABEL: @vsra_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsra_b(v16i8 _1, v16i8 _2) { return __lsx_vsra_b(_1, _2); } ++// CHECK-LABEL: @vsra_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsra_h(v8i16 _1, v8i16 _2) { return __lsx_vsra_h(_1, _2); } ++// CHECK-LABEL: @vsra_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsra_w(v4i32 _1, v4i32 _2) { return __lsx_vsra_w(_1, _2); } ++// CHECK-LABEL: @vsra_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsra_d(v2i64 _1, v2i64 _2) { return __lsx_vsra_d(_1, _2); } ++// CHECK-LABEL: @vsrai_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrai_b(v16i8 _1) { return __lsx_vsrai_b(_1, 1); } ++// CHECK-LABEL: @vsrai_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrai_h(v8i16 _1) { return __lsx_vsrai_h(_1, 1); } ++// CHECK-LABEL: @vsrai_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrai_w(v4i32 _1) { return __lsx_vsrai_w(_1, 1); } ++// CHECK-LABEL: @vsrai_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrai_d(v2i64 _1) { return __lsx_vsrai_d(_1, 1); } ++// CHECK-LABEL: @vsrar_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrar_b(v16i8 _1, v16i8 _2) { return __lsx_vsrar_b(_1, _2); } ++// CHECK-LABEL: @vsrar_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrar_h(v8i16 _1, v8i16 _2) { return __lsx_vsrar_h(_1, _2); } ++// CHECK-LABEL: @vsrar_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrar_w(v4i32 _1, v4i32 _2) { return __lsx_vsrar_w(_1, _2); } ++// CHECK-LABEL: @vsrar_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrar_d(v2i64 _1, v2i64 _2) { return __lsx_vsrar_d(_1, _2); } ++// CHECK-LABEL: @vsrari_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrari_b(v16i8 _1) { return __lsx_vsrari_b(_1, 1); } ++// CHECK-LABEL: @vsrari_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrari_h(v8i16 _1) { return __lsx_vsrari_h(_1, 1); } ++// CHECK-LABEL: @vsrari_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrari_w(v4i32 _1) { return __lsx_vsrari_w(_1, 1); } ++// CHECK-LABEL: @vsrari_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrari_d(v2i64 _1) { return __lsx_vsrari_d(_1, 1); } ++// CHECK-LABEL: @vsrl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrl_b(v16i8 _1, v16i8 _2) { return __lsx_vsrl_b(_1, _2); } ++// CHECK-LABEL: @vsrl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrl_h(v8i16 _1, v8i16 _2) { return __lsx_vsrl_h(_1, _2); } ++// CHECK-LABEL: @vsrl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrl_w(v4i32 _1, v4i32 _2) { return __lsx_vsrl_w(_1, _2); } ++// CHECK-LABEL: @vsrl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrl_d(v2i64 _1, v2i64 _2) { return __lsx_vsrl_d(_1, _2); } ++// CHECK-LABEL: @vsrli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrli_b(v16i8 _1) { return __lsx_vsrli_b(_1, 1); } ++// CHECK-LABEL: @vsrli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrli_h(v8i16 _1) { return __lsx_vsrli_h(_1, 1); } ++// CHECK-LABEL: @vsrli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrli_w(v4i32 _1) { return __lsx_vsrli_w(_1, 1); } ++// CHECK-LABEL: @vsrli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrli_d(v2i64 _1) { return __lsx_vsrli_d(_1, 1); } ++// CHECK-LABEL: @vsrlr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrlr_b(v16i8 _1, v16i8 _2) { return __lsx_vsrlr_b(_1, _2); } ++// CHECK-LABEL: @vsrlr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrlr_h(v8i16 _1, v8i16 _2) { return __lsx_vsrlr_h(_1, _2); } ++// CHECK-LABEL: @vsrlr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrlr_w(v4i32 _1, v4i32 _2) { return __lsx_vsrlr_w(_1, _2); } ++// CHECK-LABEL: @vsrlr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrlr_d(v2i64 _1, v2i64 _2) { return __lsx_vsrlr_d(_1, _2); } ++// CHECK-LABEL: @vsrlri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrlri_b(v16i8 _1) { return __lsx_vsrlri_b(_1, 1); } ++// CHECK-LABEL: @vsrlri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrlri_h(v8i16 _1) { return __lsx_vsrlri_h(_1, 1); } ++// CHECK-LABEL: @vsrlri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrlri_w(v4i32 _1) { return __lsx_vsrlri_w(_1, 1); } ++// CHECK-LABEL: @vsrlri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrlri_d(v2i64 _1) { return __lsx_vsrlri_d(_1, 1); } ++// CHECK-LABEL: @vbitclr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vbitclr_b(v16u8 _1, v16u8 _2) { return __lsx_vbitclr_b(_1, _2); } ++// CHECK-LABEL: @vbitclr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vbitclr_h(v8u16 _1, v8u16 _2) { return __lsx_vbitclr_h(_1, _2); } ++// CHECK-LABEL: @vbitclr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vbitclr_w(v4u32 _1, v4u32 _2) { return __lsx_vbitclr_w(_1, _2); } ++// CHECK-LABEL: @vbitclr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vbitclr_d(v2u64 _1, v2u64 _2) { return __lsx_vbitclr_d(_1, _2); } ++// CHECK-LABEL: @vbitclri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vbitclri_b(v16u8 _1) { return __lsx_vbitclri_b(_1, 1); } ++// CHECK-LABEL: @vbitclri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vbitclri_h(v8u16 _1) { return __lsx_vbitclri_h(_1, 1); } ++// CHECK-LABEL: @vbitclri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vbitclri_w(v4u32 _1) { return __lsx_vbitclri_w(_1, 1); } ++// CHECK-LABEL: @vbitclri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vbitclri_d(v2u64 _1) { return __lsx_vbitclri_d(_1, 1); } ++// CHECK-LABEL: @vbitset_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vbitset_b(v16u8 _1, v16u8 _2) { return __lsx_vbitset_b(_1, _2); } ++// CHECK-LABEL: @vbitset_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vbitset_h(v8u16 _1, v8u16 _2) { return __lsx_vbitset_h(_1, _2); } ++// CHECK-LABEL: @vbitset_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vbitset_w(v4u32 _1, v4u32 _2) { return __lsx_vbitset_w(_1, _2); } ++// CHECK-LABEL: @vbitset_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vbitset_d(v2u64 _1, v2u64 _2) { return __lsx_vbitset_d(_1, _2); } ++// CHECK-LABEL: @vbitseti_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vbitseti_b(v16u8 _1) { return __lsx_vbitseti_b(_1, 1); } ++// CHECK-LABEL: @vbitseti_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vbitseti_h(v8u16 _1) { return __lsx_vbitseti_h(_1, 1); } ++// CHECK-LABEL: @vbitseti_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vbitseti_w(v4u32 _1) { return __lsx_vbitseti_w(_1, 1); } ++// CHECK-LABEL: @vbitseti_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vbitseti_d(v2u64 _1) { return __lsx_vbitseti_d(_1, 1); } ++// CHECK-LABEL: @vbitrev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vbitrev_b(v16u8 _1, v16u8 _2) { return __lsx_vbitrev_b(_1, _2); } ++// CHECK-LABEL: @vbitrev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vbitrev_h(v8u16 _1, v8u16 _2) { return __lsx_vbitrev_h(_1, _2); } ++// CHECK-LABEL: @vbitrev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vbitrev_w(v4u32 _1, v4u32 _2) { return __lsx_vbitrev_w(_1, _2); } ++// CHECK-LABEL: @vbitrev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vbitrev_d(v2u64 _1, v2u64 _2) { return __lsx_vbitrev_d(_1, _2); } ++// CHECK-LABEL: @vbitrevi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vbitrevi_b(v16u8 _1) { return __lsx_vbitrevi_b(_1, 1); } ++// CHECK-LABEL: @vbitrevi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vbitrevi_h(v8u16 _1) { return __lsx_vbitrevi_h(_1, 1); } ++// CHECK-LABEL: @vbitrevi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vbitrevi_w(v4u32 _1) { return __lsx_vbitrevi_w(_1, 1); } ++// CHECK-LABEL: @vbitrevi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vbitrevi_d(v2u64 _1) { return __lsx_vbitrevi_d(_1, 1); } ++// CHECK-LABEL: @vadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vadd_b(v16i8 _1, v16i8 _2) { return __lsx_vadd_b(_1, _2); } ++// CHECK-LABEL: @vadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vadd_h(v8i16 _1, v8i16 _2) { return __lsx_vadd_h(_1, _2); } ++// CHECK-LABEL: @vadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vadd_w(v4i32 _1, v4i32 _2) { return __lsx_vadd_w(_1, _2); } ++// CHECK-LABEL: @vadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vadd_d(v2i64 _1, v2i64 _2) { return __lsx_vadd_d(_1, _2); } ++// CHECK-LABEL: @vaddi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vaddi_bu(v16i8 _1) { return __lsx_vaddi_bu(_1, 1); } ++// CHECK-LABEL: @vaddi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vaddi_hu(v8i16 _1) { return __lsx_vaddi_hu(_1, 1); } ++// CHECK-LABEL: @vaddi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vaddi_wu(v4i32 _1) { return __lsx_vaddi_wu(_1, 1); } ++// CHECK-LABEL: @vaddi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddi_du(v2i64 _1) { return __lsx_vaddi_du(_1, 1); } ++// CHECK-LABEL: @vsub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsub_b(v16i8 _1, v16i8 _2) { return __lsx_vsub_b(_1, _2); } ++// CHECK-LABEL: @vsub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsub_h(v8i16 _1, v8i16 _2) { return __lsx_vsub_h(_1, _2); } ++// CHECK-LABEL: @vsub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsub_w(v4i32 _1, v4i32 _2) { return __lsx_vsub_w(_1, _2); } ++// CHECK-LABEL: @vsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsub_d(v2i64 _1, v2i64 _2) { return __lsx_vsub_d(_1, _2); } ++// CHECK-LABEL: @vsubi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsubi_bu(v16i8 _1) { return __lsx_vsubi_bu(_1, 1); } ++// CHECK-LABEL: @vsubi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsubi_hu(v8i16 _1) { return __lsx_vsubi_hu(_1, 1); } ++// CHECK-LABEL: @vsubi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsubi_wu(v4i32 _1) { return __lsx_vsubi_wu(_1, 1); } ++// CHECK-LABEL: @vsubi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubi_du(v2i64 _1) { return __lsx_vsubi_du(_1, 1); } ++// CHECK-LABEL: @vmax_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmax_b(v16i8 _1, v16i8 _2) { return __lsx_vmax_b(_1, _2); } ++// CHECK-LABEL: @vmax_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmax_h(v8i16 _1, v8i16 _2) { return __lsx_vmax_h(_1, _2); } ++// CHECK-LABEL: @vmax_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmax_w(v4i32 _1, v4i32 _2) { return __lsx_vmax_w(_1, _2); } ++// CHECK-LABEL: @vmax_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmax_d(v2i64 _1, v2i64 _2) { return __lsx_vmax_d(_1, _2); } ++// CHECK-LABEL: @vmaxi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmaxi_b(v16i8 _1) { return __lsx_vmaxi_b(_1, 1); } ++// CHECK-LABEL: @vmaxi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmaxi_h(v8i16 _1) { return __lsx_vmaxi_h(_1, 1); } ++// CHECK-LABEL: @vmaxi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmaxi_w(v4i32 _1) { return __lsx_vmaxi_w(_1, 1); } ++// CHECK-LABEL: @vmaxi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaxi_d(v2i64 _1) { return __lsx_vmaxi_d(_1, 1); } ++// CHECK-LABEL: @vmax_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vmax_bu(v16u8 _1, v16u8 _2) { return __lsx_vmax_bu(_1, _2); } ++// CHECK-LABEL: @vmax_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vmax_hu(v8u16 _1, v8u16 _2) { return __lsx_vmax_hu(_1, _2); } ++// CHECK-LABEL: @vmax_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vmax_wu(v4u32 _1, v4u32 _2) { return __lsx_vmax_wu(_1, _2); } ++// CHECK-LABEL: @vmax_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmax_du(v2u64 _1, v2u64 _2) { return __lsx_vmax_du(_1, _2); } ++// CHECK-LABEL: @vmaxi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vmaxi_bu(v16u8 _1) { return __lsx_vmaxi_bu(_1, 1); } ++// CHECK-LABEL: @vmaxi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vmaxi_hu(v8u16 _1) { return __lsx_vmaxi_hu(_1, 1); } ++// CHECK-LABEL: @vmaxi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vmaxi_wu(v4u32 _1) { return __lsx_vmaxi_wu(_1, 1); } ++// CHECK-LABEL: @vmaxi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmaxi_du(v2u64 _1) { return __lsx_vmaxi_du(_1, 1); } ++// CHECK-LABEL: @vmin_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmin_b(v16i8 _1, v16i8 _2) { return __lsx_vmin_b(_1, _2); } ++// CHECK-LABEL: @vmin_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmin_h(v8i16 _1, v8i16 _2) { return __lsx_vmin_h(_1, _2); } ++// CHECK-LABEL: @vmin_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmin_w(v4i32 _1, v4i32 _2) { return __lsx_vmin_w(_1, _2); } ++// CHECK-LABEL: @vmin_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmin_d(v2i64 _1, v2i64 _2) { return __lsx_vmin_d(_1, _2); } ++// CHECK-LABEL: @vmini_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmini_b(v16i8 _1) { return __lsx_vmini_b(_1, 1); } ++// CHECK-LABEL: @vmini_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmini_h(v8i16 _1) { return __lsx_vmini_h(_1, 1); } ++// CHECK-LABEL: @vmini_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmini_w(v4i32 _1) { return __lsx_vmini_w(_1, 1); } ++// CHECK-LABEL: @vmini_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmini_d(v2i64 _1) { return __lsx_vmini_d(_1, 1); } ++// CHECK-LABEL: @vmin_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vmin_bu(v16u8 _1, v16u8 _2) { return __lsx_vmin_bu(_1, _2); } ++// CHECK-LABEL: @vmin_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vmin_hu(v8u16 _1, v8u16 _2) { return __lsx_vmin_hu(_1, _2); } ++// CHECK-LABEL: @vmin_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vmin_wu(v4u32 _1, v4u32 _2) { return __lsx_vmin_wu(_1, _2); } ++// CHECK-LABEL: @vmin_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmin_du(v2u64 _1, v2u64 _2) { return __lsx_vmin_du(_1, _2); } ++// CHECK-LABEL: @vmini_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vmini_bu(v16u8 _1) { return __lsx_vmini_bu(_1, 1); } ++// CHECK-LABEL: @vmini_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vmini_hu(v8u16 _1) { return __lsx_vmini_hu(_1, 1); } ++// CHECK-LABEL: @vmini_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vmini_wu(v4u32 _1) { return __lsx_vmini_wu(_1, 1); } ++// CHECK-LABEL: @vmini_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmini_du(v2u64 _1) { return __lsx_vmini_du(_1, 1); } ++// CHECK-LABEL: @vseq_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vseq_b(v16i8 _1, v16i8 _2) { return __lsx_vseq_b(_1, _2); } ++// CHECK-LABEL: @vseq_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vseq_h(v8i16 _1, v8i16 _2) { return __lsx_vseq_h(_1, _2); } ++// CHECK-LABEL: @vseq_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vseq_w(v4i32 _1, v4i32 _2) { return __lsx_vseq_w(_1, _2); } ++// CHECK-LABEL: @vseq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vseq_d(v2i64 _1, v2i64 _2) { return __lsx_vseq_d(_1, _2); } ++// CHECK-LABEL: @vseqi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vseqi_b(v16i8 _1) { return __lsx_vseqi_b(_1, 1); } ++// CHECK-LABEL: @vseqi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vseqi_h(v8i16 _1) { return __lsx_vseqi_h(_1, 1); } ++// CHECK-LABEL: @vseqi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vseqi_w(v4i32 _1) { return __lsx_vseqi_w(_1, 1); } ++// CHECK-LABEL: @vseqi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vseqi_d(v2i64 _1) { return __lsx_vseqi_d(_1, 1); } ++// CHECK-LABEL: @vslti_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vslti_b(v16i8 _1) { return __lsx_vslti_b(_1, 1); } ++// CHECK-LABEL: @vslt_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vslt_b(v16i8 _1, v16i8 _2) { return __lsx_vslt_b(_1, _2); } ++// CHECK-LABEL: @vslt_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vslt_h(v8i16 _1, v8i16 _2) { return __lsx_vslt_h(_1, _2); } ++// CHECK-LABEL: @vslt_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vslt_w(v4i32 _1, v4i32 _2) { return __lsx_vslt_w(_1, _2); } ++// CHECK-LABEL: @vslt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vslt_d(v2i64 _1, v2i64 _2) { return __lsx_vslt_d(_1, _2); } ++// CHECK-LABEL: @vslti_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vslti_h(v8i16 _1) { return __lsx_vslti_h(_1, 1); } ++// CHECK-LABEL: @vslti_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vslti_w(v4i32 _1) { return __lsx_vslti_w(_1, 1); } ++// CHECK-LABEL: @vslti_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vslti_d(v2i64 _1) { return __lsx_vslti_d(_1, 1); } ++// CHECK-LABEL: @vslt_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vslt_bu(v16u8 _1, v16u8 _2) { return __lsx_vslt_bu(_1, _2); } ++// CHECK-LABEL: @vslt_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vslt_hu(v8u16 _1, v8u16 _2) { return __lsx_vslt_hu(_1, _2); } ++// CHECK-LABEL: @vslt_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vslt_wu(v4u32 _1, v4u32 _2) { return __lsx_vslt_wu(_1, _2); } ++// CHECK-LABEL: @vslt_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vslt_du(v2u64 _1, v2u64 _2) { return __lsx_vslt_du(_1, _2); } ++// CHECK-LABEL: @vslti_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vslti_bu(v16u8 _1) { return __lsx_vslti_bu(_1, 1); } ++// CHECK-LABEL: @vslti_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vslti_hu(v8u16 _1) { return __lsx_vslti_hu(_1, 1); } ++// CHECK-LABEL: @vslti_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vslti_wu(v4u32 _1) { return __lsx_vslti_wu(_1, 1); } ++// CHECK-LABEL: @vslti_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vslti_du(v2u64 _1) { return __lsx_vslti_du(_1, 1); } ++// CHECK-LABEL: @vsle_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsle_b(v16i8 _1, v16i8 _2) { return __lsx_vsle_b(_1, _2); } ++// CHECK-LABEL: @vsle_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsle_h(v8i16 _1, v8i16 _2) { return __lsx_vsle_h(_1, _2); } ++// CHECK-LABEL: @vsle_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsle_w(v4i32 _1, v4i32 _2) { return __lsx_vsle_w(_1, _2); } ++// CHECK-LABEL: @vsle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsle_d(v2i64 _1, v2i64 _2) { return __lsx_vsle_d(_1, _2); } ++// CHECK-LABEL: @vslei_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vslei_b(v16i8 _1) { return __lsx_vslei_b(_1, 1); } ++// CHECK-LABEL: @vslei_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vslei_h(v8i16 _1) { return __lsx_vslei_h(_1, 1); } ++// CHECK-LABEL: @vslei_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vslei_w(v4i32 _1) { return __lsx_vslei_w(_1, 1); } ++// CHECK-LABEL: @vslei_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vslei_d(v2i64 _1) { return __lsx_vslei_d(_1, 1); } ++// CHECK-LABEL: @vsle_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsle_bu(v16u8 _1, v16u8 _2) { return __lsx_vsle_bu(_1, _2); } ++// CHECK-LABEL: @vsle_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsle_hu(v8u16 _1, v8u16 _2) { return __lsx_vsle_hu(_1, _2); } ++// CHECK-LABEL: @vsle_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsle_wu(v4u32 _1, v4u32 _2) { return __lsx_vsle_wu(_1, _2); } ++// CHECK-LABEL: @vsle_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsle_du(v2u64 _1, v2u64 _2) { return __lsx_vsle_du(_1, _2); } ++// CHECK-LABEL: @vslei_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vslei_bu(v16u8 _1) { return __lsx_vslei_bu(_1, 1); } ++// CHECK-LABEL: @vslei_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vslei_hu(v8u16 _1) { return __lsx_vslei_hu(_1, 1); } ++// CHECK-LABEL: @vslei_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vslei_wu(v4u32 _1) { return __lsx_vslei_wu(_1, 1); } ++// CHECK-LABEL: @vslei_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vslei_du(v2u64 _1) { return __lsx_vslei_du(_1, 1); } ++// CHECK-LABEL: @vsat_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsat_b(v16i8 _1) { return __lsx_vsat_b(_1, 1); } ++// CHECK-LABEL: @vsat_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsat_h(v8i16 _1) { return __lsx_vsat_h(_1, 1); } ++// CHECK-LABEL: @vsat_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsat_w(v4i32 _1) { return __lsx_vsat_w(_1, 1); } ++// CHECK-LABEL: @vsat_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsat_d(v2i64 _1) { return __lsx_vsat_d(_1, 1); } ++// CHECK-LABEL: @vsat_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vsat_bu(v16u8 _1) { return __lsx_vsat_bu(_1, 1); } ++// CHECK-LABEL: @vsat_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vsat_hu(v8u16 _1) { return __lsx_vsat_hu(_1, 1); } ++// CHECK-LABEL: @vsat_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vsat_wu(v4u32 _1) { return __lsx_vsat_wu(_1, 1); } ++// CHECK-LABEL: @vsat_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vsat_du(v2u64 _1) { return __lsx_vsat_du(_1, 1); } ++// CHECK-LABEL: @vadda_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vadda_b(v16i8 _1, v16i8 _2) { return __lsx_vadda_b(_1, _2); } ++// CHECK-LABEL: @vadda_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vadda_h(v8i16 _1, v8i16 _2) { return __lsx_vadda_h(_1, _2); } ++// CHECK-LABEL: @vadda_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vadda_w(v4i32 _1, v4i32 _2) { return __lsx_vadda_w(_1, _2); } ++// CHECK-LABEL: @vadda_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vadda_d(v2i64 _1, v2i64 _2) { return __lsx_vadda_d(_1, _2); } ++// CHECK-LABEL: @vsadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsadd_b(v16i8 _1, v16i8 _2) { return __lsx_vsadd_b(_1, _2); } ++// CHECK-LABEL: @vsadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsadd_h(v8i16 _1, v8i16 _2) { return __lsx_vsadd_h(_1, _2); } ++// CHECK-LABEL: @vsadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsadd_w(v4i32 _1, v4i32 _2) { return __lsx_vsadd_w(_1, _2); } ++// CHECK-LABEL: @vsadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsadd_d(v2i64 _1, v2i64 _2) { return __lsx_vsadd_d(_1, _2); } ++// CHECK-LABEL: @vsadd_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vsadd_bu(v16u8 _1, v16u8 _2) { return __lsx_vsadd_bu(_1, _2); } ++// CHECK-LABEL: @vsadd_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vsadd_hu(v8u16 _1, v8u16 _2) { return __lsx_vsadd_hu(_1, _2); } ++// CHECK-LABEL: @vsadd_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vsadd_wu(v4u32 _1, v4u32 _2) { return __lsx_vsadd_wu(_1, _2); } ++// CHECK-LABEL: @vsadd_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vsadd_du(v2u64 _1, v2u64 _2) { return __lsx_vsadd_du(_1, _2); } ++// CHECK-LABEL: @vavg_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vavg_b(v16i8 _1, v16i8 _2) { return __lsx_vavg_b(_1, _2); } ++// CHECK-LABEL: @vavg_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vavg_h(v8i16 _1, v8i16 _2) { return __lsx_vavg_h(_1, _2); } ++// CHECK-LABEL: @vavg_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vavg_w(v4i32 _1, v4i32 _2) { return __lsx_vavg_w(_1, _2); } ++// CHECK-LABEL: @vavg_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vavg_d(v2i64 _1, v2i64 _2) { return __lsx_vavg_d(_1, _2); } ++// CHECK-LABEL: @vavg_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vavg_bu(v16u8 _1, v16u8 _2) { return __lsx_vavg_bu(_1, _2); } ++// CHECK-LABEL: @vavg_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vavg_hu(v8u16 _1, v8u16 _2) { return __lsx_vavg_hu(_1, _2); } ++// CHECK-LABEL: @vavg_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vavg_wu(v4u32 _1, v4u32 _2) { return __lsx_vavg_wu(_1, _2); } ++// CHECK-LABEL: @vavg_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vavg_du(v2u64 _1, v2u64 _2) { return __lsx_vavg_du(_1, _2); } ++// CHECK-LABEL: @vavgr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vavgr_b(v16i8 _1, v16i8 _2) { return __lsx_vavgr_b(_1, _2); } ++// CHECK-LABEL: @vavgr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vavgr_h(v8i16 _1, v8i16 _2) { return __lsx_vavgr_h(_1, _2); } ++// CHECK-LABEL: @vavgr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vavgr_w(v4i32 _1, v4i32 _2) { return __lsx_vavgr_w(_1, _2); } ++// CHECK-LABEL: @vavgr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vavgr_d(v2i64 _1, v2i64 _2) { return __lsx_vavgr_d(_1, _2); } ++// CHECK-LABEL: @vavgr_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vavgr_bu(v16u8 _1, v16u8 _2) { return __lsx_vavgr_bu(_1, _2); } ++// CHECK-LABEL: @vavgr_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vavgr_hu(v8u16 _1, v8u16 _2) { return __lsx_vavgr_hu(_1, _2); } ++// CHECK-LABEL: @vavgr_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vavgr_wu(v4u32 _1, v4u32 _2) { return __lsx_vavgr_wu(_1, _2); } ++// CHECK-LABEL: @vavgr_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vavgr_du(v2u64 _1, v2u64 _2) { return __lsx_vavgr_du(_1, _2); } ++// CHECK-LABEL: @vssub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssub_b(v16i8 _1, v16i8 _2) { return __lsx_vssub_b(_1, _2); } ++// CHECK-LABEL: @vssub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssub_h(v8i16 _1, v8i16 _2) { return __lsx_vssub_h(_1, _2); } ++// CHECK-LABEL: @vssub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssub_w(v4i32 _1, v4i32 _2) { return __lsx_vssub_w(_1, _2); } ++// CHECK-LABEL: @vssub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vssub_d(v2i64 _1, v2i64 _2) { return __lsx_vssub_d(_1, _2); } ++// CHECK-LABEL: @vssub_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssub_bu(v16u8 _1, v16u8 _2) { return __lsx_vssub_bu(_1, _2); } ++// CHECK-LABEL: @vssub_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssub_hu(v8u16 _1, v8u16 _2) { return __lsx_vssub_hu(_1, _2); } ++// CHECK-LABEL: @vssub_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssub_wu(v4u32 _1, v4u32 _2) { return __lsx_vssub_wu(_1, _2); } ++// CHECK-LABEL: @vssub_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vssub_du(v2u64 _1, v2u64 _2) { return __lsx_vssub_du(_1, _2); } ++// CHECK-LABEL: @vabsd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vabsd_b(v16i8 _1, v16i8 _2) { return __lsx_vabsd_b(_1, _2); } ++// CHECK-LABEL: @vabsd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vabsd_h(v8i16 _1, v8i16 _2) { return __lsx_vabsd_h(_1, _2); } ++// CHECK-LABEL: @vabsd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vabsd_w(v4i32 _1, v4i32 _2) { return __lsx_vabsd_w(_1, _2); } ++// CHECK-LABEL: @vabsd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vabsd_d(v2i64 _1, v2i64 _2) { return __lsx_vabsd_d(_1, _2); } ++// CHECK-LABEL: @vabsd_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vabsd_bu(v16u8 _1, v16u8 _2) { return __lsx_vabsd_bu(_1, _2); } ++// CHECK-LABEL: @vabsd_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vabsd_hu(v8u16 _1, v8u16 _2) { return __lsx_vabsd_hu(_1, _2); } ++// CHECK-LABEL: @vabsd_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vabsd_wu(v4u32 _1, v4u32 _2) { return __lsx_vabsd_wu(_1, _2); } ++// CHECK-LABEL: @vabsd_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vabsd_du(v2u64 _1, v2u64 _2) { return __lsx_vabsd_du(_1, _2); } ++// CHECK-LABEL: @vmul_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmul_b(v16i8 _1, v16i8 _2) { return __lsx_vmul_b(_1, _2); } ++// CHECK-LABEL: @vmul_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmul_h(v8i16 _1, v8i16 _2) { return __lsx_vmul_h(_1, _2); } ++// CHECK-LABEL: @vmul_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmul_w(v4i32 _1, v4i32 _2) { return __lsx_vmul_w(_1, _2); } ++// CHECK-LABEL: @vmul_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmul_d(v2i64 _1, v2i64 _2) { return __lsx_vmul_d(_1, _2); } ++// CHECK-LABEL: @vmadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmadd_b(v16i8 _1, v16i8 _2, v16i8 _3) { ++ return __lsx_vmadd_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmadd_h(v8i16 _1, v8i16 _2, v8i16 _3) { ++ return __lsx_vmadd_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmadd_w(v4i32 _1, v4i32 _2, v4i32 _3) { ++ return __lsx_vmadd_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmadd_d(v2i64 _1, v2i64 _2, v2i64 _3) { ++ return __lsx_vmadd_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vmsub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmsub_b(v16i8 _1, v16i8 _2, v16i8 _3) { ++ return __lsx_vmsub_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmsub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmsub_h(v8i16 _1, v8i16 _2, v8i16 _3) { ++ return __lsx_vmsub_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmsub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmsub_w(v4i32 _1, v4i32 _2, v4i32 _3) { ++ return __lsx_vmsub_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmsub_d(v2i64 _1, v2i64 _2, v2i64 _3) { ++ return __lsx_vmsub_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vdiv_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vdiv_b(v16i8 _1, v16i8 _2) { return __lsx_vdiv_b(_1, _2); } ++// CHECK-LABEL: @vdiv_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vdiv_h(v8i16 _1, v8i16 _2) { return __lsx_vdiv_h(_1, _2); } ++// CHECK-LABEL: @vdiv_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vdiv_w(v4i32 _1, v4i32 _2) { return __lsx_vdiv_w(_1, _2); } ++// CHECK-LABEL: @vdiv_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vdiv_d(v2i64 _1, v2i64 _2) { return __lsx_vdiv_d(_1, _2); } ++// CHECK-LABEL: @vdiv_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vdiv_bu(v16u8 _1, v16u8 _2) { return __lsx_vdiv_bu(_1, _2); } ++// CHECK-LABEL: @vdiv_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vdiv_hu(v8u16 _1, v8u16 _2) { return __lsx_vdiv_hu(_1, _2); } ++// CHECK-LABEL: @vdiv_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vdiv_wu(v4u32 _1, v4u32 _2) { return __lsx_vdiv_wu(_1, _2); } ++// CHECK-LABEL: @vdiv_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vdiv_du(v2u64 _1, v2u64 _2) { return __lsx_vdiv_du(_1, _2); } ++// CHECK-LABEL: @vhaddw_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vhaddw_h_b(v16i8 _1, v16i8 _2) { return __lsx_vhaddw_h_b(_1, _2); } ++// CHECK-LABEL: @vhaddw_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vhaddw_w_h(v8i16 _1, v8i16 _2) { return __lsx_vhaddw_w_h(_1, _2); } ++// CHECK-LABEL: @vhaddw_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vhaddw_d_w(v4i32 _1, v4i32 _2) { return __lsx_vhaddw_d_w(_1, _2); } ++// CHECK-LABEL: @vhaddw_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vhaddw_hu_bu(v16u8 _1, v16u8 _2) { return __lsx_vhaddw_hu_bu(_1, _2); } ++// CHECK-LABEL: @vhaddw_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vhaddw_wu_hu(v8u16 _1, v8u16 _2) { return __lsx_vhaddw_wu_hu(_1, _2); } ++// CHECK-LABEL: @vhaddw_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vhaddw_du_wu(v4u32 _1, v4u32 _2) { return __lsx_vhaddw_du_wu(_1, _2); } ++// CHECK-LABEL: @vhsubw_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vhsubw_h_b(v16i8 _1, v16i8 _2) { return __lsx_vhsubw_h_b(_1, _2); } ++// CHECK-LABEL: @vhsubw_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vhsubw_w_h(v8i16 _1, v8i16 _2) { return __lsx_vhsubw_w_h(_1, _2); } ++// CHECK-LABEL: @vhsubw_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vhsubw_d_w(v4i32 _1, v4i32 _2) { return __lsx_vhsubw_d_w(_1, _2); } ++// CHECK-LABEL: @vhsubw_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vhsubw_hu_bu(v16u8 _1, v16u8 _2) { return __lsx_vhsubw_hu_bu(_1, _2); } ++// CHECK-LABEL: @vhsubw_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vhsubw_wu_hu(v8u16 _1, v8u16 _2) { return __lsx_vhsubw_wu_hu(_1, _2); } ++// CHECK-LABEL: @vhsubw_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vhsubw_du_wu(v4u32 _1, v4u32 _2) { return __lsx_vhsubw_du_wu(_1, _2); } ++// CHECK-LABEL: @vmod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmod_b(v16i8 _1, v16i8 _2) { return __lsx_vmod_b(_1, _2); } ++// CHECK-LABEL: @vmod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmod_h(v8i16 _1, v8i16 _2) { return __lsx_vmod_h(_1, _2); } ++// CHECK-LABEL: @vmod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmod_w(v4i32 _1, v4i32 _2) { return __lsx_vmod_w(_1, _2); } ++// CHECK-LABEL: @vmod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmod_d(v2i64 _1, v2i64 _2) { return __lsx_vmod_d(_1, _2); } ++// CHECK-LABEL: @vmod_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vmod_bu(v16u8 _1, v16u8 _2) { return __lsx_vmod_bu(_1, _2); } ++// CHECK-LABEL: @vmod_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vmod_hu(v8u16 _1, v8u16 _2) { return __lsx_vmod_hu(_1, _2); } ++// CHECK-LABEL: @vmod_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vmod_wu(v4u32 _1, v4u32 _2) { return __lsx_vmod_wu(_1, _2); } ++// CHECK-LABEL: @vmod_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmod_du(v2u64 _1, v2u64 _2) { return __lsx_vmod_du(_1, _2); } ++// CHECK-LABEL: @vreplve_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> [[_1:%.*]], i32 [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vreplve_b(v16i8 _1, int _2) { return __lsx_vreplve_b(_1, _2); } ++// CHECK-LABEL: @vreplve_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> [[_1:%.*]], i32 [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vreplve_h(v8i16 _1, int _2) { return __lsx_vreplve_h(_1, _2); } ++// CHECK-LABEL: @vreplve_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> [[_1:%.*]], i32 [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vreplve_w(v4i32 _1, int _2) { return __lsx_vreplve_w(_1, _2); } ++// CHECK-LABEL: @vreplve_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> [[_1:%.*]], i32 [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vreplve_d(v2i64 _1, int _2) { return __lsx_vreplve_d(_1, _2); } ++// CHECK-LABEL: @vreplvei_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vreplvei_b(v16i8 _1) { return __lsx_vreplvei_b(_1, 1); } ++// CHECK-LABEL: @vreplvei_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vreplvei_h(v8i16 _1) { return __lsx_vreplvei_h(_1, 1); } ++// CHECK-LABEL: @vreplvei_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vreplvei_w(v4i32 _1) { return __lsx_vreplvei_w(_1, 1); } ++// CHECK-LABEL: @vreplvei_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vreplvei_d(v2i64 _1) { return __lsx_vreplvei_d(_1, 1); } ++// CHECK-LABEL: @vpickev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vpickev_b(v16i8 _1, v16i8 _2) { return __lsx_vpickev_b(_1, _2); } ++// CHECK-LABEL: @vpickev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vpickev_h(v8i16 _1, v8i16 _2) { return __lsx_vpickev_h(_1, _2); } ++// CHECK-LABEL: @vpickev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vpickev_w(v4i32 _1, v4i32 _2) { return __lsx_vpickev_w(_1, _2); } ++// CHECK-LABEL: @vpickev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vpickev_d(v2i64 _1, v2i64 _2) { return __lsx_vpickev_d(_1, _2); } ++// CHECK-LABEL: @vpickod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vpickod_b(v16i8 _1, v16i8 _2) { return __lsx_vpickod_b(_1, _2); } ++// CHECK-LABEL: @vpickod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vpickod_h(v8i16 _1, v8i16 _2) { return __lsx_vpickod_h(_1, _2); } ++// CHECK-LABEL: @vpickod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vpickod_w(v4i32 _1, v4i32 _2) { return __lsx_vpickod_w(_1, _2); } ++// CHECK-LABEL: @vpickod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vpickod_d(v2i64 _1, v2i64 _2) { return __lsx_vpickod_d(_1, _2); } ++// CHECK-LABEL: @vilvh_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vilvh_b(v16i8 _1, v16i8 _2) { return __lsx_vilvh_b(_1, _2); } ++// CHECK-LABEL: @vilvh_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vilvh_h(v8i16 _1, v8i16 _2) { return __lsx_vilvh_h(_1, _2); } ++// CHECK-LABEL: @vilvh_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vilvh_w(v4i32 _1, v4i32 _2) { return __lsx_vilvh_w(_1, _2); } ++// CHECK-LABEL: @vilvh_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vilvh_d(v2i64 _1, v2i64 _2) { return __lsx_vilvh_d(_1, _2); } ++// CHECK-LABEL: @vilvl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vilvl_b(v16i8 _1, v16i8 _2) { return __lsx_vilvl_b(_1, _2); } ++// CHECK-LABEL: @vilvl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vilvl_h(v8i16 _1, v8i16 _2) { return __lsx_vilvl_h(_1, _2); } ++// CHECK-LABEL: @vilvl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vilvl_w(v4i32 _1, v4i32 _2) { return __lsx_vilvl_w(_1, _2); } ++// CHECK-LABEL: @vilvl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vilvl_d(v2i64 _1, v2i64 _2) { return __lsx_vilvl_d(_1, _2); } ++// CHECK-LABEL: @vpackev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vpackev_b(v16i8 _1, v16i8 _2) { return __lsx_vpackev_b(_1, _2); } ++// CHECK-LABEL: @vpackev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vpackev_h(v8i16 _1, v8i16 _2) { return __lsx_vpackev_h(_1, _2); } ++// CHECK-LABEL: @vpackev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vpackev_w(v4i32 _1, v4i32 _2) { return __lsx_vpackev_w(_1, _2); } ++// CHECK-LABEL: @vpackev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vpackev_d(v2i64 _1, v2i64 _2) { return __lsx_vpackev_d(_1, _2); } ++// CHECK-LABEL: @vpackod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vpackod_b(v16i8 _1, v16i8 _2) { return __lsx_vpackod_b(_1, _2); } ++// CHECK-LABEL: @vpackod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vpackod_h(v8i16 _1, v8i16 _2) { return __lsx_vpackod_h(_1, _2); } ++// CHECK-LABEL: @vpackod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vpackod_w(v4i32 _1, v4i32 _2) { return __lsx_vpackod_w(_1, _2); } ++// CHECK-LABEL: @vpackod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vpackod_d(v2i64 _1, v2i64 _2) { return __lsx_vpackod_d(_1, _2); } ++// CHECK-LABEL: @vshuf_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vshuf_h(v8i16 _1, v8i16 _2, v8i16 _3) { ++ return __lsx_vshuf_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vshuf_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vshuf_w(v4i32 _1, v4i32 _2, v4i32 _3) { ++ return __lsx_vshuf_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vshuf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vshuf_d(v2i64 _1, v2i64 _2, v2i64 _3) { ++ return __lsx_vshuf_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vand_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vand_v(v16u8 _1, v16u8 _2) { return __lsx_vand_v(_1, _2); } ++// CHECK-LABEL: @vandi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vandi_b(v16u8 _1) { return __lsx_vandi_b(_1, 1); } ++// CHECK-LABEL: @vor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vor_v(v16u8 _1, v16u8 _2) { return __lsx_vor_v(_1, _2); } ++// CHECK-LABEL: @vori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vori_b(v16u8 _1) { return __lsx_vori_b(_1, 1); } ++// CHECK-LABEL: @vnor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vnor_v(v16u8 _1, v16u8 _2) { return __lsx_vnor_v(_1, _2); } ++// CHECK-LABEL: @vnori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vnori_b(v16u8 _1) { return __lsx_vnori_b(_1, 1); } ++// CHECK-LABEL: @vxor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vxor_v(v16u8 _1, v16u8 _2) { return __lsx_vxor_v(_1, _2); } ++// CHECK-LABEL: @vxori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vxori_b(v16u8 _1) { return __lsx_vxori_b(_1, 1); } ++// CHECK-LABEL: @vbitsel_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vbitsel_v(v16u8 _1, v16u8 _2, v16u8 _3) { ++ return __lsx_vbitsel_v(_1, _2, _3); ++} ++// CHECK-LABEL: @vbitseli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vbitseli_b(v16u8 _1, v16u8 _2) { return __lsx_vbitseli_b(_1, _2, 1); } ++// CHECK-LABEL: @vshuf4i_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vshuf4i_b(v16i8 _1) { return __lsx_vshuf4i_b(_1, 1); } ++// CHECK-LABEL: @vshuf4i_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vshuf4i_h(v8i16 _1) { return __lsx_vshuf4i_h(_1, 1); } ++// CHECK-LABEL: @vshuf4i_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vshuf4i_w(v4i32 _1) { return __lsx_vshuf4i_w(_1, 1); } ++// CHECK-LABEL: @vreplgr2vr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vreplgr2vr_b(int _1) { return __lsx_vreplgr2vr_b(_1); } ++// CHECK-LABEL: @vreplgr2vr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vreplgr2vr_h(int _1) { return __lsx_vreplgr2vr_h(_1); } ++// CHECK-LABEL: @vreplgr2vr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vreplgr2vr_w(int _1) { return __lsx_vreplgr2vr_w(_1); } ++// CHECK-LABEL: @vreplgr2vr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vreplgr2vr_d(long _1) { return __lsx_vreplgr2vr_d(_1); } ++// CHECK-LABEL: @vpcnt_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vpcnt_b(v16i8 _1) { return __lsx_vpcnt_b(_1); } ++// CHECK-LABEL: @vpcnt_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vpcnt_h(v8i16 _1) { return __lsx_vpcnt_h(_1); } ++// CHECK-LABEL: @vpcnt_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vpcnt_w(v4i32 _1) { return __lsx_vpcnt_w(_1); } ++// CHECK-LABEL: @vpcnt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vpcnt_d(v2i64 _1) { return __lsx_vpcnt_d(_1); } ++// CHECK-LABEL: @vclo_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vclo_b(v16i8 _1) { return __lsx_vclo_b(_1); } ++// CHECK-LABEL: @vclo_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vclo_h(v8i16 _1) { return __lsx_vclo_h(_1); } ++// CHECK-LABEL: @vclo_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vclo_w(v4i32 _1) { return __lsx_vclo_w(_1); } ++// CHECK-LABEL: @vclo_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vclo_d(v2i64 _1) { return __lsx_vclo_d(_1); } ++// CHECK-LABEL: @vclz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vclz_b(v16i8 _1) { return __lsx_vclz_b(_1); } ++// CHECK-LABEL: @vclz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vclz_h(v8i16 _1) { return __lsx_vclz_h(_1); } ++// CHECK-LABEL: @vclz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vclz_w(v4i32 _1) { return __lsx_vclz_w(_1); } ++// CHECK-LABEL: @vclz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vclz_d(v2i64 _1) { return __lsx_vclz_d(_1); } ++// CHECK-LABEL: @vpickve2gr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int vpickve2gr_b(v16i8 _1) { return __lsx_vpickve2gr_b(_1, 1); } ++// CHECK-LABEL: @vpickve2gr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int vpickve2gr_h(v8i16 _1) { return __lsx_vpickve2gr_h(_1, 1); } ++// CHECK-LABEL: @vpickve2gr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int vpickve2gr_w(v4i32 _1) { return __lsx_vpickve2gr_w(_1, 1); } ++// CHECK-LABEL: @vpickve2gr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i64 [[TMP0]] ++// ++long vpickve2gr_d(v2i64 _1) { return __lsx_vpickve2gr_d(_1, 1); } ++// CHECK-LABEL: @vpickve2gr_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++unsigned int vpickve2gr_bu(v16i8 _1) { return __lsx_vpickve2gr_bu(_1, 1); } ++// CHECK-LABEL: @vpickve2gr_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++unsigned int vpickve2gr_hu(v8i16 _1) { return __lsx_vpickve2gr_hu(_1, 1); } ++// CHECK-LABEL: @vpickve2gr_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++unsigned int vpickve2gr_wu(v4i32 _1) { return __lsx_vpickve2gr_wu(_1, 1); } ++// CHECK-LABEL: @vpickve2gr_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i64 [[TMP0]] ++// ++unsigned long int vpickve2gr_du(v2i64 _1) { return __lsx_vpickve2gr_du(_1, 1); } ++// CHECK-LABEL: @vinsgr2vr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> [[_1:%.*]], i32 1, i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vinsgr2vr_b(v16i8 _1) { return __lsx_vinsgr2vr_b(_1, 1, 1); } ++// CHECK-LABEL: @vinsgr2vr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> [[_1:%.*]], i32 1, i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vinsgr2vr_h(v8i16 _1) { return __lsx_vinsgr2vr_h(_1, 1, 1); } ++// CHECK-LABEL: @vinsgr2vr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> [[_1:%.*]], i32 1, i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vinsgr2vr_w(v4i32 _1) { return __lsx_vinsgr2vr_w(_1, 1, 1); } ++// CHECK-LABEL: @vinsgr2vr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> [[_1:%.*]], i64 1, i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vinsgr2vr_d(v2i64 _1) { return __lsx_vinsgr2vr_d(_1, 1, 1); } ++// CHECK-LABEL: @vfadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfadd_s(v4f32 _1, v4f32 _2) { return __lsx_vfadd_s(_1, _2); } ++// CHECK-LABEL: @vfadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfadd_d(v2f64 _1, v2f64 _2) { return __lsx_vfadd_d(_1, _2); } ++// CHECK-LABEL: @vfsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfsub_s(v4f32 _1, v4f32 _2) { return __lsx_vfsub_s(_1, _2); } ++// CHECK-LABEL: @vfsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfsub_d(v2f64 _1, v2f64 _2) { return __lsx_vfsub_d(_1, _2); } ++// CHECK-LABEL: @vfmul_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfmul_s(v4f32 _1, v4f32 _2) { return __lsx_vfmul_s(_1, _2); } ++// CHECK-LABEL: @vfmul_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfmul_d(v2f64 _1, v2f64 _2) { return __lsx_vfmul_d(_1, _2); } ++// CHECK-LABEL: @vfdiv_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfdiv_s(v4f32 _1, v4f32 _2) { return __lsx_vfdiv_s(_1, _2); } ++// CHECK-LABEL: @vfdiv_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfdiv_d(v2f64 _1, v2f64 _2) { return __lsx_vfdiv_d(_1, _2); } ++// CHECK-LABEL: @vfcvt_h_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vfcvt_h_s(v4f32 _1, v4f32 _2) { return __lsx_vfcvt_h_s(_1, _2); } ++// CHECK-LABEL: @vfcvt_s_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfcvt_s_d(v2f64 _1, v2f64 _2) { return __lsx_vfcvt_s_d(_1, _2); } ++// CHECK-LABEL: @vfmin_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfmin_s(v4f32 _1, v4f32 _2) { return __lsx_vfmin_s(_1, _2); } ++// CHECK-LABEL: @vfmin_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfmin_d(v2f64 _1, v2f64 _2) { return __lsx_vfmin_d(_1, _2); } ++// CHECK-LABEL: @vfmina_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfmina_s(v4f32 _1, v4f32 _2) { return __lsx_vfmina_s(_1, _2); } ++// CHECK-LABEL: @vfmina_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfmina_d(v2f64 _1, v2f64 _2) { return __lsx_vfmina_d(_1, _2); } ++// CHECK-LABEL: @vfmax_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfmax_s(v4f32 _1, v4f32 _2) { return __lsx_vfmax_s(_1, _2); } ++// CHECK-LABEL: @vfmax_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfmax_d(v2f64 _1, v2f64 _2) { return __lsx_vfmax_d(_1, _2); } ++// CHECK-LABEL: @vfmaxa_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfmaxa_s(v4f32 _1, v4f32 _2) { return __lsx_vfmaxa_s(_1, _2); } ++// CHECK-LABEL: @vfmaxa_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfmaxa_d(v2f64 _1, v2f64 _2) { return __lsx_vfmaxa_d(_1, _2); } ++// CHECK-LABEL: @vfclass_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfclass_s(v4f32 _1) { return __lsx_vfclass_s(_1); } ++// CHECK-LABEL: @vfclass_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfclass_d(v2f64 _1) { return __lsx_vfclass_d(_1); } ++// CHECK-LABEL: @vfsqrt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfsqrt_s(v4f32 _1) { return __lsx_vfsqrt_s(_1); } ++// CHECK-LABEL: @vfsqrt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfsqrt_d(v2f64 _1) { return __lsx_vfsqrt_d(_1); } ++// CHECK-LABEL: @vfrecip_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfrecip_s(v4f32 _1) { return __lsx_vfrecip_s(_1); } ++// CHECK-LABEL: @vfrecip_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfrecip_d(v2f64 _1) { return __lsx_vfrecip_d(_1); } ++// CHECK-LABEL: @vfrint_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfrint_s(v4f32 _1) { return __lsx_vfrint_s(_1); } ++// CHECK-LABEL: @vfrint_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfrint_d(v2f64 _1) { return __lsx_vfrint_d(_1); } ++// CHECK-LABEL: @vfrsqrt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfrsqrt_s(v4f32 _1) { return __lsx_vfrsqrt_s(_1); } ++// CHECK-LABEL: @vfrsqrt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfrsqrt_d(v2f64 _1) { return __lsx_vfrsqrt_d(_1); } ++// CHECK-LABEL: @vflogb_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vflogb_s(v4f32 _1) { return __lsx_vflogb_s(_1); } ++// CHECK-LABEL: @vflogb_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vflogb_d(v2f64 _1) { return __lsx_vflogb_d(_1); } ++// CHECK-LABEL: @vfcvth_s_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfcvth_s_h(v8i16 _1) { return __lsx_vfcvth_s_h(_1); } ++// CHECK-LABEL: @vfcvth_d_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfcvth_d_s(v4f32 _1) { return __lsx_vfcvth_d_s(_1); } ++// CHECK-LABEL: @vfcvtl_s_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfcvtl_s_h(v8i16 _1) { return __lsx_vfcvtl_s_h(_1); } ++// CHECK-LABEL: @vfcvtl_d_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfcvtl_d_s(v4f32 _1) { return __lsx_vfcvtl_d_s(_1); } ++// CHECK-LABEL: @vftint_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftint_w_s(v4f32 _1) { return __lsx_vftint_w_s(_1); } ++// CHECK-LABEL: @vftint_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftint_l_d(v2f64 _1) { return __lsx_vftint_l_d(_1); } ++// CHECK-LABEL: @vftint_wu_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vftint_wu_s(v4f32 _1) { return __lsx_vftint_wu_s(_1); } ++// CHECK-LABEL: @vftint_lu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vftint_lu_d(v2f64 _1) { return __lsx_vftint_lu_d(_1); } ++// CHECK-LABEL: @vftintrz_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftintrz_w_s(v4f32 _1) { return __lsx_vftintrz_w_s(_1); } ++// CHECK-LABEL: @vftintrz_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrz_l_d(v2f64 _1) { return __lsx_vftintrz_l_d(_1); } ++// CHECK-LABEL: @vftintrz_wu_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vftintrz_wu_s(v4f32 _1) { return __lsx_vftintrz_wu_s(_1); } ++// CHECK-LABEL: @vftintrz_lu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vftintrz_lu_d(v2f64 _1) { return __lsx_vftintrz_lu_d(_1); } ++// CHECK-LABEL: @vffint_s_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vffint_s_w(v4i32 _1) { return __lsx_vffint_s_w(_1); } ++// CHECK-LABEL: @vffint_d_l( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vffint_d_l(v2i64 _1) { return __lsx_vffint_d_l(_1); } ++// CHECK-LABEL: @vffint_s_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vffint_s_wu(v4u32 _1) { return __lsx_vffint_s_wu(_1); } ++// CHECK-LABEL: @vffint_d_lu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vffint_d_lu(v2u64 _1) { return __lsx_vffint_d_lu(_1); } ++// CHECK-LABEL: @vandn_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vandn_v(v16u8 _1, v16u8 _2) { return __lsx_vandn_v(_1, _2); } ++// CHECK-LABEL: @vneg_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vneg_b(v16i8 _1) { return __lsx_vneg_b(_1); } ++// CHECK-LABEL: @vneg_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vneg_h(v8i16 _1) { return __lsx_vneg_h(_1); } ++// CHECK-LABEL: @vneg_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vneg_w(v4i32 _1) { return __lsx_vneg_w(_1); } ++// CHECK-LABEL: @vneg_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vneg_d(v2i64 _1) { return __lsx_vneg_d(_1); } ++// CHECK-LABEL: @vmuh_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmuh_b(v16i8 _1, v16i8 _2) { return __lsx_vmuh_b(_1, _2); } ++// CHECK-LABEL: @vmuh_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmuh_h(v8i16 _1, v8i16 _2) { return __lsx_vmuh_h(_1, _2); } ++// CHECK-LABEL: @vmuh_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmuh_w(v4i32 _1, v4i32 _2) { return __lsx_vmuh_w(_1, _2); } ++// CHECK-LABEL: @vmuh_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmuh_d(v2i64 _1, v2i64 _2) { return __lsx_vmuh_d(_1, _2); } ++// CHECK-LABEL: @vmuh_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vmuh_bu(v16u8 _1, v16u8 _2) { return __lsx_vmuh_bu(_1, _2); } ++// CHECK-LABEL: @vmuh_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vmuh_hu(v8u16 _1, v8u16 _2) { return __lsx_vmuh_hu(_1, _2); } ++// CHECK-LABEL: @vmuh_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vmuh_wu(v4u32 _1, v4u32 _2) { return __lsx_vmuh_wu(_1, _2); } ++// CHECK-LABEL: @vmuh_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmuh_du(v2u64 _1, v2u64 _2) { return __lsx_vmuh_du(_1, _2); } ++// CHECK-LABEL: @vsllwil_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsllwil_h_b(v16i8 _1) { return __lsx_vsllwil_h_b(_1, 1); } ++// CHECK-LABEL: @vsllwil_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsllwil_w_h(v8i16 _1) { return __lsx_vsllwil_w_h(_1, 1); } ++// CHECK-LABEL: @vsllwil_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsllwil_d_w(v4i32 _1) { return __lsx_vsllwil_d_w(_1, 1); } ++// CHECK-LABEL: @vsllwil_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vsllwil_hu_bu(v16u8 _1) { return __lsx_vsllwil_hu_bu(_1, 1); } ++// CHECK-LABEL: @vsllwil_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vsllwil_wu_hu(v8u16 _1) { return __lsx_vsllwil_wu_hu(_1, 1); } ++// CHECK-LABEL: @vsllwil_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vsllwil_du_wu(v4u32 _1) { return __lsx_vsllwil_du_wu(_1, 1); } ++// CHECK-LABEL: @vsran_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsran_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsran_b_h(_1, _2); } ++// CHECK-LABEL: @vsran_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsran_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsran_h_w(_1, _2); } ++// CHECK-LABEL: @vsran_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsran_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsran_w_d(_1, _2); } ++// CHECK-LABEL: @vssran_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssran_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssran_b_h(_1, _2); } ++// CHECK-LABEL: @vssran_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssran_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssran_h_w(_1, _2); } ++// CHECK-LABEL: @vssran_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssran_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssran_w_d(_1, _2); } ++// CHECK-LABEL: @vssran_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssran_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssran_bu_h(_1, _2); } ++// CHECK-LABEL: @vssran_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssran_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssran_hu_w(_1, _2); } ++// CHECK-LABEL: @vssran_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssran_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssran_wu_d(_1, _2); } ++// CHECK-LABEL: @vsrarn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrarn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrarn_b_h(_1, _2); } ++// CHECK-LABEL: @vsrarn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrarn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrarn_h_w(_1, _2); } ++// CHECK-LABEL: @vsrarn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrarn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrarn_w_d(_1, _2); } ++// CHECK-LABEL: @vssrarn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssrarn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrarn_b_h(_1, _2); } ++// CHECK-LABEL: @vssrarn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssrarn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrarn_h_w(_1, _2); } ++// CHECK-LABEL: @vssrarn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssrarn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrarn_w_d(_1, _2); } ++// CHECK-LABEL: @vssrarn_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssrarn_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrarn_bu_h(_1, _2); } ++// CHECK-LABEL: @vssrarn_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssrarn_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrarn_hu_w(_1, _2); } ++// CHECK-LABEL: @vssrarn_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssrarn_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrarn_wu_d(_1, _2); } ++// CHECK-LABEL: @vsrln_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrln_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrln_b_h(_1, _2); } ++// CHECK-LABEL: @vsrln_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrln_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrln_h_w(_1, _2); } ++// CHECK-LABEL: @vsrln_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrln_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrln_w_d(_1, _2); } ++// CHECK-LABEL: @vssrln_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssrln_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrln_bu_h(_1, _2); } ++// CHECK-LABEL: @vssrln_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssrln_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrln_hu_w(_1, _2); } ++// CHECK-LABEL: @vssrln_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssrln_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrln_wu_d(_1, _2); } ++// CHECK-LABEL: @vsrlrn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrlrn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrlrn_b_h(_1, _2); } ++// CHECK-LABEL: @vsrlrn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrlrn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrlrn_h_w(_1, _2); } ++// CHECK-LABEL: @vsrlrn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrlrn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrlrn_w_d(_1, _2); } ++// CHECK-LABEL: @vssrlrn_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssrlrn_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrlrn_bu_h(_1, _2); } ++// CHECK-LABEL: @vssrlrn_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssrlrn_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrlrn_hu_w(_1, _2); } ++// CHECK-LABEL: @vssrlrn_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssrlrn_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrlrn_wu_d(_1, _2); } ++// CHECK-LABEL: @vfrstpi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vfrstpi_b(v16i8 _1, v16i8 _2) { return __lsx_vfrstpi_b(_1, _2, 1); } ++// CHECK-LABEL: @vfrstpi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vfrstpi_h(v8i16 _1, v8i16 _2) { return __lsx_vfrstpi_h(_1, _2, 1); } ++// CHECK-LABEL: @vfrstp_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vfrstp_b(v16i8 _1, v16i8 _2, v16i8 _3) { ++ return __lsx_vfrstp_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vfrstp_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vfrstp_h(v8i16 _1, v8i16 _2, v8i16 _3) { ++ return __lsx_vfrstp_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vshuf4i_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vshuf4i_d(v2i64 _1, v2i64 _2) { return __lsx_vshuf4i_d(_1, _2, 1); } ++// CHECK-LABEL: @vbsrl_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vbsrl_v(v16i8 _1) { return __lsx_vbsrl_v(_1, 1); } ++// CHECK-LABEL: @vbsll_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vbsll_v(v16i8 _1) { return __lsx_vbsll_v(_1, 1); } ++// CHECK-LABEL: @vextrins_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vextrins_b(v16i8 _1, v16i8 _2) { return __lsx_vextrins_b(_1, _2, 1); } ++// CHECK-LABEL: @vextrins_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vextrins_h(v8i16 _1, v8i16 _2) { return __lsx_vextrins_h(_1, _2, 1); } ++// CHECK-LABEL: @vextrins_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vextrins_w(v4i32 _1, v4i32 _2) { return __lsx_vextrins_w(_1, _2, 1); } ++// CHECK-LABEL: @vextrins_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vextrins_d(v2i64 _1, v2i64 _2) { return __lsx_vextrins_d(_1, _2, 1); } ++// CHECK-LABEL: @vmskltz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmskltz_b(v16i8 _1) { return __lsx_vmskltz_b(_1); } ++// CHECK-LABEL: @vmskltz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmskltz_h(v8i16 _1) { return __lsx_vmskltz_h(_1); } ++// CHECK-LABEL: @vmskltz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmskltz_w(v4i32 _1) { return __lsx_vmskltz_w(_1); } ++// CHECK-LABEL: @vmskltz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmskltz_d(v2i64 _1) { return __lsx_vmskltz_d(_1); } ++// CHECK-LABEL: @vsigncov_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsigncov_b(v16i8 _1, v16i8 _2) { return __lsx_vsigncov_b(_1, _2); } ++// CHECK-LABEL: @vsigncov_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsigncov_h(v8i16 _1, v8i16 _2) { return __lsx_vsigncov_h(_1, _2); } ++// CHECK-LABEL: @vsigncov_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsigncov_w(v4i32 _1, v4i32 _2) { return __lsx_vsigncov_w(_1, _2); } ++// CHECK-LABEL: @vsigncov_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsigncov_d(v2i64 _1, v2i64 _2) { return __lsx_vsigncov_d(_1, _2); } ++// CHECK-LABEL: @vfmadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { ++ return __lsx_vfmadd_s(_1, _2, _3); ++} ++// CHECK-LABEL: @vfmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { ++ return __lsx_vfmadd_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vfmsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { ++ return __lsx_vfmsub_s(_1, _2, _3); ++} ++// CHECK-LABEL: @vfmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { ++ return __lsx_vfmsub_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vfnmadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfnmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { ++ return __lsx_vfnmadd_s(_1, _2, _3); ++} ++// CHECK-LABEL: @vfnmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfnmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { ++ return __lsx_vfnmadd_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vfnmsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfnmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { ++ return __lsx_vfnmsub_s(_1, _2, _3); ++} ++// CHECK-LABEL: @vfnmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfnmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { ++ return __lsx_vfnmsub_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vftintrne_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftintrne_w_s(v4f32 _1) { return __lsx_vftintrne_w_s(_1); } ++// CHECK-LABEL: @vftintrne_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrne_l_d(v2f64 _1) { return __lsx_vftintrne_l_d(_1); } ++// CHECK-LABEL: @vftintrp_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftintrp_w_s(v4f32 _1) { return __lsx_vftintrp_w_s(_1); } ++// CHECK-LABEL: @vftintrp_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrp_l_d(v2f64 _1) { return __lsx_vftintrp_l_d(_1); } ++// CHECK-LABEL: @vftintrm_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftintrm_w_s(v4f32 _1) { return __lsx_vftintrm_w_s(_1); } ++// CHECK-LABEL: @vftintrm_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrm_l_d(v2f64 _1) { return __lsx_vftintrm_l_d(_1); } ++// CHECK-LABEL: @vftint_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftint_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftint_w_d(_1, _2); } ++// CHECK-LABEL: @vffint_s_l( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vffint_s_l(v2i64 _1, v2i64 _2) { return __lsx_vffint_s_l(_1, _2); } ++// CHECK-LABEL: @vftintrz_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftintrz_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrz_w_d(_1, _2); } ++// CHECK-LABEL: @vftintrp_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftintrp_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrp_w_d(_1, _2); } ++// CHECK-LABEL: @vftintrm_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftintrm_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrm_w_d(_1, _2); } ++// CHECK-LABEL: @vftintrne_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftintrne_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrne_w_d(_1, _2); } ++// CHECK-LABEL: @vftintl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintl_l_s(v4f32 _1) { return __lsx_vftintl_l_s(_1); } ++// CHECK-LABEL: @vftinth_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftinth_l_s(v4f32 _1) { return __lsx_vftinth_l_s(_1); } ++// CHECK-LABEL: @vffinth_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vffinth_d_w(v4i32 _1) { return __lsx_vffinth_d_w(_1); } ++// CHECK-LABEL: @vffintl_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vffintl_d_w(v4i32 _1) { return __lsx_vffintl_d_w(_1); } ++// CHECK-LABEL: @vftintrzl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrzl_l_s(v4f32 _1) { return __lsx_vftintrzl_l_s(_1); } ++// CHECK-LABEL: @vftintrzh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrzh_l_s(v4f32 _1) { return __lsx_vftintrzh_l_s(_1); } ++// CHECK-LABEL: @vftintrpl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrpl_l_s(v4f32 _1) { return __lsx_vftintrpl_l_s(_1); } ++// CHECK-LABEL: @vftintrph_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrph_l_s(v4f32 _1) { return __lsx_vftintrph_l_s(_1); } ++// CHECK-LABEL: @vftintrml_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrml_l_s(v4f32 _1) { return __lsx_vftintrml_l_s(_1); } ++// CHECK-LABEL: @vftintrmh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrmh_l_s(v4f32 _1) { return __lsx_vftintrmh_l_s(_1); } ++// CHECK-LABEL: @vftintrnel_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrnel_l_s(v4f32 _1) { return __lsx_vftintrnel_l_s(_1); } ++// CHECK-LABEL: @vftintrneh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrneh_l_s(v4f32 _1) { return __lsx_vftintrneh_l_s(_1); } ++// CHECK-LABEL: @vfrintrne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> ++// CHECK-NEXT: ret <4 x i32> [[TMP1]] ++// ++v4i32 vfrintrne_s(v4f32 _1) { return __lsx_vfrintrne_s(_1); } ++// CHECK-LABEL: @vfrintrne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> ++// CHECK-NEXT: ret <2 x i64> [[TMP1]] ++// ++v2i64 vfrintrne_d(v2f64 _1) { return __lsx_vfrintrne_d(_1); } ++// CHECK-LABEL: @vfrintrz_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> ++// CHECK-NEXT: ret <4 x i32> [[TMP1]] ++// ++v4i32 vfrintrz_s(v4f32 _1) { return __lsx_vfrintrz_s(_1); } ++// CHECK-LABEL: @vfrintrz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> ++// CHECK-NEXT: ret <2 x i64> [[TMP1]] ++// ++v2i64 vfrintrz_d(v2f64 _1) { return __lsx_vfrintrz_d(_1); } ++// CHECK-LABEL: @vfrintrp_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> ++// CHECK-NEXT: ret <4 x i32> [[TMP1]] ++// ++v4i32 vfrintrp_s(v4f32 _1) { return __lsx_vfrintrp_s(_1); } ++// CHECK-LABEL: @vfrintrp_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> ++// CHECK-NEXT: ret <2 x i64> [[TMP1]] ++// ++v2i64 vfrintrp_d(v2f64 _1) { return __lsx_vfrintrp_d(_1); } ++// CHECK-LABEL: @vfrintrm_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> ++// CHECK-NEXT: ret <4 x i32> [[TMP1]] ++// ++v4i32 vfrintrm_s(v4f32 _1) { return __lsx_vfrintrm_s(_1); } ++// CHECK-LABEL: @vfrintrm_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> ++// CHECK-NEXT: ret <2 x i64> [[TMP1]] ++// ++v2i64 vfrintrm_d(v2f64 _1) { return __lsx_vfrintrm_d(_1); } ++// CHECK-LABEL: @vstelm_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1, i32 1) ++// CHECK-NEXT: ret void ++// ++void vstelm_b(v16i8 _1, void *_2) { return __lsx_vstelm_b(_1, _2, 1, 1); } ++// CHECK-LABEL: @vstelm_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> [[_1:%.*]], ptr [[_2:%.*]], i32 2, i32 1) ++// CHECK-NEXT: ret void ++// ++void vstelm_h(v8i16 _1, void *_2) { return __lsx_vstelm_h(_1, _2, 2, 1); } ++// CHECK-LABEL: @vstelm_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> [[_1:%.*]], ptr [[_2:%.*]], i32 4, i32 1) ++// CHECK-NEXT: ret void ++// ++void vstelm_w(v4i32 _1, void *_2) { return __lsx_vstelm_w(_1, _2, 4, 1); } ++// CHECK-LABEL: @vstelm_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> [[_1:%.*]], ptr [[_2:%.*]], i32 8, i32 1) ++// CHECK-NEXT: ret void ++// ++void vstelm_d(v2i64 _1, void *_2) { return __lsx_vstelm_d(_1, _2, 8, 1); } ++// CHECK-LABEL: @vaddwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vaddwev_d_w(_1, _2); } ++// CHECK-LABEL: @vaddwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vaddwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vaddwev_w_h(_1, _2); } ++// CHECK-LABEL: @vaddwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vaddwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vaddwev_h_b(_1, _2); } ++// CHECK-LABEL: @vaddwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vaddwod_d_w(_1, _2); } ++// CHECK-LABEL: @vaddwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vaddwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vaddwod_w_h(_1, _2); } ++// CHECK-LABEL: @vaddwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vaddwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vaddwod_h_b(_1, _2); } ++// CHECK-LABEL: @vaddwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vaddwev_d_wu(_1, _2); } ++// CHECK-LABEL: @vaddwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vaddwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vaddwev_w_hu(_1, _2); } ++// CHECK-LABEL: @vaddwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vaddwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vaddwev_h_bu(_1, _2); } ++// CHECK-LABEL: @vaddwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vaddwod_d_wu(_1, _2); } ++// CHECK-LABEL: @vaddwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vaddwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vaddwod_w_hu(_1, _2); } ++// CHECK-LABEL: @vaddwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vaddwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vaddwod_h_bu(_1, _2); } ++// CHECK-LABEL: @vaddwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwev_d_wu_w(v4u32 _1, v4i32 _2) { ++ return __lsx_vaddwev_d_wu_w(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vaddwev_w_hu_h(v8u16 _1, v8i16 _2) { ++ return __lsx_vaddwev_w_hu_h(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vaddwev_h_bu_b(v16u8 _1, v16i8 _2) { ++ return __lsx_vaddwev_h_bu_b(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwod_d_wu_w(v4u32 _1, v4i32 _2) { ++ return __lsx_vaddwod_d_wu_w(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vaddwod_w_hu_h(v8u16 _1, v8i16 _2) { ++ return __lsx_vaddwod_w_hu_h(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vaddwod_h_bu_b(v16u8 _1, v16i8 _2) { ++ return __lsx_vaddwod_h_bu_b(_1, _2); ++} ++// CHECK-LABEL: @vsubwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vsubwev_d_w(_1, _2); } ++// CHECK-LABEL: @vsubwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsubwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vsubwev_w_h(_1, _2); } ++// CHECK-LABEL: @vsubwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsubwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vsubwev_h_b(_1, _2); } ++// CHECK-LABEL: @vsubwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vsubwod_d_w(_1, _2); } ++// CHECK-LABEL: @vsubwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsubwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vsubwod_w_h(_1, _2); } ++// CHECK-LABEL: @vsubwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsubwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vsubwod_h_b(_1, _2); } ++// CHECK-LABEL: @vsubwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vsubwev_d_wu(_1, _2); } ++// CHECK-LABEL: @vsubwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsubwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vsubwev_w_hu(_1, _2); } ++// CHECK-LABEL: @vsubwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsubwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vsubwev_h_bu(_1, _2); } ++// CHECK-LABEL: @vsubwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vsubwod_d_wu(_1, _2); } ++// CHECK-LABEL: @vsubwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsubwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vsubwod_w_hu(_1, _2); } ++// CHECK-LABEL: @vsubwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsubwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vsubwod_h_bu(_1, _2); } ++// CHECK-LABEL: @vaddwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vaddwev_q_d(_1, _2); } ++// CHECK-LABEL: @vaddwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vaddwod_q_d(_1, _2); } ++// CHECK-LABEL: @vaddwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vaddwev_q_du(_1, _2); } ++// CHECK-LABEL: @vaddwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vaddwod_q_du(_1, _2); } ++// CHECK-LABEL: @vsubwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vsubwev_q_d(_1, _2); } ++// CHECK-LABEL: @vsubwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vsubwod_q_d(_1, _2); } ++// CHECK-LABEL: @vsubwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vsubwev_q_du(_1, _2); } ++// CHECK-LABEL: @vsubwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vsubwod_q_du(_1, _2); } ++// CHECK-LABEL: @vaddwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwev_q_du_d(v2u64 _1, v2i64 _2) { ++ return __lsx_vaddwev_q_du_d(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwod_q_du_d(v2u64 _1, v2i64 _2) { ++ return __lsx_vaddwod_q_du_d(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vmulwev_d_w(_1, _2); } ++// CHECK-LABEL: @vmulwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmulwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vmulwev_w_h(_1, _2); } ++// CHECK-LABEL: @vmulwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmulwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vmulwev_h_b(_1, _2); } ++// CHECK-LABEL: @vmulwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vmulwod_d_w(_1, _2); } ++// CHECK-LABEL: @vmulwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmulwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vmulwod_w_h(_1, _2); } ++// CHECK-LABEL: @vmulwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmulwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vmulwod_h_b(_1, _2); } ++// CHECK-LABEL: @vmulwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vmulwev_d_wu(_1, _2); } ++// CHECK-LABEL: @vmulwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmulwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vmulwev_w_hu(_1, _2); } ++// CHECK-LABEL: @vmulwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmulwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vmulwev_h_bu(_1, _2); } ++// CHECK-LABEL: @vmulwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vmulwod_d_wu(_1, _2); } ++// CHECK-LABEL: @vmulwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmulwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vmulwod_w_hu(_1, _2); } ++// CHECK-LABEL: @vmulwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmulwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vmulwod_h_bu(_1, _2); } ++// CHECK-LABEL: @vmulwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwev_d_wu_w(v4u32 _1, v4i32 _2) { ++ return __lsx_vmulwev_d_wu_w(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmulwev_w_hu_h(v8u16 _1, v8i16 _2) { ++ return __lsx_vmulwev_w_hu_h(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmulwev_h_bu_b(v16u8 _1, v16i8 _2) { ++ return __lsx_vmulwev_h_bu_b(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwod_d_wu_w(v4u32 _1, v4i32 _2) { ++ return __lsx_vmulwod_d_wu_w(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmulwod_w_hu_h(v8u16 _1, v8i16 _2) { ++ return __lsx_vmulwod_w_hu_h(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmulwod_h_bu_b(v16u8 _1, v16i8 _2) { ++ return __lsx_vmulwod_h_bu_b(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vmulwev_q_d(_1, _2); } ++// CHECK-LABEL: @vmulwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vmulwod_q_d(_1, _2); } ++// CHECK-LABEL: @vmulwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vmulwev_q_du(_1, _2); } ++// CHECK-LABEL: @vmulwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vmulwod_q_du(_1, _2); } ++// CHECK-LABEL: @vmulwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwev_q_du_d(v2u64 _1, v2i64 _2) { ++ return __lsx_vmulwev_q_du_d(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwod_q_du_d(v2u64 _1, v2i64 _2) { ++ return __lsx_vmulwod_q_du_d(_1, _2); ++} ++// CHECK-LABEL: @vhaddw_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vhaddw_q_d(v2i64 _1, v2i64 _2) { return __lsx_vhaddw_q_d(_1, _2); } ++// CHECK-LABEL: @vhaddw_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vhaddw_qu_du(v2u64 _1, v2u64 _2) { return __lsx_vhaddw_qu_du(_1, _2); } ++// CHECK-LABEL: @vhsubw_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vhsubw_q_d(v2i64 _1, v2i64 _2) { return __lsx_vhsubw_q_d(_1, _2); } ++// CHECK-LABEL: @vhsubw_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vhsubw_qu_du(v2u64 _1, v2u64 _2) { return __lsx_vhsubw_qu_du(_1, _2); } ++// CHECK-LABEL: @vmaddwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaddwev_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { ++ return __lsx_vmaddwev_d_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmaddwev_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { ++ return __lsx_vmaddwev_w_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmaddwev_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { ++ return __lsx_vmaddwev_h_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmaddwev_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { ++ return __lsx_vmaddwev_d_wu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vmaddwev_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { ++ return __lsx_vmaddwev_w_hu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vmaddwev_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { ++ return __lsx_vmaddwev_h_bu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaddwod_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { ++ return __lsx_vmaddwod_d_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmaddwod_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { ++ return __lsx_vmaddwod_w_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmaddwod_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { ++ return __lsx_vmaddwod_h_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmaddwod_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { ++ return __lsx_vmaddwod_d_wu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vmaddwod_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { ++ return __lsx_vmaddwod_w_hu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vmaddwod_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { ++ return __lsx_vmaddwod_h_bu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaddwev_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { ++ return __lsx_vmaddwev_d_wu_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmaddwev_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { ++ return __lsx_vmaddwev_w_hu_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmaddwev_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { ++ return __lsx_vmaddwev_h_bu_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaddwod_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { ++ return __lsx_vmaddwod_d_wu_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmaddwod_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { ++ return __lsx_vmaddwod_w_hu_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmaddwod_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { ++ return __lsx_vmaddwod_h_bu_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaddwev_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { ++ return __lsx_vmaddwev_q_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaddwod_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { ++ return __lsx_vmaddwod_q_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmaddwev_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { ++ return __lsx_vmaddwev_q_du(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmaddwod_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { ++ return __lsx_vmaddwod_q_du(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaddwev_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { ++ return __lsx_vmaddwev_q_du_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaddwod_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { ++ return __lsx_vmaddwod_q_du_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vrotr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vrotr_b(v16i8 _1, v16i8 _2) { return __lsx_vrotr_b(_1, _2); } ++// CHECK-LABEL: @vrotr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vrotr_h(v8i16 _1, v8i16 _2) { return __lsx_vrotr_h(_1, _2); } ++// CHECK-LABEL: @vrotr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vrotr_w(v4i32 _1, v4i32 _2) { return __lsx_vrotr_w(_1, _2); } ++// CHECK-LABEL: @vrotr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vrotr_d(v2i64 _1, v2i64 _2) { return __lsx_vrotr_d(_1, _2); } ++// CHECK-LABEL: @vadd_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vadd_q(v2i64 _1, v2i64 _2) { return __lsx_vadd_q(_1, _2); } ++// CHECK-LABEL: @vsub_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsub_q(v2i64 _1, v2i64 _2) { return __lsx_vsub_q(_1, _2); } ++// CHECK-LABEL: @vldrepl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vldrepl_b(void *_1) { return __lsx_vldrepl_b(_1, 1); } ++// CHECK-LABEL: @vldrepl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr [[_1:%.*]], i32 2) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vldrepl_h(void *_1) { return __lsx_vldrepl_h(_1, 2); } ++// CHECK-LABEL: @vldrepl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr [[_1:%.*]], i32 4) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vldrepl_w(void *_1) { return __lsx_vldrepl_w(_1, 4); } ++// CHECK-LABEL: @vldrepl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr [[_1:%.*]], i32 8) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vldrepl_d(void *_1) { return __lsx_vldrepl_d(_1, 8); } ++// CHECK-LABEL: @vmskgez_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmskgez_b(v16i8 _1) { return __lsx_vmskgez_b(_1); } ++// CHECK-LABEL: @vmsknz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmsknz_b(v16i8 _1) { return __lsx_vmsknz_b(_1); } ++// CHECK-LABEL: @vexth_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vexth_h_b(v16i8 _1) { return __lsx_vexth_h_b(_1); } ++// CHECK-LABEL: @vexth_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vexth_w_h(v8i16 _1) { return __lsx_vexth_w_h(_1); } ++// CHECK-LABEL: @vexth_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vexth_d_w(v4i32 _1) { return __lsx_vexth_d_w(_1); } ++// CHECK-LABEL: @vexth_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vexth_q_d(v2i64 _1) { return __lsx_vexth_q_d(_1); } ++// CHECK-LABEL: @vexth_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vexth_hu_bu(v16u8 _1) { return __lsx_vexth_hu_bu(_1); } ++// CHECK-LABEL: @vexth_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vexth_wu_hu(v8u16 _1) { return __lsx_vexth_wu_hu(_1); } ++// CHECK-LABEL: @vexth_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vexth_du_wu(v4u32 _1) { return __lsx_vexth_du_wu(_1); } ++// CHECK-LABEL: @vexth_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vexth_qu_du(v2u64 _1) { return __lsx_vexth_qu_du(_1); } ++// CHECK-LABEL: @vrotri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vrotri_b(v16i8 _1) { return __lsx_vrotri_b(_1, 1); } ++// CHECK-LABEL: @vrotri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vrotri_h(v8i16 _1) { return __lsx_vrotri_h(_1, 1); } ++// CHECK-LABEL: @vrotri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vrotri_w(v4i32 _1) { return __lsx_vrotri_w(_1, 1); } ++// CHECK-LABEL: @vrotri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vrotri_d(v2i64 _1) { return __lsx_vrotri_d(_1, 1); } ++// CHECK-LABEL: @vextl_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vextl_q_d(v2i64 _1) { return __lsx_vextl_q_d(_1); } ++// CHECK-LABEL: @vsrlni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrlni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @vsrlni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrlni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @vsrlni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrlni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @vsrlni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrlni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @vsrlrni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrlrni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @vsrlrni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrlrni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @vsrlrni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrlrni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @vsrlrni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrlrni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @vssrlni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrlni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @vssrlni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrlni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @vssrlni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrlni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @vssrlni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrlni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @vssrlni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2) { return __lsx_vssrlni_bu_h(_1, _2, 1); } ++// CHECK-LABEL: @vssrlni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2) { return __lsx_vssrlni_hu_w(_1, _2, 1); } ++// CHECK-LABEL: @vssrlni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2) { return __lsx_vssrlni_wu_d(_1, _2, 1); } ++// CHECK-LABEL: @vssrlni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2) { return __lsx_vssrlni_du_q(_1, _2, 1); } ++// CHECK-LABEL: @vssrlrni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrlrni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @vssrlrni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrlrni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @vssrlrni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrlrni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @vssrlrni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrlrni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @vssrlrni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2) { ++ return __lsx_vssrlrni_bu_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlrni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2) { ++ return __lsx_vssrlrni_hu_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlrni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2) { ++ return __lsx_vssrlrni_wu_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlrni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2) { ++ return __lsx_vssrlrni_du_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrani_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrani_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrani_b_h(_1, _2, 1); } ++// CHECK-LABEL: @vsrani_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrani_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrani_h_w(_1, _2, 1); } ++// CHECK-LABEL: @vsrani_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrani_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrani_w_d(_1, _2, 1); } ++// CHECK-LABEL: @vsrani_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrani_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrani_d_q(_1, _2, 1); } ++// CHECK-LABEL: @vsrarni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrarni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @vsrarni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrarni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @vsrarni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrarni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @vsrarni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrarni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @vssrani_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssrani_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrani_b_h(_1, _2, 1); } ++// CHECK-LABEL: @vssrani_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssrani_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrani_h_w(_1, _2, 1); } ++// CHECK-LABEL: @vssrani_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssrani_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrani_w_d(_1, _2, 1); } ++// CHECK-LABEL: @vssrani_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vssrani_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrani_d_q(_1, _2, 1); } ++// CHECK-LABEL: @vssrani_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2) { return __lsx_vssrani_bu_h(_1, _2, 1); } ++// CHECK-LABEL: @vssrani_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2) { return __lsx_vssrani_hu_w(_1, _2, 1); } ++// CHECK-LABEL: @vssrani_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2) { return __lsx_vssrani_wu_d(_1, _2, 1); } ++// CHECK-LABEL: @vssrani_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vssrani_du_q(v2u64 _1, v2i64 _2) { return __lsx_vssrani_du_q(_1, _2, 1); } ++// CHECK-LABEL: @vssrarni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrarni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @vssrarni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrarni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @vssrarni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrarni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @vssrarni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrarni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @vssrarni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2) { ++ return __lsx_vssrarni_bu_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrarni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2) { ++ return __lsx_vssrarni_hu_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrarni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2) { ++ return __lsx_vssrarni_wu_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrarni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2) { ++ return __lsx_vssrarni_du_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vpermi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vpermi_w(v4i32 _1, v4i32 _2) { return __lsx_vpermi_w(_1, _2, 1); } ++// CHECK-LABEL: @vld( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vld(ptr [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vld(void *_1) { return __lsx_vld(_1, 1); } ++// CHECK-LABEL: @vst( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vst(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret void ++// ++void vst(v16i8 _1, void *_2) { return __lsx_vst(_1, _2, 1); } ++// CHECK-LABEL: @vssrlrn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssrlrn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrlrn_b_h(_1, _2); } ++// CHECK-LABEL: @vssrlrn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssrlrn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrlrn_h_w(_1, _2); } ++// CHECK-LABEL: @vssrlrn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssrlrn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrlrn_w_d(_1, _2); } ++// CHECK-LABEL: @vssrln_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssrln_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrln_b_h(_1, _2); } ++// CHECK-LABEL: @vssrln_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssrln_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrln_h_w(_1, _2); } ++// CHECK-LABEL: @vssrln_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssrln_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrln_w_d(_1, _2); } ++// CHECK-LABEL: @vorn_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vorn_v(v16i8 _1, v16i8 _2) { return __lsx_vorn_v(_1, _2); } ++// CHECK-LABEL: @vldi( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldi(i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vldi() { return __lsx_vldi(1); } ++// CHECK-LABEL: @vshuf_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) { ++ return __lsx_vshuf_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vldx( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldx(ptr [[_1:%.*]], i64 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vldx(void *_1) { return __lsx_vldx(_1, 1); } ++// CHECK-LABEL: @vstx( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstx(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i64 1) ++// CHECK-NEXT: ret void ++// ++void vstx(v16i8 _1, void *_2) { return __lsx_vstx(_1, _2, 1); } ++// CHECK-LABEL: @vextl_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vextl_qu_du(v2u64 _1) { return __lsx_vextl_qu_du(_1); } ++// CHECK-LABEL: @bnz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bnz_b(v16u8 _1) { return __lsx_bnz_b(_1); } ++// CHECK-LABEL: @bnz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bnz_d(v2u64 _1) { return __lsx_bnz_d(_1); } ++// CHECK-LABEL: @bnz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bnz_h(v8u16 _1) { return __lsx_bnz_h(_1); } ++// CHECK-LABEL: @bnz_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bnz_v(v16u8 _1) { return __lsx_bnz_v(_1); } ++// CHECK-LABEL: @bnz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bnz_w(v4u32 _1) { return __lsx_bnz_w(_1); } ++// CHECK-LABEL: @bz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bz_b(v16u8 _1) { return __lsx_bz_b(_1); } ++// CHECK-LABEL: @bz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bz_d(v2u64 _1) { return __lsx_bz_d(_1); } ++// CHECK-LABEL: @bz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bz_h(v8u16 _1) { return __lsx_bz_h(_1); } ++// CHECK-LABEL: @bz_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bz_v(v16u8 _1) { return __lsx_bz_v(_1); } ++// CHECK-LABEL: @bz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bz_w(v4u32 _1) { return __lsx_bz_w(_1); } ++// CHECK-LABEL: @vfcmp_caf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_caf_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_caf_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_caf_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_caf_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_caf_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_ceq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_ceq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_ceq_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_ceq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_ceq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_ceq_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_cle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_cle_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cle_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_cle_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_cle_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cle_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_clt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_clt_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_clt_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_clt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_clt_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_clt_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_cne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_cne_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cne_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_cne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_cne_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cne_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_cor_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_cor_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cor_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_cor_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_cor_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cor_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_cueq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_cueq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cueq_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_cueq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_cueq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cueq_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_cule_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_cule_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cule_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_cule_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_cule_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cule_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_cult_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_cult_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cult_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_cult_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_cult_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cult_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_cun_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_cun_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cun_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_cune_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_cune_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cune_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_cune_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_cune_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cune_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_cun_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_cun_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cun_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_saf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_saf_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_saf_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_saf_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_saf_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_saf_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_seq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_seq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_seq_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_seq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_seq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_seq_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_sle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_sle_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sle_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_sle_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_sle_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sle_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_slt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_slt_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_slt_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_slt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_slt_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_slt_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_sne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_sne_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sne_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_sne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_sne_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sne_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_sor_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_sor_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sor_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_sor_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_sor_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sor_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_sueq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_sueq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sueq_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_sueq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_sueq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sueq_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_sule_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_sule_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sule_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_sule_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_sule_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sule_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_sult_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_sult_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sult_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_sult_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_sult_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sult_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_sun_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_sun_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sun_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_sune_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_sune_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sune_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_sune_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_sune_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sune_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_sun_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_sun_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sun_s(_1, _2); } ++// CHECK-LABEL: @vrepli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vrepli_b() { return __lsx_vrepli_b(1); } ++// CHECK-LABEL: @vrepli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vrepli_d() { return __lsx_vrepli_d(1); } ++// CHECK-LABEL: @vrepli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vrepli_h() { return __lsx_vrepli_h(1); } ++// CHECK-LABEL: @vrepli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vrepli_w() { return __lsx_vrepli_w(1); } +diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-error.c b/clang/test/CodeGen/LoongArch/lsx/builtin-error.c +new file mode 100644 +index 000000000000..3fc5f73f1193 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/lsx/builtin-error.c +@@ -0,0 +1,1382 @@ ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -verify %s ++ ++typedef signed char v16i8 __attribute__((vector_size(16), aligned(16))); ++typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1))); ++typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1))); ++typedef short v8i16 __attribute__((vector_size(16), aligned(16))); ++typedef short v8i16_h __attribute__((vector_size(16), aligned(2))); ++typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2))); ++typedef int v4i32 __attribute__((vector_size(16), aligned(16))); ++typedef int v4i32_w __attribute__((vector_size(16), aligned(4))); ++typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4))); ++typedef long long v2i64 __attribute__((vector_size(16), aligned(16))); ++typedef long long v2i64_d __attribute__((vector_size(16), aligned(8))); ++typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8))); ++typedef float v4f32 __attribute__((vector_size(16), aligned(16))); ++typedef float v4f32_w __attribute__((vector_size(16), aligned(4))); ++typedef double v2f64 __attribute__((vector_size(16), aligned(16))); ++typedef double v2f64_d __attribute__((vector_size(16), aligned(8))); ++ ++typedef long long __m128i __attribute__((__vector_size__(16), __may_alias__)); ++typedef float __m128 __attribute__((__vector_size__(16), __may_alias__)); ++typedef double __m128d __attribute__((__vector_size__(16), __may_alias__)); ++ ++v16i8 vslli_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vslli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vslli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vslli_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vslli_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vslli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vslli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vslli_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vslli_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vslli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslli_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vslli_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vslli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vslli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vslli_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrai_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vsrai_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vsrai_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vsrai_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrai_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vsrai_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrai_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrai_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrai_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vsrai_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrai_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrai_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrai_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vsrai_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrai_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrai_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrari_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vsrari_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vsrari_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vsrari_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrari_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vsrari_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrari_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrari_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrari_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vsrari_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrari_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrari_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrari_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vsrari_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrari_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrari_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrli_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vsrli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vsrli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vsrli_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrli_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vsrli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrli_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrli_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vsrli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrli_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrli_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vsrli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrli_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrlri_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vsrlri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vsrlri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vsrlri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrlri_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vsrlri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrlri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrlri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrlri_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vsrlri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrlri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrlri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrlri_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vsrlri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrlri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrlri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vbitclri_b(v16u8 _1, int var) { ++ v16u8 res = __builtin_lsx_vbitclri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vbitclri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vbitclri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_b' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vbitclri_h(v8u16 _1, int var) { ++ v8u16 res = __builtin_lsx_vbitclri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vbitclri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vbitclri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_h' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vbitclri_w(v4u32 _1, int var) { ++ v4u32 res = __builtin_lsx_vbitclri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vbitclri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vbitclri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_w' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vbitclri_d(v2u64 _1, int var) { ++ v2u64 res = __builtin_lsx_vbitclri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vbitclri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vbitclri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vbitseti_b(v16u8 _1, int var) { ++ v16u8 res = __builtin_lsx_vbitseti_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vbitseti_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vbitseti_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_b' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vbitseti_h(v8u16 _1, int var) { ++ v8u16 res = __builtin_lsx_vbitseti_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vbitseti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vbitseti_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_h' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vbitseti_w(v4u32 _1, int var) { ++ v4u32 res = __builtin_lsx_vbitseti_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vbitseti_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vbitseti_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_w' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vbitseti_d(v2u64 _1, int var) { ++ v2u64 res = __builtin_lsx_vbitseti_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vbitseti_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vbitseti_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vbitrevi_b(v16u8 _1, int var) { ++ v16u8 res = __builtin_lsx_vbitrevi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vbitrevi_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vbitrevi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_b' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vbitrevi_h(v8u16 _1, int var) { ++ v8u16 res = __builtin_lsx_vbitrevi_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vbitrevi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vbitrevi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_h' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vbitrevi_w(v4u32 _1, int var) { ++ v4u32 res = __builtin_lsx_vbitrevi_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vbitrevi_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vbitrevi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_w' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vbitrevi_d(v2u64 _1, int var) { ++ v2u64 res = __builtin_lsx_vbitrevi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vbitrevi_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vbitrevi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vaddi_bu(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vaddi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vaddi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vaddi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_bu' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vaddi_hu(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vaddi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vaddi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vaddi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_hu' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vaddi_wu(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vaddi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vaddi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vaddi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_wu' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vaddi_du(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vaddi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vaddi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vaddi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsubi_bu(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vsubi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsubi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsubi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_bu' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsubi_hu(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vsubi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsubi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsubi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_hu' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsubi_wu(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vsubi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsubi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsubi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_wu' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsubi_du(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vsubi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsubi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsubi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vmaxi_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vmaxi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vmaxi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vmaxi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vmaxi_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vmaxi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vmaxi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vmaxi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vmaxi_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vmaxi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vmaxi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vmaxi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vmaxi_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vmaxi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vmaxi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vmaxi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vmaxi_bu(v16u8 _1, int var) { ++ v16u8 res = __builtin_lsx_vmaxi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vmaxi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vmaxi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_bu' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vmaxi_hu(v8u16 _1, int var) { ++ v8u16 res = __builtin_lsx_vmaxi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vmaxi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vmaxi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_hu' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vmaxi_wu(v4u32 _1, int var) { ++ v4u32 res = __builtin_lsx_vmaxi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vmaxi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vmaxi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_wu' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vmaxi_du(v2u64 _1, int var) { ++ v2u64 res = __builtin_lsx_vmaxi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vmaxi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vmaxi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vmini_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vmini_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vmini_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vmini_b(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vmini_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vmini_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vmini_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vmini_h(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_h' must be a constant integer}}} ++ return res; ++} ++ ++v4i32 vmini_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vmini_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vmini_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vmini_w(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vmini_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vmini_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vmini_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vmini_d(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vmini_bu(v16u8 _1, int var) { ++ v16u8 res = __builtin_lsx_vmini_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vmini_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vmini_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_bu' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vmini_hu(v8u16 _1, int var) { ++ v8u16 res = __builtin_lsx_vmini_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vmini_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vmini_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_hu' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vmini_wu(v4u32 _1, int var) { ++ v4u32 res = __builtin_lsx_vmini_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vmini_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vmini_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_wu' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vmini_du(v2u64 _1, int var) { ++ v2u64 res = __builtin_lsx_vmini_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vmini_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vmini_du(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vseqi_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vseqi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vseqi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vseqi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vseqi_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vseqi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vseqi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vseqi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vseqi_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vseqi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vseqi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vseqi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vseqi_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vseqi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vseqi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vseqi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vslti_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vslti_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vslti_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vslti_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vslti_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vslti_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vslti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vslti_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vslti_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vslti_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vslti_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vslti_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vslti_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vslti_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vslti_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vslti_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vslti_bu(v16u8 _1, int var) { ++ v16i8 res = __builtin_lsx_vslti_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslti_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslti_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_bu' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vslti_hu(v8u16 _1, int var) { ++ v8i16 res = __builtin_lsx_vslti_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslti_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslti_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_hu' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vslti_wu(v4u32 _1, int var) { ++ v4i32 res = __builtin_lsx_vslti_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslti_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslti_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_wu' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vslti_du(v2u64 _1, int var) { ++ v2i64 res = __builtin_lsx_vslti_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslti_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslti_du(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vslei_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vslei_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vslei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vslei_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vslei_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vslei_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vslei_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vslei_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vslei_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vslei_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vslei_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vslei_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vslei_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vslei_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vslei_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lsx_vslei_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vslei_bu(v16u8 _1, int var) { ++ v16i8 res = __builtin_lsx_vslei_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslei_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslei_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_bu' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vslei_hu(v8u16 _1, int var) { ++ v8i16 res = __builtin_lsx_vslei_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslei_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslei_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_hu' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vslei_wu(v4u32 _1, int var) { ++ v4i32 res = __builtin_lsx_vslei_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslei_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslei_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_wu' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vslei_du(v2u64 _1, int var) { ++ v2i64 res = __builtin_lsx_vslei_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslei_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vslei_du(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsat_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vsat_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vsat_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vsat_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsat_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vsat_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsat_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsat_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsat_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vsat_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsat_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsat_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsat_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vsat_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsat_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsat_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vsat_bu(v16u8 _1, int var) { ++ v16u8 res = __builtin_lsx_vsat_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vsat_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vsat_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_bu' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vsat_hu(v8u16 _1, int var) { ++ v8u16 res = __builtin_lsx_vsat_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsat_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsat_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_hu' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vsat_wu(v4u32 _1, int var) { ++ v4u32 res = __builtin_lsx_vsat_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsat_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsat_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_wu' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vsat_du(v2u64 _1, int var) { ++ v2u64 res = __builtin_lsx_vsat_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsat_du(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsat_du(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vreplvei_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vreplvei_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vreplvei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vreplvei_b(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vreplvei_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vreplvei_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vreplvei_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vreplvei_h(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vreplvei_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vreplvei_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __builtin_lsx_vreplvei_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __builtin_lsx_vreplvei_w(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vreplvei_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vreplvei_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} ++ res |= __builtin_lsx_vreplvei_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ res |= __builtin_lsx_vreplvei_d(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vandi_b(v16u8 _1, int var) { ++ v16u8 res = __builtin_lsx_vandi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vandi_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vandi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vandi_b' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vori_b(v16u8 _1, int var) { ++ v16u8 res = __builtin_lsx_vori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vori_b' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vnori_b(v16u8 _1, int var) { ++ v16u8 res = __builtin_lsx_vnori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vnori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vnori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vnori_b' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vxori_b(v16u8 _1, int var) { ++ v16u8 res = __builtin_lsx_vxori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vxori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vxori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vxori_b' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vbitseli_b(v16u8 _1, v16u8 _2, int var) { ++ v16u8 res = __builtin_lsx_vbitseli_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vbitseli_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vbitseli_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vbitseli_b' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vshuf4i_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vshuf4i_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vshuf4i_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vshuf4i_b(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vshuf4i_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vshuf4i_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vshuf4i_h(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vshuf4i_h(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vshuf4i_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vshuf4i_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vshuf4i_w(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vshuf4i_w(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_w' must be a constant integer}} ++ return res; ++} ++ ++int vpickve2gr_b(v16i8 _1, int var) { ++ int res = __builtin_lsx_vpickve2gr_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vpickve2gr_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vpickve2gr_b(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_b' must be a constant integer}} ++ return res; ++} ++ ++int vpickve2gr_h(v8i16 _1, int var) { ++ int res = __builtin_lsx_vpickve2gr_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vpickve2gr_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vpickve2gr_h(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_h' must be a constant integer}} ++ return res; ++} ++ ++int vpickve2gr_w(v4i32 _1, int var) { ++ int res = __builtin_lsx_vpickve2gr_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __builtin_lsx_vpickve2gr_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __builtin_lsx_vpickve2gr_w(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_w' must be a constant integer}} ++ return res; ++} ++ ++long vpickve2gr_d(v2i64 _1, int var) { ++ long res = __builtin_lsx_vpickve2gr_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} ++ res |= __builtin_lsx_vpickve2gr_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ res |= __builtin_lsx_vpickve2gr_d(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_d' must be a constant integer}} ++ return res; ++} ++ ++unsigned int vpickve2gr_bu(v16i8 _1, int var) { ++ unsigned int res = __builtin_lsx_vpickve2gr_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vpickve2gr_bu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vpickve2gr_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_bu' must be a constant integer}} ++ return res; ++} ++ ++unsigned int vpickve2gr_hu(v8i16 _1, int var) { ++ unsigned int res = __builtin_lsx_vpickve2gr_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vpickve2gr_hu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vpickve2gr_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_hu' must be a constant integer}} ++ return res; ++} ++ ++unsigned int vpickve2gr_wu(v4i32 _1, int var) { ++ unsigned int res = __builtin_lsx_vpickve2gr_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __builtin_lsx_vpickve2gr_wu(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __builtin_lsx_vpickve2gr_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_wu' must be a constant integer}} ++ return res; ++} ++ ++unsigned long int vpickve2gr_du(v2i64 _1, int var) { ++ unsigned long int res = __builtin_lsx_vpickve2gr_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} ++ res |= __builtin_lsx_vpickve2gr_du(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ res |= __builtin_lsx_vpickve2gr_du(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vinsgr2vr_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vinsgr2vr_b(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vinsgr2vr_b(_1, 1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vinsgr2vr_b(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vinsgr2vr_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vinsgr2vr_h(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vinsgr2vr_h(_1, 1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vinsgr2vr_h(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vinsgr2vr_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vinsgr2vr_w(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __builtin_lsx_vinsgr2vr_w(_1, 1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __builtin_lsx_vinsgr2vr_w(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vinsgr2vr_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vinsgr2vr_d(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} ++ res |= __builtin_lsx_vinsgr2vr_d(_1, 1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ res |= __builtin_lsx_vinsgr2vr_d(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_d' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsllwil_h_b(v16i8 _1, int var) { ++ v8i16 res = __builtin_lsx_vsllwil_h_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vsllwil_h_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vsllwil_h_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_h_b' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsllwil_w_h(v8i16 _1, int var) { ++ v4i32 res = __builtin_lsx_vsllwil_w_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsllwil_w_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsllwil_w_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_w_h' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsllwil_d_w(v4i32 _1, int var) { ++ v2i64 res = __builtin_lsx_vsllwil_d_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsllwil_d_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsllwil_d_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_d_w' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vsllwil_hu_bu(v16u8 _1, int var) { ++ v8u16 res = __builtin_lsx_vsllwil_hu_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vsllwil_hu_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vsllwil_hu_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_hu_bu' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vsllwil_wu_hu(v8u16 _1, int var) { ++ v4u32 res = __builtin_lsx_vsllwil_wu_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsllwil_wu_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsllwil_wu_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_wu_hu' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vsllwil_du_wu(v4u32 _1, int var) { ++ v2u64 res = __builtin_lsx_vsllwil_du_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsllwil_du_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsllwil_du_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_du_wu' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vfrstpi_b(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __builtin_lsx_vfrstpi_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vfrstpi_b(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vfrstpi_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vfrstpi_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vfrstpi_h(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __builtin_lsx_vfrstpi_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vfrstpi_h(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vfrstpi_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vfrstpi_h' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vshuf4i_d(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __builtin_lsx_vshuf4i_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vshuf4i_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vshuf4i_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vbsrl_v(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vbsrl_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vbsrl_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vbsrl_v(_1, var); // expected-error {{argument to '__builtin_lsx_vbsrl_v' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vbsll_v(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vbsll_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vbsll_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vbsll_v(_1, var); // expected-error {{argument to '__builtin_lsx_vbsll_v' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vextrins_b(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __builtin_lsx_vextrins_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vextrins_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vextrins_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vextrins_h(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __builtin_lsx_vextrins_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vextrins_h(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vextrins_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vextrins_w(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __builtin_lsx_vextrins_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vextrins_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vextrins_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vextrins_d(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __builtin_lsx_vextrins_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vextrins_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vextrins_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_d' must be a constant integer}} ++ return res; ++} ++ ++void vstelm_b_idx(v16i8 _1, void *_2, int var) { ++ __builtin_lsx_vstelm_b(_1, _2, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ __builtin_lsx_vstelm_b(_1, _2, 1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ __builtin_lsx_vstelm_b(_1, _2, 1, var); // expected-error {{argument to '__builtin_lsx_vstelm_b' must be a constant integer}} ++} ++ ++void vstelm_h_idx(v8i16 _1, void *_2, int var) { ++ __builtin_lsx_vstelm_h(_1, _2, 2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ __builtin_lsx_vstelm_h(_1, _2, 2, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ __builtin_lsx_vstelm_h(_1, _2, 2, var); // expected-error {{argument to '__builtin_lsx_vstelm_h' must be a constant integer}} ++} ++ ++void vstelm_w_idx(v4i32 _1, void *_2, int var) { ++ __builtin_lsx_vstelm_w(_1, _2, 4, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ __builtin_lsx_vstelm_w(_1, _2, 4, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ __builtin_lsx_vstelm_w(_1, _2, 4, var); // expected-error {{argument to '__builtin_lsx_vstelm_w' must be a constant integer}} ++} ++ ++void vstelm_d_idx(v2i64 _1, void *_2, int var) { ++ __builtin_lsx_vstelm_d(_1, _2, 8, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} ++ __builtin_lsx_vstelm_d(_1, _2, 8, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ __builtin_lsx_vstelm_d(_1, _2, 8, var); // expected-error {{argument to '__builtin_lsx_vstelm_d' must be a constant integer}} ++} ++ ++void vstelm_b(v16i8 _1, void *_2, int var) { ++ __builtin_lsx_vstelm_b(_1, _2, -129, 1); // expected-error {{argument value -129 is outside the valid range [-128, 127]}} ++ __builtin_lsx_vstelm_b(_1, _2, 128, 1); // expected-error {{argument value 128 is outside the valid range [-128, 127]}} ++ __builtin_lsx_vstelm_b(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_b' must be a constant integer}} ++} ++ ++void vstelm_h(v8i16 _1, void *_2, int var) { ++ __builtin_lsx_vstelm_h(_1, _2, -258, 1); // expected-error {{argument value -258 is outside the valid range [-256, 254]}} ++ __builtin_lsx_vstelm_h(_1, _2, 256, 1); // expected-error {{argument value 256 is outside the valid range [-256, 254]}} ++ __builtin_lsx_vstelm_h(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_h' must be a constant integer}} ++} ++ ++void vstelm_w(v4i32 _1, void *_2, int var) { ++ __builtin_lsx_vstelm_w(_1, _2, -516, 1); // expected-error {{argument value -516 is outside the valid range [-512, 508]}} ++ __builtin_lsx_vstelm_w(_1, _2, 512, 1); // expected-error {{argument value 512 is outside the valid range [-512, 508]}} ++ __builtin_lsx_vstelm_w(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_w' must be a constant integer}} ++} ++ ++void vstelm_d(v2i64 _1, void *_2, int var) { ++ __builtin_lsx_vstelm_d(_1, _2, -1032, 1); // expected-error {{argument value -1032 is outside the valid range [-1024, 1016]}} ++ __builtin_lsx_vstelm_d(_1, _2, 1024, 1); // expected-error {{argument value 1024 is outside the valid range [-1024, 1016]}} ++ __builtin_lsx_vstelm_d(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_d' must be a constant integer}} ++} ++ ++v16i8 vldrepl_b(void *_1, int var) { ++ v16i8 res = __builtin_lsx_vldrepl_b(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ res |= __builtin_lsx_vldrepl_b(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} ++ res |= __builtin_lsx_vldrepl_b(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vldrepl_h(void *_1, int var) { ++ v8i16 res = __builtin_lsx_vldrepl_h(_1, -2050); // expected-error {{argument value -2050 is outside the valid range [-2048, 2046]}} ++ res |= __builtin_lsx_vldrepl_h(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2046]}} ++ res |= __builtin_lsx_vldrepl_h(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vldrepl_w(void *_1, int var) { ++ v4i32 res = __builtin_lsx_vldrepl_w(_1, -2052); // expected-error {{argument value -2052 is outside the valid range [-2048, 2044]}} ++ res |= __builtin_lsx_vldrepl_w(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2044]}} ++ res |= __builtin_lsx_vldrepl_w(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vldrepl_d(void *_1, int var) { ++ v2i64 res = __builtin_lsx_vldrepl_d(_1, -2056); // expected-error {{argument value -2056 is outside the valid range [-2048, 2040]}} ++ res |= __builtin_lsx_vldrepl_d(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2040]}} ++ res |= __builtin_lsx_vldrepl_d(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vrotri_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vrotri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vrotri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lsx_vrotri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vrotri_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vrotri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vrotri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vrotri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vrotri_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vrotri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vrotri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vrotri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vrotri_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vrotri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vrotri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vrotri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __builtin_lsx_vsrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __builtin_lsx_vsrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __builtin_lsx_vsrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __builtin_lsx_vsrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vsrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vsrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __builtin_lsx_vsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __builtin_lsx_vsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __builtin_lsx_vsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __builtin_lsx_vsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vsrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __builtin_lsx_vssrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vssrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vssrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __builtin_lsx_vssrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vssrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vssrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __builtin_lsx_vssrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vssrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vssrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __builtin_lsx_vssrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vssrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vssrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2, int var) { ++ v16u8 res = __builtin_lsx_vssrlni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vssrlni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vssrlni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2, int var) { ++ v8u16 res = __builtin_lsx_vssrlni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vssrlni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vssrlni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2, int var) { ++ v4u32 res = __builtin_lsx_vssrlni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vssrlni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vssrlni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2, int var) { ++ v2u64 res = __builtin_lsx_vssrlni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vssrlni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vssrlni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __builtin_lsx_vssrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vssrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vssrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __builtin_lsx_vssrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vssrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vssrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __builtin_lsx_vssrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vssrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vssrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __builtin_lsx_vssrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vssrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vssrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2, int var) { ++ v16u8 res = __builtin_lsx_vssrlrni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vssrlrni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vssrlrni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2, int var) { ++ v8u16 res = __builtin_lsx_vssrlrni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vssrlrni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vssrlrni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2, int var) { ++ v4u32 res = __builtin_lsx_vssrlrni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vssrlrni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vssrlrni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2, int var) { ++ v2u64 res = __builtin_lsx_vssrlrni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vssrlrni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vssrlrni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrani_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __builtin_lsx_vsrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrani_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __builtin_lsx_vsrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrani_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __builtin_lsx_vsrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrani_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __builtin_lsx_vsrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vsrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vsrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __builtin_lsx_vsrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __builtin_lsx_vsrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __builtin_lsx_vsrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __builtin_lsx_vsrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vsrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vsrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vssrani_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __builtin_lsx_vssrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vssrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vssrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vssrani_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __builtin_lsx_vssrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vssrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vssrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vssrani_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __builtin_lsx_vssrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vssrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vssrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vssrani_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __builtin_lsx_vssrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vssrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vssrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2, int var) { ++ v16u8 res = __builtin_lsx_vssrani_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vssrani_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vssrani_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2, int var) { ++ v8u16 res = __builtin_lsx_vssrani_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vssrani_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vssrani_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2, int var) { ++ v4u32 res = __builtin_lsx_vssrani_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vssrani_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vssrani_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vssrani_du_q(v2u64 _1, v2i64 _2, int var) { ++ v2u64 res = __builtin_lsx_vssrani_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vssrani_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vssrani_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_du_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __builtin_lsx_vssrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vssrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vssrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __builtin_lsx_vssrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vssrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vssrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __builtin_lsx_vssrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vssrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vssrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __builtin_lsx_vssrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vssrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vssrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2, int var) { ++ v16u8 res = __builtin_lsx_vssrarni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vssrarni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vssrarni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2, int var) { ++ v8u16 res = __builtin_lsx_vssrarni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vssrarni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vssrarni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2, int var) { ++ v4u32 res = __builtin_lsx_vssrarni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vssrarni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vssrarni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2, int var) { ++ v2u64 res = __builtin_lsx_vssrarni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vssrarni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vssrarni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vpermi_w(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __builtin_lsx_vpermi_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vpermi_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lsx_vpermi_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vpermi_w' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vld(void *_1, int var) { ++ v16i8 res = __builtin_lsx_vld(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ res |= __builtin_lsx_vld(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} ++ res |= __builtin_lsx_vld(_1, var); // expected-error {{argument to '__builtin_lsx_vld' must be a constant integer}} ++ return res; ++} ++ ++void vst(v16i8 _1, void *_2, int var) { ++ __builtin_lsx_vst(_1, _2, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ __builtin_lsx_vst(_1, _2, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} ++ __builtin_lsx_vst(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vst' must be a constant integer}} ++} ++ ++v2i64 vldi(int var) { ++ v2i64 res = __builtin_lsx_vldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}} ++ res |= __builtin_lsx_vldi(4096); // expected-error {{argument value 4096 is outside the valid range [-4096, 4095]}} ++ res |= __builtin_lsx_vldi(var); // expected-error {{argument to '__builtin_lsx_vldi' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vrepli_b(int var) { ++ v16i8 res = __builtin_lsx_vrepli_b(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __builtin_lsx_vrepli_b(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __builtin_lsx_vrepli_b(var); // expected-error {{argument to '__builtin_lsx_vrepli_b' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vrepli_d(int var) { ++ v2i64 res = __builtin_lsx_vrepli_d(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __builtin_lsx_vrepli_d(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __builtin_lsx_vrepli_d(var); // expected-error {{argument to '__builtin_lsx_vrepli_d' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vrepli_h(int var) { ++ v8i16 res = __builtin_lsx_vrepli_h(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __builtin_lsx_vrepli_h(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __builtin_lsx_vrepli_h(var); // expected-error {{argument to '__builtin_lsx_vrepli_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vrepli_w(int var) { ++ v4i32 res = __builtin_lsx_vrepli_w(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __builtin_lsx_vrepli_w(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __builtin_lsx_vrepli_w(var); // expected-error {{argument to '__builtin_lsx_vrepli_w' must be a constant integer}} ++ return res; ++} +diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin.c b/clang/test/CodeGen/LoongArch/lsx/builtin.c +new file mode 100644 +index 000000000000..ef5a390e1838 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/lsx/builtin.c +@@ -0,0 +1,5193 @@ ++// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -O2 -emit-llvm %s -o - | FileCheck %s ++ ++typedef signed char v16i8 __attribute__ ((vector_size(16), aligned(16))); ++typedef signed char v16i8_b __attribute__ ((vector_size(16), aligned(1))); ++typedef unsigned char v16u8 __attribute__ ((vector_size(16), aligned(16))); ++typedef unsigned char v16u8_b __attribute__ ((vector_size(16), aligned(1))); ++typedef short v8i16 __attribute__ ((vector_size(16), aligned(16))); ++typedef short v8i16_h __attribute__ ((vector_size(16), aligned(2))); ++typedef unsigned short v8u16 __attribute__ ((vector_size(16), aligned(16))); ++typedef unsigned short v8u16_h __attribute__ ((vector_size(16), aligned(2))); ++typedef int v4i32 __attribute__ ((vector_size(16), aligned(16))); ++typedef int v4i32_w __attribute__ ((vector_size(16), aligned(4))); ++typedef unsigned int v4u32 __attribute__ ((vector_size(16), aligned(16))); ++typedef unsigned int v4u32_w __attribute__ ((vector_size(16), aligned(4))); ++typedef long long v2i64 __attribute__ ((vector_size(16), aligned(16))); ++typedef long long v2i64_d __attribute__ ((vector_size(16), aligned(8))); ++typedef unsigned long long v2u64 __attribute__ ((vector_size(16), aligned(16))); ++typedef unsigned long long v2u64_d __attribute__ ((vector_size(16), aligned(8))); ++typedef float v4f32 __attribute__ ((vector_size(16), aligned(16))); ++typedef float v4f32_w __attribute__ ((vector_size(16), aligned(4))); ++typedef double v2f64 __attribute__ ((vector_size(16), aligned(16))); ++typedef double v2f64_d __attribute__ ((vector_size(16), aligned(8))); ++ ++typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); ++typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); ++typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__)); ++ ++ ++// CHECK-LABEL: @vsll_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsll_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsll_b(_1, _2); } ++// CHECK-LABEL: @vsll_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsll_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsll_h(_1, _2); } ++// CHECK-LABEL: @vsll_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsll_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsll_w(_1, _2); } ++// CHECK-LABEL: @vsll_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsll_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsll_d(_1, _2); } ++// CHECK-LABEL: @vslli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vslli_b(v16i8 _1) { return __builtin_lsx_vslli_b(_1, 1); } ++// CHECK-LABEL: @vslli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vslli_h(v8i16 _1) { return __builtin_lsx_vslli_h(_1, 1); } ++// CHECK-LABEL: @vslli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vslli_w(v4i32 _1) { return __builtin_lsx_vslli_w(_1, 1); } ++// CHECK-LABEL: @vslli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vslli_d(v2i64 _1) { return __builtin_lsx_vslli_d(_1, 1); } ++// CHECK-LABEL: @vsra_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsra_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsra_b(_1, _2); } ++// CHECK-LABEL: @vsra_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsra_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsra_h(_1, _2); } ++// CHECK-LABEL: @vsra_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsra_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsra_w(_1, _2); } ++// CHECK-LABEL: @vsra_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsra_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsra_d(_1, _2); } ++// CHECK-LABEL: @vsrai_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrai_b(v16i8 _1) { return __builtin_lsx_vsrai_b(_1, 1); } ++// CHECK-LABEL: @vsrai_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrai_h(v8i16 _1) { return __builtin_lsx_vsrai_h(_1, 1); } ++// CHECK-LABEL: @vsrai_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrai_w(v4i32 _1) { return __builtin_lsx_vsrai_w(_1, 1); } ++// CHECK-LABEL: @vsrai_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrai_d(v2i64 _1) { return __builtin_lsx_vsrai_d(_1, 1); } ++// CHECK-LABEL: @vsrar_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrar_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vsrar_b(_1, _2); ++} ++// CHECK-LABEL: @vsrar_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrar_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsrar_h(_1, _2); ++} ++// CHECK-LABEL: @vsrar_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrar_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsrar_w(_1, _2); ++} ++// CHECK-LABEL: @vsrar_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrar_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsrar_d(_1, _2); ++} ++// CHECK-LABEL: @vsrari_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrari_b(v16i8 _1) { return __builtin_lsx_vsrari_b(_1, 1); } ++// CHECK-LABEL: @vsrari_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrari_h(v8i16 _1) { return __builtin_lsx_vsrari_h(_1, 1); } ++// CHECK-LABEL: @vsrari_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrari_w(v4i32 _1) { return __builtin_lsx_vsrari_w(_1, 1); } ++// CHECK-LABEL: @vsrari_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrari_d(v2i64 _1) { return __builtin_lsx_vsrari_d(_1, 1); } ++// CHECK-LABEL: @vsrl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrl_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsrl_b(_1, _2); } ++// CHECK-LABEL: @vsrl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrl_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsrl_h(_1, _2); } ++// CHECK-LABEL: @vsrl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrl_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsrl_w(_1, _2); } ++// CHECK-LABEL: @vsrl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrl_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsrl_d(_1, _2); } ++// CHECK-LABEL: @vsrli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrli_b(v16i8 _1) { return __builtin_lsx_vsrli_b(_1, 1); } ++// CHECK-LABEL: @vsrli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrli_h(v8i16 _1) { return __builtin_lsx_vsrli_h(_1, 1); } ++// CHECK-LABEL: @vsrli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrli_w(v4i32 _1) { return __builtin_lsx_vsrli_w(_1, 1); } ++// CHECK-LABEL: @vsrli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrli_d(v2i64 _1) { return __builtin_lsx_vsrli_d(_1, 1); } ++// CHECK-LABEL: @vsrlr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrlr_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vsrlr_b(_1, _2); ++} ++// CHECK-LABEL: @vsrlr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrlr_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsrlr_h(_1, _2); ++} ++// CHECK-LABEL: @vsrlr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrlr_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsrlr_w(_1, _2); ++} ++// CHECK-LABEL: @vsrlr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrlr_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsrlr_d(_1, _2); ++} ++// CHECK-LABEL: @vsrlri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrlri_b(v16i8 _1) { return __builtin_lsx_vsrlri_b(_1, 1); } ++// CHECK-LABEL: @vsrlri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrlri_h(v8i16 _1) { return __builtin_lsx_vsrlri_h(_1, 1); } ++// CHECK-LABEL: @vsrlri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrlri_w(v4i32 _1) { return __builtin_lsx_vsrlri_w(_1, 1); } ++// CHECK-LABEL: @vsrlri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrlri_d(v2i64 _1) { return __builtin_lsx_vsrlri_d(_1, 1); } ++// CHECK-LABEL: @vbitclr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vbitclr_b(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vbitclr_b(_1, _2); ++} ++// CHECK-LABEL: @vbitclr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vbitclr_h(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vbitclr_h(_1, _2); ++} ++// CHECK-LABEL: @vbitclr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vbitclr_w(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vbitclr_w(_1, _2); ++} ++// CHECK-LABEL: @vbitclr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vbitclr_d(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vbitclr_d(_1, _2); ++} ++// CHECK-LABEL: @vbitclri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vbitclri_b(v16u8 _1) { return __builtin_lsx_vbitclri_b(_1, 1); } ++// CHECK-LABEL: @vbitclri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vbitclri_h(v8u16 _1) { return __builtin_lsx_vbitclri_h(_1, 1); } ++// CHECK-LABEL: @vbitclri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vbitclri_w(v4u32 _1) { return __builtin_lsx_vbitclri_w(_1, 1); } ++// CHECK-LABEL: @vbitclri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vbitclri_d(v2u64 _1) { return __builtin_lsx_vbitclri_d(_1, 1); } ++// CHECK-LABEL: @vbitset_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vbitset_b(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vbitset_b(_1, _2); ++} ++// CHECK-LABEL: @vbitset_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vbitset_h(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vbitset_h(_1, _2); ++} ++// CHECK-LABEL: @vbitset_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vbitset_w(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vbitset_w(_1, _2); ++} ++// CHECK-LABEL: @vbitset_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vbitset_d(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vbitset_d(_1, _2); ++} ++// CHECK-LABEL: @vbitseti_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vbitseti_b(v16u8 _1) { return __builtin_lsx_vbitseti_b(_1, 1); } ++// CHECK-LABEL: @vbitseti_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vbitseti_h(v8u16 _1) { return __builtin_lsx_vbitseti_h(_1, 1); } ++// CHECK-LABEL: @vbitseti_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vbitseti_w(v4u32 _1) { return __builtin_lsx_vbitseti_w(_1, 1); } ++// CHECK-LABEL: @vbitseti_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vbitseti_d(v2u64 _1) { return __builtin_lsx_vbitseti_d(_1, 1); } ++// CHECK-LABEL: @vbitrev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vbitrev_b(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vbitrev_b(_1, _2); ++} ++// CHECK-LABEL: @vbitrev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vbitrev_h(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vbitrev_h(_1, _2); ++} ++// CHECK-LABEL: @vbitrev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vbitrev_w(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vbitrev_w(_1, _2); ++} ++// CHECK-LABEL: @vbitrev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vbitrev_d(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vbitrev_d(_1, _2); ++} ++// CHECK-LABEL: @vbitrevi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vbitrevi_b(v16u8 _1) { return __builtin_lsx_vbitrevi_b(_1, 1); } ++// CHECK-LABEL: @vbitrevi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vbitrevi_h(v8u16 _1) { return __builtin_lsx_vbitrevi_h(_1, 1); } ++// CHECK-LABEL: @vbitrevi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vbitrevi_w(v4u32 _1) { return __builtin_lsx_vbitrevi_w(_1, 1); } ++// CHECK-LABEL: @vbitrevi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vbitrevi_d(v2u64 _1) { return __builtin_lsx_vbitrevi_d(_1, 1); } ++// CHECK-LABEL: @vadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vadd_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vadd_b(_1, _2); } ++// CHECK-LABEL: @vadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vadd_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vadd_h(_1, _2); } ++// CHECK-LABEL: @vadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vadd_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vadd_w(_1, _2); } ++// CHECK-LABEL: @vadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vadd_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vadd_d(_1, _2); } ++// CHECK-LABEL: @vaddi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vaddi_bu(v16i8 _1) { return __builtin_lsx_vaddi_bu(_1, 1); } ++// CHECK-LABEL: @vaddi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vaddi_hu(v8i16 _1) { return __builtin_lsx_vaddi_hu(_1, 1); } ++// CHECK-LABEL: @vaddi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vaddi_wu(v4i32 _1) { return __builtin_lsx_vaddi_wu(_1, 1); } ++// CHECK-LABEL: @vaddi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddi_du(v2i64 _1) { return __builtin_lsx_vaddi_du(_1, 1); } ++// CHECK-LABEL: @vsub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsub_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsub_b(_1, _2); } ++// CHECK-LABEL: @vsub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsub_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsub_h(_1, _2); } ++// CHECK-LABEL: @vsub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsub_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsub_w(_1, _2); } ++// CHECK-LABEL: @vsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsub_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsub_d(_1, _2); } ++// CHECK-LABEL: @vsubi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsubi_bu(v16i8 _1) { return __builtin_lsx_vsubi_bu(_1, 1); } ++// CHECK-LABEL: @vsubi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsubi_hu(v8i16 _1) { return __builtin_lsx_vsubi_hu(_1, 1); } ++// CHECK-LABEL: @vsubi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsubi_wu(v4i32 _1) { return __builtin_lsx_vsubi_wu(_1, 1); } ++// CHECK-LABEL: @vsubi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubi_du(v2i64 _1) { return __builtin_lsx_vsubi_du(_1, 1); } ++// CHECK-LABEL: @vmax_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmax_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmax_b(_1, _2); } ++// CHECK-LABEL: @vmax_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmax_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmax_h(_1, _2); } ++// CHECK-LABEL: @vmax_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmax_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmax_w(_1, _2); } ++// CHECK-LABEL: @vmax_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmax_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmax_d(_1, _2); } ++// CHECK-LABEL: @vmaxi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmaxi_b(v16i8 _1) { return __builtin_lsx_vmaxi_b(_1, 1); } ++// CHECK-LABEL: @vmaxi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmaxi_h(v8i16 _1) { return __builtin_lsx_vmaxi_h(_1, 1); } ++// CHECK-LABEL: @vmaxi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmaxi_w(v4i32 _1) { return __builtin_lsx_vmaxi_w(_1, 1); } ++// CHECK-LABEL: @vmaxi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaxi_d(v2i64 _1) { return __builtin_lsx_vmaxi_d(_1, 1); } ++// CHECK-LABEL: @vmax_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vmax_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vmax_bu(_1, _2); ++} ++// CHECK-LABEL: @vmax_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vmax_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vmax_hu(_1, _2); ++} ++// CHECK-LABEL: @vmax_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vmax_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vmax_wu(_1, _2); ++} ++// CHECK-LABEL: @vmax_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmax_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vmax_du(_1, _2); ++} ++// CHECK-LABEL: @vmaxi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vmaxi_bu(v16u8 _1) { return __builtin_lsx_vmaxi_bu(_1, 1); } ++// CHECK-LABEL: @vmaxi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vmaxi_hu(v8u16 _1) { return __builtin_lsx_vmaxi_hu(_1, 1); } ++// CHECK-LABEL: @vmaxi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vmaxi_wu(v4u32 _1) { return __builtin_lsx_vmaxi_wu(_1, 1); } ++// CHECK-LABEL: @vmaxi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmaxi_du(v2u64 _1) { return __builtin_lsx_vmaxi_du(_1, 1); } ++// CHECK-LABEL: @vmin_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmin_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmin_b(_1, _2); } ++// CHECK-LABEL: @vmin_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmin_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmin_h(_1, _2); } ++// CHECK-LABEL: @vmin_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmin_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmin_w(_1, _2); } ++// CHECK-LABEL: @vmin_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmin_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmin_d(_1, _2); } ++// CHECK-LABEL: @vmini_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmini_b(v16i8 _1) { return __builtin_lsx_vmini_b(_1, 1); } ++// CHECK-LABEL: @vmini_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmini_h(v8i16 _1) { return __builtin_lsx_vmini_h(_1, 1); } ++// CHECK-LABEL: @vmini_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmini_w(v4i32 _1) { return __builtin_lsx_vmini_w(_1, 1); } ++// CHECK-LABEL: @vmini_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmini_d(v2i64 _1) { return __builtin_lsx_vmini_d(_1, 1); } ++// CHECK-LABEL: @vmin_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vmin_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vmin_bu(_1, _2); ++} ++// CHECK-LABEL: @vmin_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vmin_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vmin_hu(_1, _2); ++} ++// CHECK-LABEL: @vmin_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vmin_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vmin_wu(_1, _2); ++} ++// CHECK-LABEL: @vmin_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmin_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vmin_du(_1, _2); ++} ++// CHECK-LABEL: @vmini_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vmini_bu(v16u8 _1) { return __builtin_lsx_vmini_bu(_1, 1); } ++// CHECK-LABEL: @vmini_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vmini_hu(v8u16 _1) { return __builtin_lsx_vmini_hu(_1, 1); } ++// CHECK-LABEL: @vmini_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vmini_wu(v4u32 _1) { return __builtin_lsx_vmini_wu(_1, 1); } ++// CHECK-LABEL: @vmini_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmini_du(v2u64 _1) { return __builtin_lsx_vmini_du(_1, 1); } ++// CHECK-LABEL: @vseq_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vseq_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vseq_b(_1, _2); } ++// CHECK-LABEL: @vseq_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vseq_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vseq_h(_1, _2); } ++// CHECK-LABEL: @vseq_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vseq_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vseq_w(_1, _2); } ++// CHECK-LABEL: @vseq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vseq_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vseq_d(_1, _2); } ++// CHECK-LABEL: @vseqi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vseqi_b(v16i8 _1) { return __builtin_lsx_vseqi_b(_1, 1); } ++// CHECK-LABEL: @vseqi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vseqi_h(v8i16 _1) { return __builtin_lsx_vseqi_h(_1, 1); } ++// CHECK-LABEL: @vseqi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vseqi_w(v4i32 _1) { return __builtin_lsx_vseqi_w(_1, 1); } ++// CHECK-LABEL: @vseqi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vseqi_d(v2i64 _1) { return __builtin_lsx_vseqi_d(_1, 1); } ++// CHECK-LABEL: @vslti_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vslti_b(v16i8 _1) { return __builtin_lsx_vslti_b(_1, 1); } ++// CHECK-LABEL: @vslt_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vslt_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vslt_b(_1, _2); } ++// CHECK-LABEL: @vslt_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vslt_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vslt_h(_1, _2); } ++// CHECK-LABEL: @vslt_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vslt_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vslt_w(_1, _2); } ++// CHECK-LABEL: @vslt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vslt_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vslt_d(_1, _2); } ++// CHECK-LABEL: @vslti_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vslti_h(v8i16 _1) { return __builtin_lsx_vslti_h(_1, 1); } ++// CHECK-LABEL: @vslti_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vslti_w(v4i32 _1) { return __builtin_lsx_vslti_w(_1, 1); } ++// CHECK-LABEL: @vslti_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vslti_d(v2i64 _1) { return __builtin_lsx_vslti_d(_1, 1); } ++// CHECK-LABEL: @vslt_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vslt_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vslt_bu(_1, _2); ++} ++// CHECK-LABEL: @vslt_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vslt_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vslt_hu(_1, _2); ++} ++// CHECK-LABEL: @vslt_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vslt_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vslt_wu(_1, _2); ++} ++// CHECK-LABEL: @vslt_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vslt_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vslt_du(_1, _2); ++} ++// CHECK-LABEL: @vslti_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vslti_bu(v16u8 _1) { return __builtin_lsx_vslti_bu(_1, 1); } ++// CHECK-LABEL: @vslti_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vslti_hu(v8u16 _1) { return __builtin_lsx_vslti_hu(_1, 1); } ++// CHECK-LABEL: @vslti_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vslti_wu(v4u32 _1) { return __builtin_lsx_vslti_wu(_1, 1); } ++// CHECK-LABEL: @vslti_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vslti_du(v2u64 _1) { return __builtin_lsx_vslti_du(_1, 1); } ++// CHECK-LABEL: @vsle_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsle_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsle_b(_1, _2); } ++// CHECK-LABEL: @vsle_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsle_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsle_h(_1, _2); } ++// CHECK-LABEL: @vsle_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsle_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsle_w(_1, _2); } ++// CHECK-LABEL: @vsle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsle_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsle_d(_1, _2); } ++// CHECK-LABEL: @vslei_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vslei_b(v16i8 _1) { return __builtin_lsx_vslei_b(_1, 1); } ++// CHECK-LABEL: @vslei_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vslei_h(v8i16 _1) { return __builtin_lsx_vslei_h(_1, 1); } ++// CHECK-LABEL: @vslei_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vslei_w(v4i32 _1) { return __builtin_lsx_vslei_w(_1, 1); } ++// CHECK-LABEL: @vslei_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vslei_d(v2i64 _1) { return __builtin_lsx_vslei_d(_1, 1); } ++// CHECK-LABEL: @vsle_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsle_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vsle_bu(_1, _2); ++} ++// CHECK-LABEL: @vsle_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsle_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vsle_hu(_1, _2); ++} ++// CHECK-LABEL: @vsle_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsle_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vsle_wu(_1, _2); ++} ++// CHECK-LABEL: @vsle_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsle_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vsle_du(_1, _2); ++} ++// CHECK-LABEL: @vslei_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vslei_bu(v16u8 _1) { return __builtin_lsx_vslei_bu(_1, 1); } ++// CHECK-LABEL: @vslei_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vslei_hu(v8u16 _1) { return __builtin_lsx_vslei_hu(_1, 1); } ++// CHECK-LABEL: @vslei_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vslei_wu(v4u32 _1) { return __builtin_lsx_vslei_wu(_1, 1); } ++// CHECK-LABEL: @vslei_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vslei_du(v2u64 _1) { return __builtin_lsx_vslei_du(_1, 1); } ++// CHECK-LABEL: @vsat_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsat_b(v16i8 _1) { return __builtin_lsx_vsat_b(_1, 1); } ++// CHECK-LABEL: @vsat_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsat_h(v8i16 _1) { return __builtin_lsx_vsat_h(_1, 1); } ++// CHECK-LABEL: @vsat_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsat_w(v4i32 _1) { return __builtin_lsx_vsat_w(_1, 1); } ++// CHECK-LABEL: @vsat_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsat_d(v2i64 _1) { return __builtin_lsx_vsat_d(_1, 1); } ++// CHECK-LABEL: @vsat_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vsat_bu(v16u8 _1) { return __builtin_lsx_vsat_bu(_1, 1); } ++// CHECK-LABEL: @vsat_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vsat_hu(v8u16 _1) { return __builtin_lsx_vsat_hu(_1, 1); } ++// CHECK-LABEL: @vsat_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vsat_wu(v4u32 _1) { return __builtin_lsx_vsat_wu(_1, 1); } ++// CHECK-LABEL: @vsat_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vsat_du(v2u64 _1) { return __builtin_lsx_vsat_du(_1, 1); } ++// CHECK-LABEL: @vadda_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vadda_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vadda_b(_1, _2); ++} ++// CHECK-LABEL: @vadda_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vadda_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vadda_h(_1, _2); ++} ++// CHECK-LABEL: @vadda_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vadda_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vadda_w(_1, _2); ++} ++// CHECK-LABEL: @vadda_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vadda_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vadda_d(_1, _2); ++} ++// CHECK-LABEL: @vsadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsadd_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vsadd_b(_1, _2); ++} ++// CHECK-LABEL: @vsadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsadd_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsadd_h(_1, _2); ++} ++// CHECK-LABEL: @vsadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsadd_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsadd_w(_1, _2); ++} ++// CHECK-LABEL: @vsadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsadd_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsadd_d(_1, _2); ++} ++// CHECK-LABEL: @vsadd_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vsadd_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vsadd_bu(_1, _2); ++} ++// CHECK-LABEL: @vsadd_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vsadd_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vsadd_hu(_1, _2); ++} ++// CHECK-LABEL: @vsadd_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vsadd_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vsadd_wu(_1, _2); ++} ++// CHECK-LABEL: @vsadd_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vsadd_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vsadd_du(_1, _2); ++} ++// CHECK-LABEL: @vavg_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vavg_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vavg_b(_1, _2); } ++// CHECK-LABEL: @vavg_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vavg_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vavg_h(_1, _2); } ++// CHECK-LABEL: @vavg_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vavg_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vavg_w(_1, _2); } ++// CHECK-LABEL: @vavg_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vavg_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vavg_d(_1, _2); } ++// CHECK-LABEL: @vavg_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vavg_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vavg_bu(_1, _2); ++} ++// CHECK-LABEL: @vavg_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vavg_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vavg_hu(_1, _2); ++} ++// CHECK-LABEL: @vavg_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vavg_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vavg_wu(_1, _2); ++} ++// CHECK-LABEL: @vavg_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vavg_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vavg_du(_1, _2); ++} ++// CHECK-LABEL: @vavgr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vavgr_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vavgr_b(_1, _2); ++} ++// CHECK-LABEL: @vavgr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vavgr_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vavgr_h(_1, _2); ++} ++// CHECK-LABEL: @vavgr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vavgr_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vavgr_w(_1, _2); ++} ++// CHECK-LABEL: @vavgr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vavgr_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vavgr_d(_1, _2); ++} ++// CHECK-LABEL: @vavgr_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vavgr_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vavgr_bu(_1, _2); ++} ++// CHECK-LABEL: @vavgr_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vavgr_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vavgr_hu(_1, _2); ++} ++// CHECK-LABEL: @vavgr_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vavgr_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vavgr_wu(_1, _2); ++} ++// CHECK-LABEL: @vavgr_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vavgr_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vavgr_du(_1, _2); ++} ++// CHECK-LABEL: @vssub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssub_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vssub_b(_1, _2); ++} ++// CHECK-LABEL: @vssub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssub_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vssub_h(_1, _2); ++} ++// CHECK-LABEL: @vssub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssub_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vssub_w(_1, _2); ++} ++// CHECK-LABEL: @vssub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vssub_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vssub_d(_1, _2); ++} ++// CHECK-LABEL: @vssub_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssub_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vssub_bu(_1, _2); ++} ++// CHECK-LABEL: @vssub_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssub_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vssub_hu(_1, _2); ++} ++// CHECK-LABEL: @vssub_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssub_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vssub_wu(_1, _2); ++} ++// CHECK-LABEL: @vssub_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vssub_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vssub_du(_1, _2); ++} ++// CHECK-LABEL: @vabsd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vabsd_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vabsd_b(_1, _2); ++} ++// CHECK-LABEL: @vabsd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vabsd_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vabsd_h(_1, _2); ++} ++// CHECK-LABEL: @vabsd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vabsd_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vabsd_w(_1, _2); ++} ++// CHECK-LABEL: @vabsd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vabsd_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vabsd_d(_1, _2); ++} ++// CHECK-LABEL: @vabsd_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vabsd_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vabsd_bu(_1, _2); ++} ++// CHECK-LABEL: @vabsd_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vabsd_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vabsd_hu(_1, _2); ++} ++// CHECK-LABEL: @vabsd_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vabsd_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vabsd_wu(_1, _2); ++} ++// CHECK-LABEL: @vabsd_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vabsd_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vabsd_du(_1, _2); ++} ++// CHECK-LABEL: @vmul_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmul_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmul_b(_1, _2); } ++// CHECK-LABEL: @vmul_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmul_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmul_h(_1, _2); } ++// CHECK-LABEL: @vmul_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmul_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmul_w(_1, _2); } ++// CHECK-LABEL: @vmul_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmul_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmul_d(_1, _2); } ++// CHECK-LABEL: @vmadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmadd_b(v16i8 _1, v16i8 _2, v16i8 _3) { ++ return __builtin_lsx_vmadd_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmadd_h(v8i16 _1, v8i16 _2, v8i16 _3) { ++ return __builtin_lsx_vmadd_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmadd_w(v4i32 _1, v4i32 _2, v4i32 _3) { ++ return __builtin_lsx_vmadd_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmadd_d(v2i64 _1, v2i64 _2, v2i64 _3) { ++ return __builtin_lsx_vmadd_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vmsub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmsub_b(v16i8 _1, v16i8 _2, v16i8 _3) { ++ return __builtin_lsx_vmsub_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmsub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmsub_h(v8i16 _1, v8i16 _2, v8i16 _3) { ++ return __builtin_lsx_vmsub_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmsub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmsub_w(v4i32 _1, v4i32 _2, v4i32 _3) { ++ return __builtin_lsx_vmsub_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmsub_d(v2i64 _1, v2i64 _2, v2i64 _3) { ++ return __builtin_lsx_vmsub_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vdiv_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vdiv_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vdiv_b(_1, _2); } ++// CHECK-LABEL: @vdiv_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vdiv_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vdiv_h(_1, _2); } ++// CHECK-LABEL: @vdiv_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vdiv_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vdiv_w(_1, _2); } ++// CHECK-LABEL: @vdiv_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vdiv_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vdiv_d(_1, _2); } ++// CHECK-LABEL: @vdiv_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vdiv_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vdiv_bu(_1, _2); ++} ++// CHECK-LABEL: @vdiv_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vdiv_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vdiv_hu(_1, _2); ++} ++// CHECK-LABEL: @vdiv_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vdiv_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vdiv_wu(_1, _2); ++} ++// CHECK-LABEL: @vdiv_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vdiv_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vdiv_du(_1, _2); ++} ++// CHECK-LABEL: @vhaddw_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vhaddw_h_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vhaddw_h_b(_1, _2); ++} ++// CHECK-LABEL: @vhaddw_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vhaddw_w_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vhaddw_w_h(_1, _2); ++} ++// CHECK-LABEL: @vhaddw_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vhaddw_d_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vhaddw_d_w(_1, _2); ++} ++// CHECK-LABEL: @vhaddw_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vhaddw_hu_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vhaddw_hu_bu(_1, _2); ++} ++// CHECK-LABEL: @vhaddw_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vhaddw_wu_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vhaddw_wu_hu(_1, _2); ++} ++// CHECK-LABEL: @vhaddw_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vhaddw_du_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vhaddw_du_wu(_1, _2); ++} ++// CHECK-LABEL: @vhsubw_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vhsubw_h_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vhsubw_h_b(_1, _2); ++} ++// CHECK-LABEL: @vhsubw_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vhsubw_w_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vhsubw_w_h(_1, _2); ++} ++// CHECK-LABEL: @vhsubw_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vhsubw_d_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vhsubw_d_w(_1, _2); ++} ++// CHECK-LABEL: @vhsubw_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vhsubw_hu_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vhsubw_hu_bu(_1, _2); ++} ++// CHECK-LABEL: @vhsubw_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vhsubw_wu_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vhsubw_wu_hu(_1, _2); ++} ++// CHECK-LABEL: @vhsubw_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vhsubw_du_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vhsubw_du_wu(_1, _2); ++} ++// CHECK-LABEL: @vmod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmod_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmod_b(_1, _2); } ++// CHECK-LABEL: @vmod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmod_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmod_h(_1, _2); } ++// CHECK-LABEL: @vmod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmod_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmod_w(_1, _2); } ++// CHECK-LABEL: @vmod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmod_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmod_d(_1, _2); } ++// CHECK-LABEL: @vmod_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vmod_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vmod_bu(_1, _2); ++} ++// CHECK-LABEL: @vmod_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vmod_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vmod_hu(_1, _2); ++} ++// CHECK-LABEL: @vmod_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vmod_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vmod_wu(_1, _2); ++} ++// CHECK-LABEL: @vmod_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmod_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vmod_du(_1, _2); ++} ++// CHECK-LABEL: @vreplve_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> [[_1:%.*]], i32 [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vreplve_b(v16i8 _1, int _2) { ++ return __builtin_lsx_vreplve_b(_1, _2); ++} ++// CHECK-LABEL: @vreplve_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> [[_1:%.*]], i32 [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vreplve_h(v8i16 _1, int _2) { ++ return __builtin_lsx_vreplve_h(_1, _2); ++} ++// CHECK-LABEL: @vreplve_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> [[_1:%.*]], i32 [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vreplve_w(v4i32 _1, int _2) { ++ return __builtin_lsx_vreplve_w(_1, _2); ++} ++// CHECK-LABEL: @vreplve_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> [[_1:%.*]], i32 [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vreplve_d(v2i64 _1, int _2) { ++ return __builtin_lsx_vreplve_d(_1, _2); ++} ++// CHECK-LABEL: @vreplvei_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vreplvei_b(v16i8 _1) { return __builtin_lsx_vreplvei_b(_1, 1); } ++// CHECK-LABEL: @vreplvei_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vreplvei_h(v8i16 _1) { return __builtin_lsx_vreplvei_h(_1, 1); } ++// CHECK-LABEL: @vreplvei_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vreplvei_w(v4i32 _1) { return __builtin_lsx_vreplvei_w(_1, 1); } ++// CHECK-LABEL: @vreplvei_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vreplvei_d(v2i64 _1) { return __builtin_lsx_vreplvei_d(_1, 1); } ++// CHECK-LABEL: @vpickev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vpickev_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vpickev_b(_1, _2); ++} ++// CHECK-LABEL: @vpickev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vpickev_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vpickev_h(_1, _2); ++} ++// CHECK-LABEL: @vpickev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vpickev_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vpickev_w(_1, _2); ++} ++// CHECK-LABEL: @vpickev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vpickev_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vpickev_d(_1, _2); ++} ++// CHECK-LABEL: @vpickod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vpickod_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vpickod_b(_1, _2); ++} ++// CHECK-LABEL: @vpickod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vpickod_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vpickod_h(_1, _2); ++} ++// CHECK-LABEL: @vpickod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vpickod_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vpickod_w(_1, _2); ++} ++// CHECK-LABEL: @vpickod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vpickod_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vpickod_d(_1, _2); ++} ++// CHECK-LABEL: @vilvh_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vilvh_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vilvh_b(_1, _2); ++} ++// CHECK-LABEL: @vilvh_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vilvh_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vilvh_h(_1, _2); ++} ++// CHECK-LABEL: @vilvh_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vilvh_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vilvh_w(_1, _2); ++} ++// CHECK-LABEL: @vilvh_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vilvh_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vilvh_d(_1, _2); ++} ++// CHECK-LABEL: @vilvl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vilvl_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vilvl_b(_1, _2); ++} ++// CHECK-LABEL: @vilvl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vilvl_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vilvl_h(_1, _2); ++} ++// CHECK-LABEL: @vilvl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vilvl_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vilvl_w(_1, _2); ++} ++// CHECK-LABEL: @vilvl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vilvl_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vilvl_d(_1, _2); ++} ++// CHECK-LABEL: @vpackev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vpackev_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vpackev_b(_1, _2); ++} ++// CHECK-LABEL: @vpackev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vpackev_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vpackev_h(_1, _2); ++} ++// CHECK-LABEL: @vpackev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vpackev_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vpackev_w(_1, _2); ++} ++// CHECK-LABEL: @vpackev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vpackev_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vpackev_d(_1, _2); ++} ++// CHECK-LABEL: @vpackod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vpackod_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vpackod_b(_1, _2); ++} ++// CHECK-LABEL: @vpackod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vpackod_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vpackod_h(_1, _2); ++} ++// CHECK-LABEL: @vpackod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vpackod_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vpackod_w(_1, _2); ++} ++// CHECK-LABEL: @vpackod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vpackod_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vpackod_d(_1, _2); ++} ++// CHECK-LABEL: @vshuf_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vshuf_h(v8i16 _1, v8i16 _2, v8i16 _3) { ++ return __builtin_lsx_vshuf_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vshuf_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vshuf_w(v4i32 _1, v4i32 _2, v4i32 _3) { ++ return __builtin_lsx_vshuf_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vshuf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vshuf_d(v2i64 _1, v2i64 _2, v2i64 _3) { ++ return __builtin_lsx_vshuf_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vand_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vand_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vand_v(_1, _2); } ++// CHECK-LABEL: @vandi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vandi_b(v16u8 _1) { return __builtin_lsx_vandi_b(_1, 1); } ++// CHECK-LABEL: @vor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vor_v(_1, _2); } ++// CHECK-LABEL: @vori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vori_b(v16u8 _1) { return __builtin_lsx_vori_b(_1, 1); } ++// CHECK-LABEL: @vnor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vnor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vnor_v(_1, _2); } ++// CHECK-LABEL: @vnori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vnori_b(v16u8 _1) { return __builtin_lsx_vnori_b(_1, 1); } ++// CHECK-LABEL: @vxor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vxor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vxor_v(_1, _2); } ++// CHECK-LABEL: @vxori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vxori_b(v16u8 _1) { return __builtin_lsx_vxori_b(_1, 1); } ++// CHECK-LABEL: @vbitsel_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vbitsel_v(v16u8 _1, v16u8 _2, v16u8 _3) { ++ return __builtin_lsx_vbitsel_v(_1, _2, _3); ++} ++// CHECK-LABEL: @vbitseli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vbitseli_b(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vbitseli_b(_1, _2, 1); ++} ++// CHECK-LABEL: @vshuf4i_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vshuf4i_b(v16i8 _1) { return __builtin_lsx_vshuf4i_b(_1, 1); } ++// CHECK-LABEL: @vshuf4i_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vshuf4i_h(v8i16 _1) { return __builtin_lsx_vshuf4i_h(_1, 1); } ++// CHECK-LABEL: @vshuf4i_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vshuf4i_w(v4i32 _1) { return __builtin_lsx_vshuf4i_w(_1, 1); } ++// CHECK-LABEL: @vreplgr2vr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vreplgr2vr_b(int _1) { return __builtin_lsx_vreplgr2vr_b(_1); } ++// CHECK-LABEL: @vreplgr2vr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vreplgr2vr_h(int _1) { return __builtin_lsx_vreplgr2vr_h(_1); } ++// CHECK-LABEL: @vreplgr2vr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vreplgr2vr_w(int _1) { return __builtin_lsx_vreplgr2vr_w(_1); } ++// CHECK-LABEL: @vreplgr2vr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vreplgr2vr_d(long _1) { return __builtin_lsx_vreplgr2vr_d(_1); } ++// CHECK-LABEL: @vpcnt_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vpcnt_b(v16i8 _1) { return __builtin_lsx_vpcnt_b(_1); } ++// CHECK-LABEL: @vpcnt_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vpcnt_h(v8i16 _1) { return __builtin_lsx_vpcnt_h(_1); } ++// CHECK-LABEL: @vpcnt_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vpcnt_w(v4i32 _1) { return __builtin_lsx_vpcnt_w(_1); } ++// CHECK-LABEL: @vpcnt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vpcnt_d(v2i64 _1) { return __builtin_lsx_vpcnt_d(_1); } ++// CHECK-LABEL: @vclo_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vclo_b(v16i8 _1) { return __builtin_lsx_vclo_b(_1); } ++// CHECK-LABEL: @vclo_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vclo_h(v8i16 _1) { return __builtin_lsx_vclo_h(_1); } ++// CHECK-LABEL: @vclo_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vclo_w(v4i32 _1) { return __builtin_lsx_vclo_w(_1); } ++// CHECK-LABEL: @vclo_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vclo_d(v2i64 _1) { return __builtin_lsx_vclo_d(_1); } ++// CHECK-LABEL: @vclz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vclz_b(v16i8 _1) { return __builtin_lsx_vclz_b(_1); } ++// CHECK-LABEL: @vclz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vclz_h(v8i16 _1) { return __builtin_lsx_vclz_h(_1); } ++// CHECK-LABEL: @vclz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vclz_w(v4i32 _1) { return __builtin_lsx_vclz_w(_1); } ++// CHECK-LABEL: @vclz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vclz_d(v2i64 _1) { return __builtin_lsx_vclz_d(_1); } ++// CHECK-LABEL: @vpickve2gr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int vpickve2gr_b(v16i8 _1) { return __builtin_lsx_vpickve2gr_b(_1, 1); } ++// CHECK-LABEL: @vpickve2gr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int vpickve2gr_h(v8i16 _1) { return __builtin_lsx_vpickve2gr_h(_1, 1); } ++// CHECK-LABEL: @vpickve2gr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int vpickve2gr_w(v4i32 _1) { return __builtin_lsx_vpickve2gr_w(_1, 1); } ++// CHECK-LABEL: @vpickve2gr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i64 [[TMP0]] ++// ++long vpickve2gr_d(v2i64 _1) { return __builtin_lsx_vpickve2gr_d(_1, 1); } ++// CHECK-LABEL: @vpickve2gr_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++unsigned int vpickve2gr_bu(v16i8 _1) { ++ return __builtin_lsx_vpickve2gr_bu(_1, 1); ++} ++// CHECK-LABEL: @vpickve2gr_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++unsigned int vpickve2gr_hu(v8i16 _1) { ++ return __builtin_lsx_vpickve2gr_hu(_1, 1); ++} ++// CHECK-LABEL: @vpickve2gr_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++unsigned int vpickve2gr_wu(v4i32 _1) { ++ return __builtin_lsx_vpickve2gr_wu(_1, 1); ++} ++// CHECK-LABEL: @vpickve2gr_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i64 [[TMP0]] ++// ++unsigned long int vpickve2gr_du(v2i64 _1) { ++ return __builtin_lsx_vpickve2gr_du(_1, 1); ++} ++// CHECK-LABEL: @vinsgr2vr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> [[_1:%.*]], i32 1, i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vinsgr2vr_b(v16i8 _1) { ++ return __builtin_lsx_vinsgr2vr_b(_1, 1, 1); ++} ++// CHECK-LABEL: @vinsgr2vr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> [[_1:%.*]], i32 1, i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vinsgr2vr_h(v8i16 _1) { ++ return __builtin_lsx_vinsgr2vr_h(_1, 1, 1); ++} ++// CHECK-LABEL: @vinsgr2vr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> [[_1:%.*]], i32 1, i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vinsgr2vr_w(v4i32 _1) { ++ return __builtin_lsx_vinsgr2vr_w(_1, 1, 1); ++} ++// CHECK-LABEL: @vinsgr2vr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> [[_1:%.*]], i64 1, i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vinsgr2vr_d(v2i64 _1) { ++ return __builtin_lsx_vinsgr2vr_d(_1, 1, 1); ++} ++// CHECK-LABEL: @vfadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfadd_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfadd_s(_1, _2); ++} ++// CHECK-LABEL: @vfadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfadd_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfadd_d(_1, _2); ++} ++// CHECK-LABEL: @vfsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfsub_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfsub_s(_1, _2); ++} ++// CHECK-LABEL: @vfsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfsub_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfsub_d(_1, _2); ++} ++// CHECK-LABEL: @vfmul_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfmul_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfmul_s(_1, _2); ++} ++// CHECK-LABEL: @vfmul_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfmul_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfmul_d(_1, _2); ++} ++// CHECK-LABEL: @vfdiv_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfdiv_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfdiv_s(_1, _2); ++} ++// CHECK-LABEL: @vfdiv_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfdiv_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfdiv_d(_1, _2); ++} ++// CHECK-LABEL: @vfcvt_h_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vfcvt_h_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcvt_h_s(_1, _2); ++} ++// CHECK-LABEL: @vfcvt_s_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfcvt_s_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcvt_s_d(_1, _2); ++} ++// CHECK-LABEL: @vfmin_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfmin_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfmin_s(_1, _2); ++} ++// CHECK-LABEL: @vfmin_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfmin_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfmin_d(_1, _2); ++} ++// CHECK-LABEL: @vfmina_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfmina_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfmina_s(_1, _2); ++} ++// CHECK-LABEL: @vfmina_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfmina_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfmina_d(_1, _2); ++} ++// CHECK-LABEL: @vfmax_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfmax_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfmax_s(_1, _2); ++} ++// CHECK-LABEL: @vfmax_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfmax_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfmax_d(_1, _2); ++} ++// CHECK-LABEL: @vfmaxa_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfmaxa_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfmaxa_s(_1, _2); ++} ++// CHECK-LABEL: @vfmaxa_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfmaxa_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfmaxa_d(_1, _2); ++} ++// CHECK-LABEL: @vfclass_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfclass_s(v4f32 _1) { return __builtin_lsx_vfclass_s(_1); } ++// CHECK-LABEL: @vfclass_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfclass_d(v2f64 _1) { return __builtin_lsx_vfclass_d(_1); } ++// CHECK-LABEL: @vfsqrt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfsqrt_s(v4f32 _1) { return __builtin_lsx_vfsqrt_s(_1); } ++// CHECK-LABEL: @vfsqrt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfsqrt_d(v2f64 _1) { return __builtin_lsx_vfsqrt_d(_1); } ++// CHECK-LABEL: @vfrecip_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfrecip_s(v4f32 _1) { return __builtin_lsx_vfrecip_s(_1); } ++// CHECK-LABEL: @vfrecip_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfrecip_d(v2f64 _1) { return __builtin_lsx_vfrecip_d(_1); } ++// CHECK-LABEL: @vfrint_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfrint_s(v4f32 _1) { return __builtin_lsx_vfrint_s(_1); } ++// CHECK-LABEL: @vfrint_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfrint_d(v2f64 _1) { return __builtin_lsx_vfrint_d(_1); } ++// CHECK-LABEL: @vfrsqrt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfrsqrt_s(v4f32 _1) { return __builtin_lsx_vfrsqrt_s(_1); } ++// CHECK-LABEL: @vfrsqrt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfrsqrt_d(v2f64 _1) { return __builtin_lsx_vfrsqrt_d(_1); } ++// CHECK-LABEL: @vflogb_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vflogb_s(v4f32 _1) { return __builtin_lsx_vflogb_s(_1); } ++// CHECK-LABEL: @vflogb_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vflogb_d(v2f64 _1) { return __builtin_lsx_vflogb_d(_1); } ++// CHECK-LABEL: @vfcvth_s_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfcvth_s_h(v8i16 _1) { return __builtin_lsx_vfcvth_s_h(_1); } ++// CHECK-LABEL: @vfcvth_d_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfcvth_d_s(v4f32 _1) { return __builtin_lsx_vfcvth_d_s(_1); } ++// CHECK-LABEL: @vfcvtl_s_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfcvtl_s_h(v8i16 _1) { return __builtin_lsx_vfcvtl_s_h(_1); } ++// CHECK-LABEL: @vfcvtl_d_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfcvtl_d_s(v4f32 _1) { return __builtin_lsx_vfcvtl_d_s(_1); } ++// CHECK-LABEL: @vftint_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftint_w_s(v4f32 _1) { return __builtin_lsx_vftint_w_s(_1); } ++// CHECK-LABEL: @vftint_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftint_l_d(v2f64 _1) { return __builtin_lsx_vftint_l_d(_1); } ++// CHECK-LABEL: @vftint_wu_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vftint_wu_s(v4f32 _1) { return __builtin_lsx_vftint_wu_s(_1); } ++// CHECK-LABEL: @vftint_lu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vftint_lu_d(v2f64 _1) { return __builtin_lsx_vftint_lu_d(_1); } ++// CHECK-LABEL: @vftintrz_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftintrz_w_s(v4f32 _1) { return __builtin_lsx_vftintrz_w_s(_1); } ++// CHECK-LABEL: @vftintrz_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrz_l_d(v2f64 _1) { return __builtin_lsx_vftintrz_l_d(_1); } ++// CHECK-LABEL: @vftintrz_wu_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vftintrz_wu_s(v4f32 _1) { return __builtin_lsx_vftintrz_wu_s(_1); } ++// CHECK-LABEL: @vftintrz_lu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vftintrz_lu_d(v2f64 _1) { return __builtin_lsx_vftintrz_lu_d(_1); } ++// CHECK-LABEL: @vffint_s_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vffint_s_w(v4i32 _1) { return __builtin_lsx_vffint_s_w(_1); } ++// CHECK-LABEL: @vffint_d_l( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vffint_d_l(v2i64 _1) { return __builtin_lsx_vffint_d_l(_1); } ++// CHECK-LABEL: @vffint_s_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vffint_s_wu(v4u32 _1) { return __builtin_lsx_vffint_s_wu(_1); } ++// CHECK-LABEL: @vffint_d_lu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vffint_d_lu(v2u64 _1) { return __builtin_lsx_vffint_d_lu(_1); } ++// CHECK-LABEL: @vandn_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vandn_v(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vandn_v(_1, _2); ++} ++// CHECK-LABEL: @vneg_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vneg_b(v16i8 _1) { return __builtin_lsx_vneg_b(_1); } ++// CHECK-LABEL: @vneg_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vneg_h(v8i16 _1) { return __builtin_lsx_vneg_h(_1); } ++// CHECK-LABEL: @vneg_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vneg_w(v4i32 _1) { return __builtin_lsx_vneg_w(_1); } ++// CHECK-LABEL: @vneg_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vneg_d(v2i64 _1) { return __builtin_lsx_vneg_d(_1); } ++// CHECK-LABEL: @vmuh_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmuh_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmuh_b(_1, _2); } ++// CHECK-LABEL: @vmuh_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmuh_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmuh_h(_1, _2); } ++// CHECK-LABEL: @vmuh_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmuh_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmuh_w(_1, _2); } ++// CHECK-LABEL: @vmuh_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmuh_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmuh_d(_1, _2); } ++// CHECK-LABEL: @vmuh_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vmuh_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vmuh_bu(_1, _2); ++} ++// CHECK-LABEL: @vmuh_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vmuh_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vmuh_hu(_1, _2); ++} ++// CHECK-LABEL: @vmuh_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vmuh_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vmuh_wu(_1, _2); ++} ++// CHECK-LABEL: @vmuh_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmuh_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vmuh_du(_1, _2); ++} ++// CHECK-LABEL: @vsllwil_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsllwil_h_b(v16i8 _1) { return __builtin_lsx_vsllwil_h_b(_1, 1); } ++// CHECK-LABEL: @vsllwil_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsllwil_w_h(v8i16 _1) { return __builtin_lsx_vsllwil_w_h(_1, 1); } ++// CHECK-LABEL: @vsllwil_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsllwil_d_w(v4i32 _1) { return __builtin_lsx_vsllwil_d_w(_1, 1); } ++// CHECK-LABEL: @vsllwil_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vsllwil_hu_bu(v16u8 _1) { ++ return __builtin_lsx_vsllwil_hu_bu(_1, 1); ++} ++// CHECK-LABEL: @vsllwil_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vsllwil_wu_hu(v8u16 _1) { ++ return __builtin_lsx_vsllwil_wu_hu(_1, 1); ++} ++// CHECK-LABEL: @vsllwil_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vsllwil_du_wu(v4u32 _1) { ++ return __builtin_lsx_vsllwil_du_wu(_1, 1); ++} ++// CHECK-LABEL: @vsran_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsran_b_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsran_b_h(_1, _2); ++} ++// CHECK-LABEL: @vsran_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsran_h_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsran_h_w(_1, _2); ++} ++// CHECK-LABEL: @vsran_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsran_w_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsran_w_d(_1, _2); ++} ++// CHECK-LABEL: @vssran_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssran_b_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vssran_b_h(_1, _2); ++} ++// CHECK-LABEL: @vssran_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssran_h_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vssran_h_w(_1, _2); ++} ++// CHECK-LABEL: @vssran_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssran_w_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vssran_w_d(_1, _2); ++} ++// CHECK-LABEL: @vssran_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssran_bu_h(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vssran_bu_h(_1, _2); ++} ++// CHECK-LABEL: @vssran_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssran_hu_w(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vssran_hu_w(_1, _2); ++} ++// CHECK-LABEL: @vssran_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssran_wu_d(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vssran_wu_d(_1, _2); ++} ++// CHECK-LABEL: @vsrarn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrarn_b_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsrarn_b_h(_1, _2); ++} ++// CHECK-LABEL: @vsrarn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrarn_h_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsrarn_h_w(_1, _2); ++} ++// CHECK-LABEL: @vsrarn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrarn_w_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsrarn_w_d(_1, _2); ++} ++// CHECK-LABEL: @vssrarn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssrarn_b_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vssrarn_b_h(_1, _2); ++} ++// CHECK-LABEL: @vssrarn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssrarn_h_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vssrarn_h_w(_1, _2); ++} ++// CHECK-LABEL: @vssrarn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssrarn_w_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vssrarn_w_d(_1, _2); ++} ++// CHECK-LABEL: @vssrarn_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssrarn_bu_h(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vssrarn_bu_h(_1, _2); ++} ++// CHECK-LABEL: @vssrarn_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssrarn_hu_w(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vssrarn_hu_w(_1, _2); ++} ++// CHECK-LABEL: @vssrarn_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssrarn_wu_d(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vssrarn_wu_d(_1, _2); ++} ++// CHECK-LABEL: @vsrln_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrln_b_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsrln_b_h(_1, _2); ++} ++// CHECK-LABEL: @vsrln_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrln_h_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsrln_h_w(_1, _2); ++} ++// CHECK-LABEL: @vsrln_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrln_w_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsrln_w_d(_1, _2); ++} ++// CHECK-LABEL: @vssrln_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssrln_bu_h(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vssrln_bu_h(_1, _2); ++} ++// CHECK-LABEL: @vssrln_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssrln_hu_w(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vssrln_hu_w(_1, _2); ++} ++// CHECK-LABEL: @vssrln_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssrln_wu_d(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vssrln_wu_d(_1, _2); ++} ++// CHECK-LABEL: @vsrlrn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrlrn_b_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsrlrn_b_h(_1, _2); ++} ++// CHECK-LABEL: @vsrlrn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrlrn_h_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsrlrn_h_w(_1, _2); ++} ++// CHECK-LABEL: @vsrlrn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrlrn_w_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsrlrn_w_d(_1, _2); ++} ++// CHECK-LABEL: @vssrlrn_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssrlrn_bu_h(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vssrlrn_bu_h(_1, _2); ++} ++// CHECK-LABEL: @vssrlrn_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssrlrn_hu_w(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vssrlrn_hu_w(_1, _2); ++} ++// CHECK-LABEL: @vssrlrn_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssrlrn_wu_d(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vssrlrn_wu_d(_1, _2); ++} ++// CHECK-LABEL: @vfrstpi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vfrstpi_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vfrstpi_b(_1, _2, 1); ++} ++// CHECK-LABEL: @vfrstpi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vfrstpi_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vfrstpi_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vfrstp_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vfrstp_b(v16i8 _1, v16i8 _2, v16i8 _3) { ++ return __builtin_lsx_vfrstp_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vfrstp_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vfrstp_h(v8i16 _1, v8i16 _2, v8i16 _3) { ++ return __builtin_lsx_vfrstp_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vshuf4i_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vshuf4i_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vshuf4i_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vbsrl_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vbsrl_v(v16i8 _1) { return __builtin_lsx_vbsrl_v(_1, 1); } ++// CHECK-LABEL: @vbsll_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vbsll_v(v16i8 _1) { return __builtin_lsx_vbsll_v(_1, 1); } ++// CHECK-LABEL: @vextrins_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vextrins_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vextrins_b(_1, _2, 1); ++} ++// CHECK-LABEL: @vextrins_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vextrins_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vextrins_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vextrins_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vextrins_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vextrins_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vextrins_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vextrins_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vextrins_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vmskltz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmskltz_b(v16i8 _1) { return __builtin_lsx_vmskltz_b(_1); } ++// CHECK-LABEL: @vmskltz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmskltz_h(v8i16 _1) { return __builtin_lsx_vmskltz_h(_1); } ++// CHECK-LABEL: @vmskltz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmskltz_w(v4i32 _1) { return __builtin_lsx_vmskltz_w(_1); } ++// CHECK-LABEL: @vmskltz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmskltz_d(v2i64 _1) { return __builtin_lsx_vmskltz_d(_1); } ++// CHECK-LABEL: @vsigncov_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsigncov_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vsigncov_b(_1, _2); ++} ++// CHECK-LABEL: @vsigncov_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsigncov_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsigncov_h(_1, _2); ++} ++// CHECK-LABEL: @vsigncov_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsigncov_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsigncov_w(_1, _2); ++} ++// CHECK-LABEL: @vsigncov_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsigncov_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsigncov_d(_1, _2); ++} ++// CHECK-LABEL: @vfmadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { ++ return __builtin_lsx_vfmadd_s(_1, _2, _3); ++} ++// CHECK-LABEL: @vfmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { ++ return __builtin_lsx_vfmadd_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vfmsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { ++ return __builtin_lsx_vfmsub_s(_1, _2, _3); ++} ++// CHECK-LABEL: @vfmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { ++ return __builtin_lsx_vfmsub_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vfnmadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfnmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { ++ return __builtin_lsx_vfnmadd_s(_1, _2, _3); ++} ++// CHECK-LABEL: @vfnmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfnmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { ++ return __builtin_lsx_vfnmadd_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vfnmsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vfnmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { ++ return __builtin_lsx_vfnmsub_s(_1, _2, _3); ++} ++// CHECK-LABEL: @vfnmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vfnmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { ++ return __builtin_lsx_vfnmsub_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vftintrne_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftintrne_w_s(v4f32 _1) { return __builtin_lsx_vftintrne_w_s(_1); } ++// CHECK-LABEL: @vftintrne_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrne_l_d(v2f64 _1) { return __builtin_lsx_vftintrne_l_d(_1); } ++// CHECK-LABEL: @vftintrp_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftintrp_w_s(v4f32 _1) { return __builtin_lsx_vftintrp_w_s(_1); } ++// CHECK-LABEL: @vftintrp_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrp_l_d(v2f64 _1) { return __builtin_lsx_vftintrp_l_d(_1); } ++// CHECK-LABEL: @vftintrm_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftintrm_w_s(v4f32 _1) { return __builtin_lsx_vftintrm_w_s(_1); } ++// CHECK-LABEL: @vftintrm_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrm_l_d(v2f64 _1) { return __builtin_lsx_vftintrm_l_d(_1); } ++// CHECK-LABEL: @vftint_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftint_w_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vftint_w_d(_1, _2); ++} ++// CHECK-LABEL: @vffint_s_l( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// ++v4f32 vffint_s_l(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vffint_s_l(_1, _2); ++} ++// CHECK-LABEL: @vftintrz_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftintrz_w_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vftintrz_w_d(_1, _2); ++} ++// CHECK-LABEL: @vftintrp_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftintrp_w_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vftintrp_w_d(_1, _2); ++} ++// CHECK-LABEL: @vftintrm_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftintrm_w_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vftintrm_w_d(_1, _2); ++} ++// CHECK-LABEL: @vftintrne_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vftintrne_w_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vftintrne_w_d(_1, _2); ++} ++// CHECK-LABEL: @vftintl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintl_l_s(v4f32 _1) { return __builtin_lsx_vftintl_l_s(_1); } ++// CHECK-LABEL: @vftinth_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftinth_l_s(v4f32 _1) { return __builtin_lsx_vftinth_l_s(_1); } ++// CHECK-LABEL: @vffinth_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vffinth_d_w(v4i32 _1) { return __builtin_lsx_vffinth_d_w(_1); } ++// CHECK-LABEL: @vffintl_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// ++v2f64 vffintl_d_w(v4i32 _1) { return __builtin_lsx_vffintl_d_w(_1); } ++// CHECK-LABEL: @vftintrzl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrzl_l_s(v4f32 _1) { return __builtin_lsx_vftintrzl_l_s(_1); } ++// CHECK-LABEL: @vftintrzh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrzh_l_s(v4f32 _1) { return __builtin_lsx_vftintrzh_l_s(_1); } ++// CHECK-LABEL: @vftintrpl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrpl_l_s(v4f32 _1) { return __builtin_lsx_vftintrpl_l_s(_1); } ++// CHECK-LABEL: @vftintrph_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrph_l_s(v4f32 _1) { return __builtin_lsx_vftintrph_l_s(_1); } ++// CHECK-LABEL: @vftintrml_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrml_l_s(v4f32 _1) { return __builtin_lsx_vftintrml_l_s(_1); } ++// CHECK-LABEL: @vftintrmh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrmh_l_s(v4f32 _1) { return __builtin_lsx_vftintrmh_l_s(_1); } ++// CHECK-LABEL: @vftintrnel_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrnel_l_s(v4f32 _1) { ++ return __builtin_lsx_vftintrnel_l_s(_1); ++} ++// CHECK-LABEL: @vftintrneh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vftintrneh_l_s(v4f32 _1) { ++ return __builtin_lsx_vftintrneh_l_s(_1); ++} ++// CHECK-LABEL: @vfrintrne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> ++// CHECK-NEXT: ret <4 x i32> [[TMP1]] ++// ++v4i32 vfrintrne_s(v4f32 _1) { return __builtin_lsx_vfrintrne_s(_1); } ++// CHECK-LABEL: @vfrintrne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> ++// CHECK-NEXT: ret <2 x i64> [[TMP1]] ++// ++v2i64 vfrintrne_d(v2f64 _1) { return __builtin_lsx_vfrintrne_d(_1); } ++// CHECK-LABEL: @vfrintrz_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> ++// CHECK-NEXT: ret <4 x i32> [[TMP1]] ++// ++v4i32 vfrintrz_s(v4f32 _1) { return __builtin_lsx_vfrintrz_s(_1); } ++// CHECK-LABEL: @vfrintrz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> ++// CHECK-NEXT: ret <2 x i64> [[TMP1]] ++// ++v2i64 vfrintrz_d(v2f64 _1) { return __builtin_lsx_vfrintrz_d(_1); } ++// CHECK-LABEL: @vfrintrp_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> ++// CHECK-NEXT: ret <4 x i32> [[TMP1]] ++// ++v4i32 vfrintrp_s(v4f32 _1) { return __builtin_lsx_vfrintrp_s(_1); } ++// CHECK-LABEL: @vfrintrp_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> ++// CHECK-NEXT: ret <2 x i64> [[TMP1]] ++// ++v2i64 vfrintrp_d(v2f64 _1) { return __builtin_lsx_vfrintrp_d(_1); } ++// CHECK-LABEL: @vfrintrm_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> ++// CHECK-NEXT: ret <4 x i32> [[TMP1]] ++// ++v4i32 vfrintrm_s(v4f32 _1) { return __builtin_lsx_vfrintrm_s(_1); } ++// CHECK-LABEL: @vfrintrm_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> ++// CHECK-NEXT: ret <2 x i64> [[TMP1]] ++// ++v2i64 vfrintrm_d(v2f64 _1) { return __builtin_lsx_vfrintrm_d(_1); } ++// CHECK-LABEL: @vstelm_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1, i32 1) ++// CHECK-NEXT: ret void ++// ++void vstelm_b(v16i8 _1, void *_2) { ++ return __builtin_lsx_vstelm_b(_1, _2, 1, 1); ++} ++// CHECK-LABEL: @vstelm_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> [[_1:%.*]], ptr [[_2:%.*]], i32 2, i32 1) ++// CHECK-NEXT: ret void ++// ++void vstelm_h(v8i16 _1, void *_2) { ++ return __builtin_lsx_vstelm_h(_1, _2, 2, 1); ++} ++// CHECK-LABEL: @vstelm_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> [[_1:%.*]], ptr [[_2:%.*]], i32 4, i32 1) ++// CHECK-NEXT: ret void ++// ++void vstelm_w(v4i32 _1, void *_2) { ++ return __builtin_lsx_vstelm_w(_1, _2, 4, 1); ++} ++// CHECK-LABEL: @vstelm_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> [[_1:%.*]], ptr [[_2:%.*]], i32 8, i32 1) ++// CHECK-NEXT: ret void ++// ++void vstelm_d(v2i64 _1, void *_2) { ++ return __builtin_lsx_vstelm_d(_1, _2, 8, 1); ++} ++// CHECK-LABEL: @vaddwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwev_d_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vaddwev_d_w(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vaddwev_w_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vaddwev_w_h(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vaddwev_h_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vaddwev_h_b(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwod_d_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vaddwod_d_w(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vaddwod_w_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vaddwod_w_h(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vaddwod_h_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vaddwod_h_b(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwev_d_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vaddwev_d_wu(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vaddwev_w_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vaddwev_w_hu(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vaddwev_h_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vaddwev_h_bu(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwod_d_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vaddwod_d_wu(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vaddwod_w_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vaddwod_w_hu(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vaddwod_h_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vaddwod_h_bu(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwev_d_wu_w(v4u32 _1, v4i32 _2) { ++ return __builtin_lsx_vaddwev_d_wu_w(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vaddwev_w_hu_h(v8u16 _1, v8i16 _2) { ++ return __builtin_lsx_vaddwev_w_hu_h(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vaddwev_h_bu_b(v16u8 _1, v16i8 _2) { ++ return __builtin_lsx_vaddwev_h_bu_b(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwod_d_wu_w(v4u32 _1, v4i32 _2) { ++ return __builtin_lsx_vaddwod_d_wu_w(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vaddwod_w_hu_h(v8u16 _1, v8i16 _2) { ++ return __builtin_lsx_vaddwod_w_hu_h(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vaddwod_h_bu_b(v16u8 _1, v16i8 _2) { ++ return __builtin_lsx_vaddwod_h_bu_b(_1, _2); ++} ++// CHECK-LABEL: @vsubwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubwev_d_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsubwev_d_w(_1, _2); ++} ++// CHECK-LABEL: @vsubwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsubwev_w_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsubwev_w_h(_1, _2); ++} ++// CHECK-LABEL: @vsubwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsubwev_h_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vsubwev_h_b(_1, _2); ++} ++// CHECK-LABEL: @vsubwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubwod_d_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsubwod_d_w(_1, _2); ++} ++// CHECK-LABEL: @vsubwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsubwod_w_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsubwod_w_h(_1, _2); ++} ++// CHECK-LABEL: @vsubwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsubwod_h_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vsubwod_h_b(_1, _2); ++} ++// CHECK-LABEL: @vsubwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubwev_d_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vsubwev_d_wu(_1, _2); ++} ++// CHECK-LABEL: @vsubwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsubwev_w_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vsubwev_w_hu(_1, _2); ++} ++// CHECK-LABEL: @vsubwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsubwev_h_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vsubwev_h_bu(_1, _2); ++} ++// CHECK-LABEL: @vsubwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubwod_d_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vsubwod_d_wu(_1, _2); ++} ++// CHECK-LABEL: @vsubwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsubwod_w_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vsubwod_w_hu(_1, _2); ++} ++// CHECK-LABEL: @vsubwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsubwod_h_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vsubwod_h_bu(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwev_q_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vaddwev_q_d(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwod_q_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vaddwod_q_d(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwev_q_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vaddwev_q_du(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwod_q_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vaddwod_q_du(_1, _2); ++} ++// CHECK-LABEL: @vsubwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubwev_q_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsubwev_q_d(_1, _2); ++} ++// CHECK-LABEL: @vsubwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubwod_q_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsubwod_q_d(_1, _2); ++} ++// CHECK-LABEL: @vsubwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubwev_q_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vsubwev_q_du(_1, _2); ++} ++// CHECK-LABEL: @vsubwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsubwod_q_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vsubwod_q_du(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwev_q_du_d(v2u64 _1, v2i64 _2) { ++ return __builtin_lsx_vaddwev_q_du_d(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vaddwod_q_du_d(v2u64 _1, v2i64 _2) { ++ return __builtin_lsx_vaddwod_q_du_d(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwev_d_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vmulwev_d_w(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmulwev_w_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vmulwev_w_h(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmulwev_h_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vmulwev_h_b(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwod_d_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vmulwod_d_w(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmulwod_w_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vmulwod_w_h(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmulwod_h_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vmulwod_h_b(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwev_d_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vmulwev_d_wu(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmulwev_w_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vmulwev_w_hu(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmulwev_h_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vmulwev_h_bu(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwod_d_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vmulwod_d_wu(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmulwod_w_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vmulwod_w_hu(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmulwod_h_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vmulwod_h_bu(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwev_d_wu_w(v4u32 _1, v4i32 _2) { ++ return __builtin_lsx_vmulwev_d_wu_w(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmulwev_w_hu_h(v8u16 _1, v8i16 _2) { ++ return __builtin_lsx_vmulwev_w_hu_h(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmulwev_h_bu_b(v16u8 _1, v16i8 _2) { ++ return __builtin_lsx_vmulwev_h_bu_b(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwod_d_wu_w(v4u32 _1, v4i32 _2) { ++ return __builtin_lsx_vmulwod_d_wu_w(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmulwod_w_hu_h(v8u16 _1, v8i16 _2) { ++ return __builtin_lsx_vmulwod_w_hu_h(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmulwod_h_bu_b(v16u8 _1, v16i8 _2) { ++ return __builtin_lsx_vmulwod_h_bu_b(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwev_q_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vmulwev_q_d(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwod_q_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vmulwod_q_d(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwev_q_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vmulwev_q_du(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwod_q_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vmulwod_q_du(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwev_q_du_d(v2u64 _1, v2i64 _2) { ++ return __builtin_lsx_vmulwev_q_du_d(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmulwod_q_du_d(v2u64 _1, v2i64 _2) { ++ return __builtin_lsx_vmulwod_q_du_d(_1, _2); ++} ++// CHECK-LABEL: @vhaddw_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vhaddw_q_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vhaddw_q_d(_1, _2); ++} ++// CHECK-LABEL: @vhaddw_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vhaddw_qu_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vhaddw_qu_du(_1, _2); ++} ++// CHECK-LABEL: @vhsubw_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vhsubw_q_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vhsubw_q_d(_1, _2); ++} ++// CHECK-LABEL: @vhsubw_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vhsubw_qu_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vhsubw_qu_du(_1, _2); ++} ++// CHECK-LABEL: @vmaddwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaddwev_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { ++ return __builtin_lsx_vmaddwev_d_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmaddwev_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { ++ return __builtin_lsx_vmaddwev_w_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmaddwev_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { ++ return __builtin_lsx_vmaddwev_h_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmaddwev_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { ++ return __builtin_lsx_vmaddwev_d_wu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vmaddwev_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { ++ return __builtin_lsx_vmaddwev_w_hu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vmaddwev_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { ++ return __builtin_lsx_vmaddwev_h_bu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaddwod_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { ++ return __builtin_lsx_vmaddwod_d_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmaddwod_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { ++ return __builtin_lsx_vmaddwod_w_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmaddwod_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { ++ return __builtin_lsx_vmaddwod_h_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmaddwod_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { ++ return __builtin_lsx_vmaddwod_d_wu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vmaddwod_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { ++ return __builtin_lsx_vmaddwod_w_hu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vmaddwod_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { ++ return __builtin_lsx_vmaddwod_h_bu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaddwev_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { ++ return __builtin_lsx_vmaddwev_d_wu_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmaddwev_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { ++ return __builtin_lsx_vmaddwev_w_hu_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmaddwev_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { ++ return __builtin_lsx_vmaddwev_h_bu_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaddwod_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { ++ return __builtin_lsx_vmaddwod_d_wu_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vmaddwod_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { ++ return __builtin_lsx_vmaddwod_w_hu_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vmaddwod_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { ++ return __builtin_lsx_vmaddwod_h_bu_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaddwev_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { ++ return __builtin_lsx_vmaddwev_q_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaddwod_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { ++ return __builtin_lsx_vmaddwod_q_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmaddwev_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { ++ return __builtin_lsx_vmaddwev_q_du(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vmaddwod_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { ++ return __builtin_lsx_vmaddwod_q_du(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaddwev_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { ++ return __builtin_lsx_vmaddwev_q_du_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vmaddwod_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { ++ return __builtin_lsx_vmaddwod_q_du_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vrotr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vrotr_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vrotr_b(_1, _2); ++} ++// CHECK-LABEL: @vrotr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vrotr_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vrotr_h(_1, _2); ++} ++// CHECK-LABEL: @vrotr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vrotr_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vrotr_w(_1, _2); ++} ++// CHECK-LABEL: @vrotr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vrotr_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vrotr_d(_1, _2); ++} ++// CHECK-LABEL: @vadd_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vadd_q(v2i64 _1, v2i64 _2) { return __builtin_lsx_vadd_q(_1, _2); } ++// CHECK-LABEL: @vsub_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsub_q(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsub_q(_1, _2); } ++// CHECK-LABEL: @vldrepl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vldrepl_b(void *_1) { return __builtin_lsx_vldrepl_b(_1, 1); } ++// CHECK-LABEL: @vldrepl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr [[_1:%.*]], i32 2) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vldrepl_h(void *_1) { return __builtin_lsx_vldrepl_h(_1, 2); } ++// CHECK-LABEL: @vldrepl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr [[_1:%.*]], i32 4) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vldrepl_w(void *_1) { return __builtin_lsx_vldrepl_w(_1, 4); } ++// CHECK-LABEL: @vldrepl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr [[_1:%.*]], i32 8) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vldrepl_d(void *_1) { return __builtin_lsx_vldrepl_d(_1, 8); } ++// CHECK-LABEL: @vmskgez_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmskgez_b(v16i8 _1) { return __builtin_lsx_vmskgez_b(_1); } ++// CHECK-LABEL: @vmsknz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vmsknz_b(v16i8 _1) { return __builtin_lsx_vmsknz_b(_1); } ++// CHECK-LABEL: @vexth_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vexth_h_b(v16i8 _1) { return __builtin_lsx_vexth_h_b(_1); } ++// CHECK-LABEL: @vexth_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vexth_w_h(v8i16 _1) { return __builtin_lsx_vexth_w_h(_1); } ++// CHECK-LABEL: @vexth_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vexth_d_w(v4i32 _1) { return __builtin_lsx_vexth_d_w(_1); } ++// CHECK-LABEL: @vexth_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vexth_q_d(v2i64 _1) { return __builtin_lsx_vexth_q_d(_1); } ++// CHECK-LABEL: @vexth_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vexth_hu_bu(v16u8 _1) { return __builtin_lsx_vexth_hu_bu(_1); } ++// CHECK-LABEL: @vexth_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vexth_wu_hu(v8u16 _1) { return __builtin_lsx_vexth_wu_hu(_1); } ++// CHECK-LABEL: @vexth_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vexth_du_wu(v4u32 _1) { return __builtin_lsx_vexth_du_wu(_1); } ++// CHECK-LABEL: @vexth_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vexth_qu_du(v2u64 _1) { return __builtin_lsx_vexth_qu_du(_1); } ++// CHECK-LABEL: @vrotri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vrotri_b(v16i8 _1) { return __builtin_lsx_vrotri_b(_1, 1); } ++// CHECK-LABEL: @vrotri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vrotri_h(v8i16 _1) { return __builtin_lsx_vrotri_h(_1, 1); } ++// CHECK-LABEL: @vrotri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vrotri_w(v4i32 _1) { return __builtin_lsx_vrotri_w(_1, 1); } ++// CHECK-LABEL: @vrotri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vrotri_d(v2i64 _1) { return __builtin_lsx_vrotri_d(_1, 1); } ++// CHECK-LABEL: @vextl_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vextl_q_d(v2i64 _1) { return __builtin_lsx_vextl_q_d(_1); } ++// CHECK-LABEL: @vsrlni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vsrlni_b_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrlni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsrlni_h_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrlni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsrlni_w_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrlni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsrlni_d_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrlrni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vsrlrni_b_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrlrni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsrlrni_h_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrlrni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsrlrni_w_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrlrni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsrlrni_d_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vssrlni_b_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vssrlni_h_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vssrlni_w_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vssrlni_d_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2) { ++ return __builtin_lsx_vssrlni_bu_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2) { ++ return __builtin_lsx_vssrlni_hu_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2) { ++ return __builtin_lsx_vssrlni_wu_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2) { ++ return __builtin_lsx_vssrlni_du_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlrni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vssrlrni_b_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlrni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vssrlrni_h_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlrni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vssrlrni_w_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlrni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vssrlrni_d_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlrni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2) { ++ return __builtin_lsx_vssrlrni_bu_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlrni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2) { ++ return __builtin_lsx_vssrlrni_hu_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlrni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2) { ++ return __builtin_lsx_vssrlrni_wu_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlrni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2) { ++ return __builtin_lsx_vssrlrni_du_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrani_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrani_b_h(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vsrani_b_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrani_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrani_h_w(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsrani_h_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrani_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrani_w_d(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsrani_w_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrani_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrani_d_q(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsrani_d_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrarni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vsrarni_b_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrarni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsrarni_h_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrarni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsrarni_w_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrarni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsrarni_d_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrani_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssrani_b_h(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vssrani_b_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrani_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssrani_h_w(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vssrani_h_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrani_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssrani_w_d(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vssrani_w_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrani_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vssrani_d_q(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vssrani_d_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrani_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2) { ++ return __builtin_lsx_vssrani_bu_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrani_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2) { ++ return __builtin_lsx_vssrani_hu_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrani_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2) { ++ return __builtin_lsx_vssrani_wu_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrani_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vssrani_du_q(v2u64 _1, v2i64 _2) { ++ return __builtin_lsx_vssrani_du_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrarni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vssrarni_b_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrarni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vssrarni_h_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrarni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vssrarni_w_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrarni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vssrarni_d_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrarni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2) { ++ return __builtin_lsx_vssrarni_bu_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrarni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2) { ++ return __builtin_lsx_vssrarni_hu_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrarni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2) { ++ return __builtin_lsx_vssrarni_wu_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrarni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2) { ++ return __builtin_lsx_vssrarni_du_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vpermi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vpermi_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vpermi_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vld( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vld(ptr [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vld(void *_1) { return __builtin_lsx_vld(_1, 1); } ++// CHECK-LABEL: @vst( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vst(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret void ++// ++void vst(v16i8 _1, void *_2) { return __builtin_lsx_vst(_1, _2, 1); } ++// CHECK-LABEL: @vssrlrn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssrlrn_b_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vssrlrn_b_h(_1, _2); ++} ++// CHECK-LABEL: @vssrlrn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssrlrn_h_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vssrlrn_h_w(_1, _2); ++} ++// CHECK-LABEL: @vssrlrn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssrlrn_w_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vssrlrn_w_d(_1, _2); ++} ++// CHECK-LABEL: @vssrln_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vssrln_b_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vssrln_b_h(_1, _2); ++} ++// CHECK-LABEL: @vssrln_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vssrln_h_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vssrln_h_w(_1, _2); ++} ++// CHECK-LABEL: @vssrln_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vssrln_w_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vssrln_w_d(_1, _2); ++} ++// CHECK-LABEL: @vorn_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vorn_v(v16i8 _1, v16i8 _2) { return __builtin_lsx_vorn_v(_1, _2); } ++// CHECK-LABEL: @vldi( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldi(i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vldi() { return __builtin_lsx_vldi(1); } ++// CHECK-LABEL: @vshuf_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) { ++ return __builtin_lsx_vshuf_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vldx( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldx(ptr [[_1:%.*]], i64 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vldx(void *_1) { return __builtin_lsx_vldx(_1, 1); } ++// CHECK-LABEL: @vstx( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstx(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i64 1) ++// CHECK-NEXT: ret void ++// ++void vstx(v16i8 _1, void *_2) { return __builtin_lsx_vstx(_1, _2, 1); } ++// CHECK-LABEL: @vextl_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2u64 vextl_qu_du(v2u64 _1) { return __builtin_lsx_vextl_qu_du(_1); } ++// CHECK-LABEL: @bnz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bnz_b(v16u8 _1) { return __builtin_lsx_bnz_b(_1); } ++// CHECK-LABEL: @bnz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bnz_d(v2u64 _1) { return __builtin_lsx_bnz_d(_1); } ++// CHECK-LABEL: @bnz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bnz_h(v8u16 _1) { return __builtin_lsx_bnz_h(_1); } ++// CHECK-LABEL: @bnz_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bnz_v(v16u8 _1) { return __builtin_lsx_bnz_v(_1); } ++// CHECK-LABEL: @bnz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bnz_w(v4u32 _1) { return __builtin_lsx_bnz_w(_1); } ++// CHECK-LABEL: @bz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bz_b(v16u8 _1) { return __builtin_lsx_bz_b(_1); } ++// CHECK-LABEL: @bz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bz_d(v2u64 _1) { return __builtin_lsx_bz_d(_1); } ++// CHECK-LABEL: @bz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bz_h(v8u16 _1) { return __builtin_lsx_bz_h(_1); } ++// CHECK-LABEL: @bz_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bz_v(v16u8 _1) { return __builtin_lsx_bz_v(_1); } ++// CHECK-LABEL: @bz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int bz_w(v4u32 _1) { return __builtin_lsx_bz_w(_1); } ++// CHECK-LABEL: @vfcmp_caf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_caf_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_caf_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_caf_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_caf_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_caf_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_ceq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_ceq_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_ceq_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_ceq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_ceq_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_ceq_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_cle_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_cle_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cle_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_cle_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_cle_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_clt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_clt_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_clt_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_clt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_clt_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_clt_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_cne_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_cne_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_cne_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_cne_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cor_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_cor_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_cor_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cor_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_cor_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_cor_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cueq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_cueq_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_cueq_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cueq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_cueq_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_cueq_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cule_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_cule_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_cule_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cule_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_cule_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_cule_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cult_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_cult_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_cult_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cult_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_cult_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_cult_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cun_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_cun_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_cun_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cune_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_cune_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_cune_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cune_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_cune_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_cune_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cun_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_cun_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_cun_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_saf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_saf_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_saf_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_saf_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_saf_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_saf_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_seq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_seq_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_seq_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_seq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_seq_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_seq_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_sle_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_sle_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sle_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_sle_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_sle_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_slt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_slt_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_slt_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_slt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_slt_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_slt_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_sne_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_sne_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_sne_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_sne_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sor_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_sor_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_sor_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sor_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_sor_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_sor_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sueq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_sueq_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_sueq_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sueq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_sueq_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_sueq_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sule_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_sule_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_sule_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sule_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_sule_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_sule_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sult_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_sult_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_sult_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sult_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_sult_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_sult_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sun_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_sun_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_sun_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sune_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vfcmp_sune_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_sune_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sune_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_sune_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_sune_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sun_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vfcmp_sun_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_sun_s(_1, _2); ++} ++// CHECK-LABEL: @vrepli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 1) ++// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// ++v16i8 vrepli_b() { return __builtin_lsx_vrepli_b(1); } ++// CHECK-LABEL: @vrepli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 1) ++// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// ++v2i64 vrepli_d() { return __builtin_lsx_vrepli_d(1); } ++// CHECK-LABEL: @vrepli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 1) ++// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// ++v8i16 vrepli_h() { return __builtin_lsx_vrepli_h(1); } ++// CHECK-LABEL: @vrepli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 1) ++// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// ++v4i32 vrepli_w() { return __builtin_lsx_vrepli_w(1); } +-- +2.20.1 + diff --git a/0007-LoongArch-CodeGen-Add-LASX-builtin-testcases.patch b/0007-LoongArch-CodeGen-Add-LASX-builtin-testcases.patch new file mode 100644 index 0000000..6594b59 --- /dev/null +++ b/0007-LoongArch-CodeGen-Add-LASX-builtin-testcases.patch @@ -0,0 +1,11693 @@ +From 6261519a92fc5ab54c5f12f940c33128f981dad9 Mon Sep 17 00:00:00 2001 +From: chenli +Date: Fri, 27 Oct 2023 15:58:55 +0800 +Subject: [PATCH 07/17] [LoongArch][CodeGen] Add LASX builtin testcases + +(cherry picked from commit 535408eedbf812d9038bd40a0faae5001d2256cf) + +Change-Id: I13202fefe0ee5d4ac7e37907c49802c829929dca +--- + .../LoongArch/lasx/builtin-alias-error.c | 1373 +++++ + .../CodeGen/LoongArch/lasx/builtin-alias.c | 4430 ++++++++++++++++ + .../CodeGen/LoongArch/lasx/builtin-error.c | 1392 ++++++ + clang/test/CodeGen/LoongArch/lasx/builtin.c | 4452 +++++++++++++++++ + 4 files changed, 11647 insertions(+) + create mode 100644 clang/test/CodeGen/LoongArch/lasx/builtin-alias-error.c + create mode 100644 clang/test/CodeGen/LoongArch/lasx/builtin-alias.c + create mode 100644 clang/test/CodeGen/LoongArch/lasx/builtin-error.c + create mode 100644 clang/test/CodeGen/LoongArch/lasx/builtin.c + +diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-alias-error.c b/clang/test/CodeGen/LoongArch/lasx/builtin-alias-error.c +new file mode 100644 +index 000000000000..2a3862bbe3c1 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/lasx/builtin-alias-error.c +@@ -0,0 +1,1373 @@ ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -verify %s ++ ++#include ++ ++v32i8 xvslli_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvslli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvslli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvslli_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvslli_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvslli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvslli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvslli_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvslli_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvslli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslli_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvslli_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvslli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvslli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvslli_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrai_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvsrai_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvsrai_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvsrai_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrai_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvsrai_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsrai_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsrai_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrai_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvsrai_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsrai_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsrai_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrai_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvsrai_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsrai_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsrai_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrari_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvsrari_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvsrari_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvsrari_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrari_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvsrari_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsrari_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsrari_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrari_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvsrari_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsrari_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsrari_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrari_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvsrari_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsrari_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsrari_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrli_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvsrli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvsrli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvsrli_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrli_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvsrli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsrli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsrli_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrli_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvsrli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsrli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsrli_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrli_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvsrli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsrli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsrli_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrlri_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvsrlri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvsrlri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvsrlri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrlri_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvsrlri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsrlri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsrlri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrlri_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvsrlri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsrlri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsrlri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrlri_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvsrlri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsrlri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsrlri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvbitclri_b(v32u8 _1, int var) { ++ v32u8 res = __lasx_xvbitclri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvbitclri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvbitclri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_b' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvbitclri_h(v16u16 _1, int var) { ++ v16u16 res = __lasx_xvbitclri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvbitclri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvbitclri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_h' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvbitclri_w(v8u32 _1, int var) { ++ v8u32 res = __lasx_xvbitclri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvbitclri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvbitclri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_w' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvbitclri_d(v4u64 _1, int var) { ++ v4u64 res = __lasx_xvbitclri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvbitclri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvbitclri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvbitseti_b(v32u8 _1, int var) { ++ v32u8 res = __lasx_xvbitseti_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvbitseti_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvbitseti_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_b' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvbitseti_h(v16u16 _1, int var) { ++ v16u16 res = __lasx_xvbitseti_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvbitseti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvbitseti_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_h' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvbitseti_w(v8u32 _1, int var) { ++ v8u32 res = __lasx_xvbitseti_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvbitseti_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvbitseti_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_w' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvbitseti_d(v4u64 _1, int var) { ++ v4u64 res = __lasx_xvbitseti_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvbitseti_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvbitseti_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvbitrevi_b(v32u8 _1, int var) { ++ v32u8 res = __lasx_xvbitrevi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvbitrevi_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvbitrevi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_b' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvbitrevi_h(v16u16 _1, int var) { ++ v16u16 res = __lasx_xvbitrevi_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvbitrevi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvbitrevi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_h' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvbitrevi_w(v8u32 _1, int var) { ++ v8u32 res = __lasx_xvbitrevi_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvbitrevi_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvbitrevi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_w' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvbitrevi_d(v4u64 _1, int var) { ++ v4u64 res = __lasx_xvbitrevi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvbitrevi_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvbitrevi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvaddi_bu(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvaddi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvaddi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvaddi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_bu' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvaddi_hu(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvaddi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvaddi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvaddi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_hu' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvaddi_wu(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvaddi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvaddi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvaddi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_wu' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvaddi_du(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvaddi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvaddi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvaddi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsubi_bu(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvsubi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsubi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsubi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_bu' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsubi_hu(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvsubi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsubi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsubi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_hu' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsubi_wu(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvsubi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsubi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsubi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_wu' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsubi_du(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvsubi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsubi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsubi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvmaxi_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvmaxi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvmaxi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvmaxi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvmaxi_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvmaxi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvmaxi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvmaxi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvmaxi_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvmaxi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvmaxi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvmaxi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvmaxi_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvmaxi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvmaxi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvmaxi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvmaxi_bu(v32u8 _1, int var) { ++ v32u8 res = __lasx_xvmaxi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvmaxi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvmaxi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_bu' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvmaxi_hu(v16u16 _1, int var) { ++ v16u16 res = __lasx_xvmaxi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvmaxi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvmaxi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_hu' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvmaxi_wu(v8u32 _1, int var) { ++ v8u32 res = __lasx_xvmaxi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvmaxi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvmaxi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_wu' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvmaxi_du(v4u64 _1, int var) { ++ v4u64 res = __lasx_xvmaxi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvmaxi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvmaxi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvmini_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvmini_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvmini_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvmini_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvmini_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvmini_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvmini_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvmini_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_h' must be a constant integer}}} ++ return res; ++} ++ ++v8i32 xvmini_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvmini_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvmini_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvmini_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvmini_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvmini_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvmini_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvmini_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvmini_bu(v32u8 _1, int var) { ++ v32u8 res = __lasx_xvmini_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvmini_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvmini_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_bu' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvmini_hu(v16u16 _1, int var) { ++ v16u16 res = __lasx_xvmini_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvmini_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvmini_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_hu' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvmini_wu(v8u32 _1, int var) { ++ v8u32 res = __lasx_xvmini_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvmini_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvmini_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_wu' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvmini_du(v4u64 _1, int var) { ++ v4u64 res = __lasx_xvmini_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvmini_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvmini_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvseqi_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvseqi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvseqi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvseqi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvseqi_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvseqi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvseqi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvseqi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvseqi_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvseqi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvseqi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvseqi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvseqi_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvseqi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvseqi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvseqi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvslti_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvslti_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvslti_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvslti_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvslti_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvslti_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvslti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvslti_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvslti_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvslti_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvslti_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvslti_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvslti_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvslti_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvslti_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvslti_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvslti_bu(v32u8 _1, int var) { ++ v32i8 res = __lasx_xvslti_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslti_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslti_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_bu' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvslti_hu(v16u16 _1, int var) { ++ v16i16 res = __lasx_xvslti_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslti_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslti_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_hu' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvslti_wu(v8u32 _1, int var) { ++ v8i32 res = __lasx_xvslti_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslti_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslti_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_wu' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvslti_du(v4u64 _1, int var) { ++ v4i64 res = __lasx_xvslti_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslti_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslti_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvslei_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvslei_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvslei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvslei_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvslei_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvslei_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvslei_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvslei_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvslei_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvslei_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvslei_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvslei_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvslei_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvslei_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvslei_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __lasx_xvslei_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvslei_bu(v32u8 _1, int var) { ++ v32i8 res = __lasx_xvslei_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslei_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslei_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_bu' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvslei_hu(v16u16 _1, int var) { ++ v16i16 res = __lasx_xvslei_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslei_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslei_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_hu' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvslei_wu(v8u32 _1, int var) { ++ v8i32 res = __lasx_xvslei_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslei_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslei_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_wu' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvslei_du(v4u64 _1, int var) { ++ v4i64 res = __lasx_xvslei_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslei_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvslei_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsat_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvsat_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvsat_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvsat_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsat_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvsat_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsat_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsat_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsat_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvsat_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsat_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsat_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsat_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvsat_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsat_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsat_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvsat_bu(v32u8 _1, int var) { ++ v32u8 res = __lasx_xvsat_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvsat_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvsat_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_bu' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvsat_hu(v16u16 _1, int var) { ++ v16u16 res = __lasx_xvsat_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsat_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsat_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_hu' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvsat_wu(v8u32 _1, int var) { ++ v8u32 res = __lasx_xvsat_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsat_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsat_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_wu' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvsat_du(v4u64 _1, int var) { ++ v4u64 res = __lasx_xvsat_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsat_du(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsat_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvrepl128vei_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvrepl128vei_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvrepl128vei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvrepl128vei_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvrepl128vei_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvrepl128vei_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvrepl128vei_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvrepl128vei_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvrepl128vei_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvrepl128vei_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __lasx_xvrepl128vei_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __lasx_xvrepl128vei_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvrepl128vei_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvrepl128vei_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} ++ res |= __lasx_xvrepl128vei_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ res |= __lasx_xvrepl128vei_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvandi_b(v32u8 _1, int var) { ++ v32u8 res = __lasx_xvandi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lasx_xvandi_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lasx_xvandi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvandi_b' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvori_b(v32u8 _1, int var) { ++ v32u8 res = __lasx_xvori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lasx_xvori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lasx_xvori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvori_b' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvnori_b(v32u8 _1, int var) { ++ v32u8 res = __lasx_xvnori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lasx_xvnori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lasx_xvnori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvnori_b' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvxori_b(v32u8 _1, int var) { ++ v32u8 res = __lasx_xvxori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lasx_xvxori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lasx_xvxori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvxori_b' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvbitseli_b(v32u8 _1, v32u8 _2, int var) { ++ v32u8 res = __lasx_xvbitseli_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lasx_xvbitseli_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lasx_xvbitseli_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvbitseli_b' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvshuf4i_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvshuf4i_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lasx_xvshuf4i_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lasx_xvshuf4i_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvshuf4i_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvshuf4i_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lasx_xvshuf4i_h(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lasx_xvshuf4i_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvshuf4i_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvshuf4i_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lasx_xvshuf4i_w(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lasx_xvshuf4i_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __lasx_xvshuf4i_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lasx_xvshuf4i_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lasx_xvshuf4i_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_d' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvpermi_w(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __lasx_xvpermi_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lasx_xvpermi_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lasx_xvpermi_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvpermi_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvpermi_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvpermi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lasx_xvpermi_d(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lasx_xvpermi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpermi_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvpermi_q(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __lasx_xvpermi_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lasx_xvpermi_q(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lasx_xvpermi_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvpermi_q' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsllwil_h_b(v32i8 _1, int var) { ++ v16i16 res = __lasx_xvsllwil_h_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvsllwil_h_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvsllwil_h_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_h_b' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsllwil_w_h(v16i16 _1, int var) { ++ v8i32 res = __lasx_xvsllwil_w_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsllwil_w_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsllwil_w_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_w_h' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsllwil_d_w(v8i32 _1, int var) { ++ v4i64 res = __lasx_xvsllwil_d_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsllwil_d_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsllwil_d_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_d_w' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvsllwil_hu_bu(v32u8 _1, int var) { ++ v16u16 res = __lasx_xvsllwil_hu_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvsllwil_hu_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvsllwil_hu_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_hu_bu' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvsllwil_wu_hu(v16u16 _1, int var) { ++ v8u32 res = __lasx_xvsllwil_wu_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsllwil_wu_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsllwil_wu_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_wu_hu' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvsllwil_du_wu(v8u32 _1, int var) { ++ v4u64 res = __lasx_xvsllwil_du_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsllwil_du_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsllwil_du_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_du_wu' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __lasx_xvfrstpi_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvfrstpi_b(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvfrstpi_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvfrstpi_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __lasx_xvfrstpi_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvfrstpi_h(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvfrstpi_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvfrstpi_h' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvbsrl_v(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvbsrl_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvbsrl_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvbsrl_v(_1, var); // expected-error {{argument to '__builtin_lasx_xvbsrl_v' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvbsll_v(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvbsll_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvbsll_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvbsll_v(_1, var); // expected-error {{argument to '__builtin_lasx_xvbsll_v' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvextrins_b(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __lasx_xvextrins_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lasx_xvextrins_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lasx_xvextrins_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvextrins_h(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __lasx_xvextrins_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lasx_xvextrins_h(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lasx_xvextrins_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvextrins_w(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __lasx_xvextrins_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lasx_xvextrins_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lasx_xvextrins_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvextrins_d(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __lasx_xvextrins_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __lasx_xvextrins_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __lasx_xvextrins_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvld(void *_1, int var) { ++ v32i8 res = __lasx_xvld(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ res |= __lasx_xvld(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} ++ res |= __lasx_xvld(_1, var); // expected-error {{argument to '__builtin_lasx_xvld' must be a constant integer}} ++ return res; ++} ++ ++void xvst(v32i8 _1, void *_2, int var) { ++ __lasx_xvst(_1, _2, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ __lasx_xvst(_1, _2, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} ++ __lasx_xvst(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvst' must be a constant integer}} ++} ++ ++void xvstelm_b(v32i8 _1, void * _2, int var) { ++ __lasx_xvstelm_b(_1, _2, -129, 1); // expected-error {{argument value -129 is outside the valid range [-128, 127]}} ++ __lasx_xvstelm_b(_1, _2, 128, 1); // expected-error {{argument value 128 is outside the valid range [-128, 127]}} ++ __lasx_xvstelm_b(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_b' must be a constant integer}} ++} ++ ++void xvstelm_h(v16i16 _1, void * _2, int var) { ++ __lasx_xvstelm_h(_1, _2, -258, 1); // expected-error {{argument value -258 is outside the valid range [-256, 254]}} ++ __lasx_xvstelm_h(_1, _2, 256, 1); // expected-error {{argument value 256 is outside the valid range [-256, 254]}} ++ __lasx_xvstelm_h(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_h' must be a constant integer}} ++} ++ ++void xvstelm_w(v8i32 _1, void * _2, int var) { ++ __lasx_xvstelm_w(_1, _2, -516, 1); // expected-error {{argument value -516 is outside the valid range [-512, 508]}} ++ __lasx_xvstelm_w(_1, _2, 512, 1); // expected-error {{argument value 512 is outside the valid range [-512, 508]}} ++ __lasx_xvstelm_w(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_w' must be a constant integer}} ++} ++ ++void xvstelm_d(v4i64 _1, void * _2, int var) { ++ __lasx_xvstelm_d(_1, _2, -1032, 1); // expected-error {{argument value -1032 is outside the valid range [-1024, 1016]}} ++ __lasx_xvstelm_d(_1, _2, 1024, 1); // expected-error {{argument value 1024 is outside the valid range [-1024, 1016]}} ++ __lasx_xvstelm_d(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_d' must be a constant integer}} ++} ++ ++void xvstelm_b_idx(v32i8 _1, void * _2, int var) { ++ __lasx_xvstelm_b(_1, _2, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ __lasx_xvstelm_b(_1, _2, 1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ __lasx_xvstelm_b(_1, _2, 1, var); // expected-error {{argument to '__builtin_lasx_xvstelm_b' must be a constant integer}} ++} ++ ++void xvstelm_h_idx(v16i16 _1, void * _2, int var) { ++ __lasx_xvstelm_h(_1, _2, 2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ __lasx_xvstelm_h(_1, _2, 2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ __lasx_xvstelm_h(_1, _2, 2, var); // expected-error {{argument to '__builtin_lasx_xvstelm_h' must be a constant integer}} ++} ++ ++void xvstelm_w_idx(v8i32 _1, void * _2, int var) { ++ __lasx_xvstelm_w(_1, _2, 4, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ __lasx_xvstelm_w(_1, _2, 4, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ __lasx_xvstelm_w(_1, _2, 4, var); // expected-error {{argument to '__builtin_lasx_xvstelm_w' must be a constant integer}} ++} ++ ++void xvstelm_d_idx(v4i64 _1, void * _2, int var) { ++ __lasx_xvstelm_d(_1, _2, 8, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ __lasx_xvstelm_d(_1, _2, 8, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ __lasx_xvstelm_d(_1, _2, 8, var); // expected-error {{argument to '__builtin_lasx_xvstelm_d' must be a constant integer}} ++} ++ ++v8i32 xvinsve0_w(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __lasx_xvinsve0_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvinsve0_w(_1, _2, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvinsve0_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvinsve0_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvinsve0_d(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __lasx_xvinsve0_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __lasx_xvinsve0_d(_1, _2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __lasx_xvinsve0_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvinsve0_d' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvpickve_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvpickve_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvpickve_w(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvpickve_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvpickve_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvpickve_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __lasx_xvpickve_d(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __lasx_xvpickve_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvldi(int var) { ++ v4i64 res = __lasx_xvldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}} ++ res |= __lasx_xvldi(4096); // expected-error {{argument value 4096 is outside the valid range [-4096, 4095]}} ++ res |= __lasx_xvldi(var); // expected-error {{argument to '__builtin_lasx_xvldi' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvinsgr2vr_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvinsgr2vr_w(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvinsgr2vr_w(_1, 1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvinsgr2vr_w(_1, 1, var); // expected-error {{argument to '__builtin_lasx_xvinsgr2vr_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvinsgr2vr_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvinsgr2vr_d(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __lasx_xvinsgr2vr_d(_1, 1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __lasx_xvinsgr2vr_d(_1, 1, var); // expected-error {{argument to '__builtin_lasx_xvinsgr2vr_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvldrepl_b(void *_1, int var) { ++ v32i8 res = __lasx_xvldrepl_b(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ res |= __lasx_xvldrepl_b(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} ++ res |= __lasx_xvldrepl_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvldrepl_h(void *_1, int var) { ++ v16i16 res = __lasx_xvldrepl_h(_1, -2050); // expected-error {{argument value -2050 is outside the valid range [-2048, 2046]}} ++ res |= __lasx_xvldrepl_h(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2046]}} ++ res |= __lasx_xvldrepl_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvldrepl_w(void *_1, int var) { ++ v8i32 res = __lasx_xvldrepl_w(_1, -2052); // expected-error {{argument value -2052 is outside the valid range [-2048, 2044]}} ++ res |= __lasx_xvldrepl_w(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2044]}} ++ res |= __lasx_xvldrepl_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvldrepl_d(void *_1, int var) { ++ v4i64 res = __lasx_xvldrepl_d(_1, -2056); // expected-error {{argument value -2056 is outside the valid range [-2048, 2040]}} ++ res |= __lasx_xvldrepl_d(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2040]}} ++ res |= __lasx_xvldrepl_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_d' must be a constant integer}} ++ return res; ++} ++ ++int xvpickve2gr_w(v8i32 _1, int var) { ++ int res = __lasx_xvpickve2gr_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvpickve2gr_w(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvpickve2gr_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_w' must be a constant integer}} ++ return res; ++} ++ ++unsigned int xvpickve2gr_wu(v8i32 _1, int var) { ++ unsigned int res = __lasx_xvpickve2gr_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvpickve2gr_wu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvpickve2gr_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_wu' must be a constant integer}} ++ return res; ++} ++ ++long xvpickve2gr_d(v4i64 _1, int var) { ++ long res = __lasx_xvpickve2gr_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __lasx_xvpickve2gr_d(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __lasx_xvpickve2gr_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_d' must be a constant integer}} ++ return res; ++} ++ ++unsigned long int xvpickve2gr_du(v4i64 _1, int var) { ++ unsigned long int res = __lasx_xvpickve2gr_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __lasx_xvpickve2gr_du(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __lasx_xvpickve2gr_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvrotri_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvrotri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __lasx_xvrotri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __lasx_xvrotri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvrotri_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvrotri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvrotri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvrotri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvrotri_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvrotri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvrotri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvrotri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvrotri_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvrotri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvrotri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvrotri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __lasx_xvsrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __lasx_xvsrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __lasx_xvsrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __lasx_xvsrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lasx_xvsrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lasx_xvsrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __lasx_xvsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __lasx_xvsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __lasx_xvsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __lasx_xvsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lasx_xvsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lasx_xvsrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __lasx_xvssrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvssrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvssrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __lasx_xvssrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvssrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvssrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __lasx_xvssrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvssrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvssrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __lasx_xvssrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lasx_xvssrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lasx_xvssrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2, int var) { ++ v32u8 res = __lasx_xvssrlni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvssrlni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvssrlni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2, int var) { ++ v16u16 res = __lasx_xvssrlni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvssrlni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvssrlni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2, int var) { ++ v8u32 res = __lasx_xvssrlni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvssrlni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvssrlni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2, int var) { ++ v4u64 res = __lasx_xvssrlni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lasx_xvssrlni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lasx_xvssrlni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __lasx_xvssrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvssrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvssrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __lasx_xvssrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvssrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvssrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __lasx_xvssrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvssrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvssrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __lasx_xvssrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lasx_xvssrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lasx_xvssrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2, int var) { ++ v32u8 res = __lasx_xvssrlrni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvssrlrni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvssrlrni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2, int var) { ++ v16u16 res = __lasx_xvssrlrni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvssrlrni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvssrlrni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2, int var) { ++ v8u32 res = __lasx_xvssrlrni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvssrlrni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvssrlrni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2, int var) { ++ v4u64 res = __lasx_xvssrlrni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lasx_xvssrlrni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lasx_xvssrlrni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __lasx_xvsrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __lasx_xvsrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __lasx_xvsrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __lasx_xvsrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lasx_xvsrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lasx_xvsrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __lasx_xvsrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvsrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __lasx_xvsrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvsrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __lasx_xvsrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvsrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __lasx_xvsrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lasx_xvsrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lasx_xvsrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __lasx_xvssrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvssrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvssrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __lasx_xvssrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvssrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvssrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __lasx_xvssrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvssrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvssrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __lasx_xvssrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lasx_xvssrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lasx_xvssrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2, int var) { ++ v32u8 res = __lasx_xvssrani_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvssrani_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvssrani_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2, int var) { ++ v16u16 res = __lasx_xvssrani_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvssrani_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvssrani_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2, int var) { ++ v8u32 res = __lasx_xvssrani_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvssrani_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvssrani_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2, int var) { ++ v4u64 res = __lasx_xvssrani_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lasx_xvssrani_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lasx_xvssrani_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_du_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __lasx_xvssrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvssrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvssrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __lasx_xvssrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvssrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvssrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __lasx_xvssrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvssrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvssrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __lasx_xvssrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lasx_xvssrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lasx_xvssrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2, int var) { ++ v32u8 res = __lasx_xvssrarni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lasx_xvssrarni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lasx_xvssrarni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2, int var) { ++ v16u16 res = __lasx_xvssrarni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lasx_xvssrarni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lasx_xvssrarni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2, int var) { ++ v8u32 res = __lasx_xvssrarni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lasx_xvssrarni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lasx_xvssrarni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2, int var) { ++ v4u64 res = __lasx_xvssrarni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lasx_xvssrarni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lasx_xvssrarni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v4f64 xvpickve_d_f(v4f64 _1, int var) { ++ v4f64 res = __lasx_xvpickve_d_f(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res += __lasx_xvpickve_d_f(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res += __lasx_xvpickve_d_f(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_d_f' must be a constant integer}} ++ return res; ++} ++ ++v8f32 xvpickve_w_f(v8f32 _1, int var) { ++ v8f32 res = __lasx_xvpickve_w_f(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res += __lasx_xvpickve_w_f(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res += __lasx_xvpickve_w_f(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_w_f' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvrepli_b(int var) { ++ v32i8 res = __lasx_xvrepli_b(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __lasx_xvrepli_b(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __lasx_xvrepli_b(var); // expected-error {{argument to '__builtin_lasx_xvrepli_b' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvrepli_d(int var) { ++ v4i64 res = __lasx_xvrepli_d(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __lasx_xvrepli_d(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __lasx_xvrepli_d(var); // expected-error {{argument to '__builtin_lasx_xvrepli_d' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvrepli_h(int var) { ++ v16i16 res = __lasx_xvrepli_h(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __lasx_xvrepli_h(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __lasx_xvrepli_h(var); // expected-error {{argument to '__builtin_lasx_xvrepli_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvrepli_w(int var) { ++ v8i32 res = __lasx_xvrepli_w(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __lasx_xvrepli_w(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __lasx_xvrepli_w(var); // expected-error {{argument to '__builtin_lasx_xvrepli_w' must be a constant integer}} ++ return res; ++} +diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c b/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c +new file mode 100644 +index 000000000000..09b2d5fcacf5 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c +@@ -0,0 +1,4430 @@ ++// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -O2 -emit-llvm %s -o - | FileCheck %s ++ ++#include ++ ++// CHECK-LABEL: @xvsll_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsll_b(v32i8 _1, v32i8 _2) { return __lasx_xvsll_b(_1, _2); } ++// CHECK-LABEL: @xvsll_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsll_h(v16i16 _1, v16i16 _2) { return __lasx_xvsll_h(_1, _2); } ++// CHECK-LABEL: @xvsll_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsll_w(v8i32 _1, v8i32 _2) { return __lasx_xvsll_w(_1, _2); } ++// CHECK-LABEL: @xvsll_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsll_d(v4i64 _1, v4i64 _2) { return __lasx_xvsll_d(_1, _2); } ++// CHECK-LABEL: @xvslli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvslli_b(v32i8 _1) { return __lasx_xvslli_b(_1, 1); } ++// CHECK-LABEL: @xvslli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvslli_h(v16i16 _1) { return __lasx_xvslli_h(_1, 1); } ++// CHECK-LABEL: @xvslli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvslli_w(v8i32 _1) { return __lasx_xvslli_w(_1, 1); } ++// CHECK-LABEL: @xvslli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvslli_d(v4i64 _1) { return __lasx_xvslli_d(_1, 1); } ++// CHECK-LABEL: @xvsra_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsra_b(v32i8 _1, v32i8 _2) { return __lasx_xvsra_b(_1, _2); } ++// CHECK-LABEL: @xvsra_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsra_h(v16i16 _1, v16i16 _2) { return __lasx_xvsra_h(_1, _2); } ++// CHECK-LABEL: @xvsra_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsra_w(v8i32 _1, v8i32 _2) { return __lasx_xvsra_w(_1, _2); } ++// CHECK-LABEL: @xvsra_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsra_d(v4i64 _1, v4i64 _2) { return __lasx_xvsra_d(_1, _2); } ++// CHECK-LABEL: @xvsrai_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrai_b(v32i8 _1) { return __lasx_xvsrai_b(_1, 1); } ++// CHECK-LABEL: @xvsrai_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrai_h(v16i16 _1) { return __lasx_xvsrai_h(_1, 1); } ++// CHECK-LABEL: @xvsrai_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrai_w(v8i32 _1) { return __lasx_xvsrai_w(_1, 1); } ++// CHECK-LABEL: @xvsrai_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrai_d(v4i64 _1) { return __lasx_xvsrai_d(_1, 1); } ++// CHECK-LABEL: @xvsrar_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrar_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrar_b(_1, _2); } ++// CHECK-LABEL: @xvsrar_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrar_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrar_h(_1, _2); } ++// CHECK-LABEL: @xvsrar_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrar_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrar_w(_1, _2); } ++// CHECK-LABEL: @xvsrar_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrar_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrar_d(_1, _2); } ++// CHECK-LABEL: @xvsrari_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrari_b(v32i8 _1) { return __lasx_xvsrari_b(_1, 1); } ++// CHECK-LABEL: @xvsrari_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrari_h(v16i16 _1) { return __lasx_xvsrari_h(_1, 1); } ++// CHECK-LABEL: @xvsrari_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrari_w(v8i32 _1) { return __lasx_xvsrari_w(_1, 1); } ++// CHECK-LABEL: @xvsrari_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrari_d(v4i64 _1) { return __lasx_xvsrari_d(_1, 1); } ++// CHECK-LABEL: @xvsrl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrl_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrl_b(_1, _2); } ++// CHECK-LABEL: @xvsrl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrl_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrl_h(_1, _2); } ++// CHECK-LABEL: @xvsrl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrl_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrl_w(_1, _2); } ++// CHECK-LABEL: @xvsrl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrl_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrl_d(_1, _2); } ++// CHECK-LABEL: @xvsrli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrli_b(v32i8 _1) { return __lasx_xvsrli_b(_1, 1); } ++// CHECK-LABEL: @xvsrli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrli_h(v16i16 _1) { return __lasx_xvsrli_h(_1, 1); } ++// CHECK-LABEL: @xvsrli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrli_w(v8i32 _1) { return __lasx_xvsrli_w(_1, 1); } ++// CHECK-LABEL: @xvsrli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrli_d(v4i64 _1) { return __lasx_xvsrli_d(_1, 1); } ++// CHECK-LABEL: @xvsrlr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrlr_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrlr_b(_1, _2); } ++// CHECK-LABEL: @xvsrlr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrlr_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlr_h(_1, _2); } ++// CHECK-LABEL: @xvsrlr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrlr_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlr_w(_1, _2); } ++// CHECK-LABEL: @xvsrlr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrlr_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlr_d(_1, _2); } ++// CHECK-LABEL: @xvsrlri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrlri_b(v32i8 _1) { return __lasx_xvsrlri_b(_1, 1); } ++// CHECK-LABEL: @xvsrlri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrlri_h(v16i16 _1) { return __lasx_xvsrlri_h(_1, 1); } ++// CHECK-LABEL: @xvsrlri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrlri_w(v8i32 _1) { return __lasx_xvsrlri_w(_1, 1); } ++// CHECK-LABEL: @xvsrlri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrlri_d(v4i64 _1) { return __lasx_xvsrlri_d(_1, 1); } ++// CHECK-LABEL: @xvbitclr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvbitclr_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitclr_b(_1, _2); } ++// CHECK-LABEL: @xvbitclr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvbitclr_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitclr_h(_1, _2); } ++// CHECK-LABEL: @xvbitclr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvbitclr_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitclr_w(_1, _2); } ++// CHECK-LABEL: @xvbitclr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvbitclr_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitclr_d(_1, _2); } ++// CHECK-LABEL: @xvbitclri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvbitclri_b(v32u8 _1) { return __lasx_xvbitclri_b(_1, 1); } ++// CHECK-LABEL: @xvbitclri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvbitclri_h(v16u16 _1) { return __lasx_xvbitclri_h(_1, 1); } ++// CHECK-LABEL: @xvbitclri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvbitclri_w(v8u32 _1) { return __lasx_xvbitclri_w(_1, 1); } ++// CHECK-LABEL: @xvbitclri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvbitclri_d(v4u64 _1) { return __lasx_xvbitclri_d(_1, 1); } ++// CHECK-LABEL: @xvbitset_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvbitset_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitset_b(_1, _2); } ++// CHECK-LABEL: @xvbitset_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvbitset_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitset_h(_1, _2); } ++// CHECK-LABEL: @xvbitset_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvbitset_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitset_w(_1, _2); } ++// CHECK-LABEL: @xvbitset_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvbitset_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitset_d(_1, _2); } ++// CHECK-LABEL: @xvbitseti_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvbitseti_b(v32u8 _1) { return __lasx_xvbitseti_b(_1, 1); } ++// CHECK-LABEL: @xvbitseti_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvbitseti_h(v16u16 _1) { return __lasx_xvbitseti_h(_1, 1); } ++// CHECK-LABEL: @xvbitseti_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvbitseti_w(v8u32 _1) { return __lasx_xvbitseti_w(_1, 1); } ++// CHECK-LABEL: @xvbitseti_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvbitseti_d(v4u64 _1) { return __lasx_xvbitseti_d(_1, 1); } ++// CHECK-LABEL: @xvbitrev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvbitrev_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitrev_b(_1, _2); } ++// CHECK-LABEL: @xvbitrev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvbitrev_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitrev_h(_1, _2); } ++// CHECK-LABEL: @xvbitrev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvbitrev_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitrev_w(_1, _2); } ++// CHECK-LABEL: @xvbitrev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvbitrev_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitrev_d(_1, _2); } ++// CHECK-LABEL: @xvbitrevi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvbitrevi_b(v32u8 _1) { return __lasx_xvbitrevi_b(_1, 1); } ++// CHECK-LABEL: @xvbitrevi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvbitrevi_h(v16u16 _1) { return __lasx_xvbitrevi_h(_1, 1); } ++// CHECK-LABEL: @xvbitrevi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvbitrevi_w(v8u32 _1) { return __lasx_xvbitrevi_w(_1, 1); } ++// CHECK-LABEL: @xvbitrevi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvbitrevi_d(v4u64 _1) { return __lasx_xvbitrevi_d(_1, 1); } ++// CHECK-LABEL: @xvadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvadd_b(_1, _2); } ++// CHECK-LABEL: @xvadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvadd_h(_1, _2); } ++// CHECK-LABEL: @xvadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvadd_w(v8i32 _1, v8i32 _2) { return __lasx_xvadd_w(_1, _2); } ++// CHECK-LABEL: @xvadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvadd_d(_1, _2); } ++// CHECK-LABEL: @xvaddi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvaddi_bu(v32i8 _1) { return __lasx_xvaddi_bu(_1, 1); } ++// CHECK-LABEL: @xvaddi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvaddi_hu(v16i16 _1) { return __lasx_xvaddi_hu(_1, 1); } ++// CHECK-LABEL: @xvaddi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvaddi_wu(v8i32 _1) { return __lasx_xvaddi_wu(_1, 1); } ++// CHECK-LABEL: @xvaddi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddi_du(v4i64 _1) { return __lasx_xvaddi_du(_1, 1); } ++// CHECK-LABEL: @xvsub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsub_b(v32i8 _1, v32i8 _2) { return __lasx_xvsub_b(_1, _2); } ++// CHECK-LABEL: @xvsub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsub_h(v16i16 _1, v16i16 _2) { return __lasx_xvsub_h(_1, _2); } ++// CHECK-LABEL: @xvsub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsub_w(v8i32 _1, v8i32 _2) { return __lasx_xvsub_w(_1, _2); } ++// CHECK-LABEL: @xvsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsub_d(v4i64 _1, v4i64 _2) { return __lasx_xvsub_d(_1, _2); } ++// CHECK-LABEL: @xvsubi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsubi_bu(v32i8 _1) { return __lasx_xvsubi_bu(_1, 1); } ++// CHECK-LABEL: @xvsubi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsubi_hu(v16i16 _1) { return __lasx_xvsubi_hu(_1, 1); } ++// CHECK-LABEL: @xvsubi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsubi_wu(v8i32 _1) { return __lasx_xvsubi_wu(_1, 1); } ++// CHECK-LABEL: @xvsubi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubi_du(v4i64 _1) { return __lasx_xvsubi_du(_1, 1); } ++// CHECK-LABEL: @xvmax_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmax_b(v32i8 _1, v32i8 _2) { return __lasx_xvmax_b(_1, _2); } ++// CHECK-LABEL: @xvmax_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmax_h(v16i16 _1, v16i16 _2) { return __lasx_xvmax_h(_1, _2); } ++// CHECK-LABEL: @xvmax_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmax_w(v8i32 _1, v8i32 _2) { return __lasx_xvmax_w(_1, _2); } ++// CHECK-LABEL: @xvmax_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmax_d(v4i64 _1, v4i64 _2) { return __lasx_xvmax_d(_1, _2); } ++// CHECK-LABEL: @xvmaxi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmaxi_b(v32i8 _1) { return __lasx_xvmaxi_b(_1, 1); } ++// CHECK-LABEL: @xvmaxi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmaxi_h(v16i16 _1) { return __lasx_xvmaxi_h(_1, 1); } ++// CHECK-LABEL: @xvmaxi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmaxi_w(v8i32 _1) { return __lasx_xvmaxi_w(_1, 1); } ++// CHECK-LABEL: @xvmaxi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaxi_d(v4i64 _1) { return __lasx_xvmaxi_d(_1, 1); } ++// CHECK-LABEL: @xvmax_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvmax_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmax_bu(_1, _2); } ++// CHECK-LABEL: @xvmax_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvmax_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmax_hu(_1, _2); } ++// CHECK-LABEL: @xvmax_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvmax_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmax_wu(_1, _2); } ++// CHECK-LABEL: @xvmax_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmax_du(v4u64 _1, v4u64 _2) { return __lasx_xvmax_du(_1, _2); } ++// CHECK-LABEL: @xvmaxi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvmaxi_bu(v32u8 _1) { return __lasx_xvmaxi_bu(_1, 1); } ++// CHECK-LABEL: @xvmaxi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvmaxi_hu(v16u16 _1) { return __lasx_xvmaxi_hu(_1, 1); } ++// CHECK-LABEL: @xvmaxi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvmaxi_wu(v8u32 _1) { return __lasx_xvmaxi_wu(_1, 1); } ++// CHECK-LABEL: @xvmaxi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmaxi_du(v4u64 _1) { return __lasx_xvmaxi_du(_1, 1); } ++// CHECK-LABEL: @xvmin_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmin_b(v32i8 _1, v32i8 _2) { return __lasx_xvmin_b(_1, _2); } ++// CHECK-LABEL: @xvmin_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmin_h(v16i16 _1, v16i16 _2) { return __lasx_xvmin_h(_1, _2); } ++// CHECK-LABEL: @xvmin_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmin_w(v8i32 _1, v8i32 _2) { return __lasx_xvmin_w(_1, _2); } ++// CHECK-LABEL: @xvmin_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmin_d(v4i64 _1, v4i64 _2) { return __lasx_xvmin_d(_1, _2); } ++// CHECK-LABEL: @xvmini_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmini_b(v32i8 _1) { return __lasx_xvmini_b(_1, 1); } ++// CHECK-LABEL: @xvmini_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmini_h(v16i16 _1) { return __lasx_xvmini_h(_1, 1); } ++// CHECK-LABEL: @xvmini_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmini_w(v8i32 _1) { return __lasx_xvmini_w(_1, 1); } ++// CHECK-LABEL: @xvmini_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmini_d(v4i64 _1) { return __lasx_xvmini_d(_1, 1); } ++// CHECK-LABEL: @xvmin_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvmin_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmin_bu(_1, _2); } ++// CHECK-LABEL: @xvmin_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvmin_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmin_hu(_1, _2); } ++// CHECK-LABEL: @xvmin_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvmin_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmin_wu(_1, _2); } ++// CHECK-LABEL: @xvmin_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmin_du(v4u64 _1, v4u64 _2) { return __lasx_xvmin_du(_1, _2); } ++// CHECK-LABEL: @xvmini_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvmini_bu(v32u8 _1) { return __lasx_xvmini_bu(_1, 1); } ++// CHECK-LABEL: @xvmini_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvmini_hu(v16u16 _1) { return __lasx_xvmini_hu(_1, 1); } ++// CHECK-LABEL: @xvmini_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvmini_wu(v8u32 _1) { return __lasx_xvmini_wu(_1, 1); } ++// CHECK-LABEL: @xvmini_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmini_du(v4u64 _1) { return __lasx_xvmini_du(_1, 1); } ++// CHECK-LABEL: @xvseq_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvseq_b(v32i8 _1, v32i8 _2) { return __lasx_xvseq_b(_1, _2); } ++// CHECK-LABEL: @xvseq_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvseq_h(v16i16 _1, v16i16 _2) { return __lasx_xvseq_h(_1, _2); } ++// CHECK-LABEL: @xvseq_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvseq_w(v8i32 _1, v8i32 _2) { return __lasx_xvseq_w(_1, _2); } ++// CHECK-LABEL: @xvseq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvseq_d(v4i64 _1, v4i64 _2) { return __lasx_xvseq_d(_1, _2); } ++// CHECK-LABEL: @xvseqi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvseqi_b(v32i8 _1) { return __lasx_xvseqi_b(_1, 1); } ++// CHECK-LABEL: @xvseqi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvseqi_h(v16i16 _1) { return __lasx_xvseqi_h(_1, 1); } ++// CHECK-LABEL: @xvseqi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvseqi_w(v8i32 _1) { return __lasx_xvseqi_w(_1, 1); } ++// CHECK-LABEL: @xvseqi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvseqi_d(v4i64 _1) { return __lasx_xvseqi_d(_1, 1); } ++// CHECK-LABEL: @xvslt_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvslt_b(v32i8 _1, v32i8 _2) { return __lasx_xvslt_b(_1, _2); } ++// CHECK-LABEL: @xvslt_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvslt_h(v16i16 _1, v16i16 _2) { return __lasx_xvslt_h(_1, _2); } ++// CHECK-LABEL: @xvslt_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvslt_w(v8i32 _1, v8i32 _2) { return __lasx_xvslt_w(_1, _2); } ++// CHECK-LABEL: @xvslt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvslt_d(v4i64 _1, v4i64 _2) { return __lasx_xvslt_d(_1, _2); } ++// CHECK-LABEL: @xvslti_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvslti_b(v32i8 _1) { return __lasx_xvslti_b(_1, 1); } ++// CHECK-LABEL: @xvslti_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvslti_h(v16i16 _1) { return __lasx_xvslti_h(_1, 1); } ++// CHECK-LABEL: @xvslti_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvslti_w(v8i32 _1) { return __lasx_xvslti_w(_1, 1); } ++// CHECK-LABEL: @xvslti_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvslti_d(v4i64 _1) { return __lasx_xvslti_d(_1, 1); } ++// CHECK-LABEL: @xvslt_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvslt_bu(v32u8 _1, v32u8 _2) { return __lasx_xvslt_bu(_1, _2); } ++// CHECK-LABEL: @xvslt_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvslt_hu(v16u16 _1, v16u16 _2) { return __lasx_xvslt_hu(_1, _2); } ++// CHECK-LABEL: @xvslt_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvslt_wu(v8u32 _1, v8u32 _2) { return __lasx_xvslt_wu(_1, _2); } ++// CHECK-LABEL: @xvslt_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvslt_du(v4u64 _1, v4u64 _2) { return __lasx_xvslt_du(_1, _2); } ++// CHECK-LABEL: @xvslti_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvslti_bu(v32u8 _1) { return __lasx_xvslti_bu(_1, 1); } ++// CHECK-LABEL: @xvslti_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvslti_hu(v16u16 _1) { return __lasx_xvslti_hu(_1, 1); } ++// CHECK-LABEL: @xvslti_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvslti_wu(v8u32 _1) { return __lasx_xvslti_wu(_1, 1); } ++// CHECK-LABEL: @xvslti_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvslti_du(v4u64 _1) { return __lasx_xvslti_du(_1, 1); } ++// CHECK-LABEL: @xvsle_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsle_b(v32i8 _1, v32i8 _2) { return __lasx_xvsle_b(_1, _2); } ++// CHECK-LABEL: @xvsle_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsle_h(v16i16 _1, v16i16 _2) { return __lasx_xvsle_h(_1, _2); } ++// CHECK-LABEL: @xvsle_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsle_w(v8i32 _1, v8i32 _2) { return __lasx_xvsle_w(_1, _2); } ++// CHECK-LABEL: @xvsle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsle_d(v4i64 _1, v4i64 _2) { return __lasx_xvsle_d(_1, _2); } ++// CHECK-LABEL: @xvslei_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvslei_b(v32i8 _1) { return __lasx_xvslei_b(_1, 1); } ++// CHECK-LABEL: @xvslei_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvslei_h(v16i16 _1) { return __lasx_xvslei_h(_1, 1); } ++// CHECK-LABEL: @xvslei_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvslei_w(v8i32 _1) { return __lasx_xvslei_w(_1, 1); } ++// CHECK-LABEL: @xvslei_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvslei_d(v4i64 _1) { return __lasx_xvslei_d(_1, 1); } ++// CHECK-LABEL: @xvsle_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsle_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsle_bu(_1, _2); } ++// CHECK-LABEL: @xvsle_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsle_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsle_hu(_1, _2); } ++// CHECK-LABEL: @xvsle_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsle_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsle_wu(_1, _2); } ++// CHECK-LABEL: @xvsle_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsle_du(v4u64 _1, v4u64 _2) { return __lasx_xvsle_du(_1, _2); } ++// CHECK-LABEL: @xvslei_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvslei_bu(v32u8 _1) { return __lasx_xvslei_bu(_1, 1); } ++// CHECK-LABEL: @xvslei_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvslei_hu(v16u16 _1) { return __lasx_xvslei_hu(_1, 1); } ++// CHECK-LABEL: @xvslei_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvslei_wu(v8u32 _1) { return __lasx_xvslei_wu(_1, 1); } ++// CHECK-LABEL: @xvslei_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvslei_du(v4u64 _1) { return __lasx_xvslei_du(_1, 1); } ++// CHECK-LABEL: @xvsat_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsat_b(v32i8 _1) { return __lasx_xvsat_b(_1, 1); } ++// CHECK-LABEL: @xvsat_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsat_h(v16i16 _1) { return __lasx_xvsat_h(_1, 1); } ++// CHECK-LABEL: @xvsat_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsat_w(v8i32 _1) { return __lasx_xvsat_w(_1, 1); } ++// CHECK-LABEL: @xvsat_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsat_d(v4i64 _1) { return __lasx_xvsat_d(_1, 1); } ++// CHECK-LABEL: @xvsat_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvsat_bu(v32u8 _1) { return __lasx_xvsat_bu(_1, 1); } ++// CHECK-LABEL: @xvsat_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvsat_hu(v16u16 _1) { return __lasx_xvsat_hu(_1, 1); } ++// CHECK-LABEL: @xvsat_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvsat_wu(v8u32 _1) { return __lasx_xvsat_wu(_1, 1); } ++// CHECK-LABEL: @xvsat_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvsat_du(v4u64 _1) { return __lasx_xvsat_du(_1, 1); } ++// CHECK-LABEL: @xvadda_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvadda_b(v32i8 _1, v32i8 _2) { return __lasx_xvadda_b(_1, _2); } ++// CHECK-LABEL: @xvadda_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvadda_h(v16i16 _1, v16i16 _2) { return __lasx_xvadda_h(_1, _2); } ++// CHECK-LABEL: @xvadda_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvadda_w(v8i32 _1, v8i32 _2) { return __lasx_xvadda_w(_1, _2); } ++// CHECK-LABEL: @xvadda_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvadda_d(v4i64 _1, v4i64 _2) { return __lasx_xvadda_d(_1, _2); } ++// CHECK-LABEL: @xvsadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvsadd_b(_1, _2); } ++// CHECK-LABEL: @xvsadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvsadd_h(_1, _2); } ++// CHECK-LABEL: @xvsadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsadd_w(v8i32 _1, v8i32 _2) { return __lasx_xvsadd_w(_1, _2); } ++// CHECK-LABEL: @xvsadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvsadd_d(_1, _2); } ++// CHECK-LABEL: @xvsadd_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvsadd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsadd_bu(_1, _2); } ++// CHECK-LABEL: @xvsadd_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvsadd_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsadd_hu(_1, _2); } ++// CHECK-LABEL: @xvsadd_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvsadd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsadd_wu(_1, _2); } ++// CHECK-LABEL: @xvsadd_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvsadd_du(v4u64 _1, v4u64 _2) { return __lasx_xvsadd_du(_1, _2); } ++// CHECK-LABEL: @xvavg_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvavg_b(v32i8 _1, v32i8 _2) { return __lasx_xvavg_b(_1, _2); } ++// CHECK-LABEL: @xvavg_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvavg_h(v16i16 _1, v16i16 _2) { return __lasx_xvavg_h(_1, _2); } ++// CHECK-LABEL: @xvavg_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvavg_w(v8i32 _1, v8i32 _2) { return __lasx_xvavg_w(_1, _2); } ++// CHECK-LABEL: @xvavg_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvavg_d(v4i64 _1, v4i64 _2) { return __lasx_xvavg_d(_1, _2); } ++// CHECK-LABEL: @xvavg_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvavg_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavg_bu(_1, _2); } ++// CHECK-LABEL: @xvavg_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvavg_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavg_hu(_1, _2); } ++// CHECK-LABEL: @xvavg_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvavg_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavg_wu(_1, _2); } ++// CHECK-LABEL: @xvavg_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvavg_du(v4u64 _1, v4u64 _2) { return __lasx_xvavg_du(_1, _2); } ++// CHECK-LABEL: @xvavgr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvavgr_b(v32i8 _1, v32i8 _2) { return __lasx_xvavgr_b(_1, _2); } ++// CHECK-LABEL: @xvavgr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvavgr_h(v16i16 _1, v16i16 _2) { return __lasx_xvavgr_h(_1, _2); } ++// CHECK-LABEL: @xvavgr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvavgr_w(v8i32 _1, v8i32 _2) { return __lasx_xvavgr_w(_1, _2); } ++// CHECK-LABEL: @xvavgr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvavgr_d(v4i64 _1, v4i64 _2) { return __lasx_xvavgr_d(_1, _2); } ++// CHECK-LABEL: @xvavgr_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvavgr_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavgr_bu(_1, _2); } ++// CHECK-LABEL: @xvavgr_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvavgr_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavgr_hu(_1, _2); } ++// CHECK-LABEL: @xvavgr_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvavgr_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavgr_wu(_1, _2); } ++// CHECK-LABEL: @xvavgr_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvavgr_du(v4u64 _1, v4u64 _2) { return __lasx_xvavgr_du(_1, _2); } ++// CHECK-LABEL: @xvssub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssub_b(v32i8 _1, v32i8 _2) { return __lasx_xvssub_b(_1, _2); } ++// CHECK-LABEL: @xvssub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssub_h(v16i16 _1, v16i16 _2) { return __lasx_xvssub_h(_1, _2); } ++// CHECK-LABEL: @xvssub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssub_w(v8i32 _1, v8i32 _2) { return __lasx_xvssub_w(_1, _2); } ++// CHECK-LABEL: @xvssub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvssub_d(v4i64 _1, v4i64 _2) { return __lasx_xvssub_d(_1, _2); } ++// CHECK-LABEL: @xvssub_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssub_bu(v32u8 _1, v32u8 _2) { return __lasx_xvssub_bu(_1, _2); } ++// CHECK-LABEL: @xvssub_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssub_hu(v16u16 _1, v16u16 _2) { return __lasx_xvssub_hu(_1, _2); } ++// CHECK-LABEL: @xvssub_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssub_wu(v8u32 _1, v8u32 _2) { return __lasx_xvssub_wu(_1, _2); } ++// CHECK-LABEL: @xvssub_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvssub_du(v4u64 _1, v4u64 _2) { return __lasx_xvssub_du(_1, _2); } ++// CHECK-LABEL: @xvabsd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvabsd_b(v32i8 _1, v32i8 _2) { return __lasx_xvabsd_b(_1, _2); } ++// CHECK-LABEL: @xvabsd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvabsd_h(v16i16 _1, v16i16 _2) { return __lasx_xvabsd_h(_1, _2); } ++// CHECK-LABEL: @xvabsd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvabsd_w(v8i32 _1, v8i32 _2) { return __lasx_xvabsd_w(_1, _2); } ++// CHECK-LABEL: @xvabsd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvabsd_d(v4i64 _1, v4i64 _2) { return __lasx_xvabsd_d(_1, _2); } ++// CHECK-LABEL: @xvabsd_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvabsd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvabsd_bu(_1, _2); } ++// CHECK-LABEL: @xvabsd_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvabsd_hu(v16u16 _1, v16u16 _2) { return __lasx_xvabsd_hu(_1, _2); } ++// CHECK-LABEL: @xvabsd_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvabsd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvabsd_wu(_1, _2); } ++// CHECK-LABEL: @xvabsd_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvabsd_du(v4u64 _1, v4u64 _2) { return __lasx_xvabsd_du(_1, _2); } ++// CHECK-LABEL: @xvmul_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmul_b(v32i8 _1, v32i8 _2) { return __lasx_xvmul_b(_1, _2); } ++// CHECK-LABEL: @xvmul_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmul_h(v16i16 _1, v16i16 _2) { return __lasx_xvmul_h(_1, _2); } ++// CHECK-LABEL: @xvmul_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmul_w(v8i32 _1, v8i32 _2) { return __lasx_xvmul_w(_1, _2); } ++// CHECK-LABEL: @xvmul_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmul_d(v4i64 _1, v4i64 _2) { return __lasx_xvmul_d(_1, _2); } ++// CHECK-LABEL: @xvmadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmadd_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmadd_b(_1, _2, _3); } ++// CHECK-LABEL: @xvmadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmadd_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmadd_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmadd_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmadd_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmadd_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmadd_d(_1, _2, _3); } ++// CHECK-LABEL: @xvmsub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmsub_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmsub_b(_1, _2, _3); } ++// CHECK-LABEL: @xvmsub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmsub_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmsub_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmsub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmsub_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmsub_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmsub_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmsub_d(_1, _2, _3); } ++// CHECK-LABEL: @xvdiv_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvdiv_b(v32i8 _1, v32i8 _2) { return __lasx_xvdiv_b(_1, _2); } ++// CHECK-LABEL: @xvdiv_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvdiv_h(v16i16 _1, v16i16 _2) { return __lasx_xvdiv_h(_1, _2); } ++// CHECK-LABEL: @xvdiv_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvdiv_w(v8i32 _1, v8i32 _2) { return __lasx_xvdiv_w(_1, _2); } ++// CHECK-LABEL: @xvdiv_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvdiv_d(v4i64 _1, v4i64 _2) { return __lasx_xvdiv_d(_1, _2); } ++// CHECK-LABEL: @xvdiv_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvdiv_bu(v32u8 _1, v32u8 _2) { return __lasx_xvdiv_bu(_1, _2); } ++// CHECK-LABEL: @xvdiv_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvdiv_hu(v16u16 _1, v16u16 _2) { return __lasx_xvdiv_hu(_1, _2); } ++// CHECK-LABEL: @xvdiv_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvdiv_wu(v8u32 _1, v8u32 _2) { return __lasx_xvdiv_wu(_1, _2); } ++// CHECK-LABEL: @xvdiv_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvdiv_du(v4u64 _1, v4u64 _2) { return __lasx_xvdiv_du(_1, _2); } ++// CHECK-LABEL: @xvhaddw_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvhaddw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhaddw_h_b(_1, _2); } ++// CHECK-LABEL: @xvhaddw_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvhaddw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhaddw_w_h(_1, _2); } ++// CHECK-LABEL: @xvhaddw_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvhaddw_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvhaddw_d_w(_1, _2); } ++// CHECK-LABEL: @xvhaddw_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvhaddw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhaddw_hu_bu(_1, _2); } ++// CHECK-LABEL: @xvhaddw_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvhaddw_wu_hu(v16u16 _1, v16u16 _2) { return __lasx_xvhaddw_wu_hu(_1, _2); } ++// CHECK-LABEL: @xvhaddw_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvhaddw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhaddw_du_wu(_1, _2); } ++// CHECK-LABEL: @xvhsubw_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvhsubw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhsubw_h_b(_1, _2); } ++// CHECK-LABEL: @xvhsubw_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvhsubw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhsubw_w_h(_1, _2); } ++// CHECK-LABEL: @xvhsubw_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvhsubw_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvhsubw_d_w(_1, _2); } ++// CHECK-LABEL: @xvhsubw_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvhsubw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhsubw_hu_bu(_1, _2); } ++// CHECK-LABEL: @xvhsubw_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvhsubw_wu_hu(v16u16 _1, v16u16 _2) { return __lasx_xvhsubw_wu_hu(_1, _2); } ++// CHECK-LABEL: @xvhsubw_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvhsubw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhsubw_du_wu(_1, _2); } ++// CHECK-LABEL: @xvmod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmod_b(v32i8 _1, v32i8 _2) { return __lasx_xvmod_b(_1, _2); } ++// CHECK-LABEL: @xvmod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmod_h(v16i16 _1, v16i16 _2) { return __lasx_xvmod_h(_1, _2); } ++// CHECK-LABEL: @xvmod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmod_w(v8i32 _1, v8i32 _2) { return __lasx_xvmod_w(_1, _2); } ++// CHECK-LABEL: @xvmod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmod_d(v4i64 _1, v4i64 _2) { return __lasx_xvmod_d(_1, _2); } ++// CHECK-LABEL: @xvmod_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvmod_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmod_bu(_1, _2); } ++// CHECK-LABEL: @xvmod_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvmod_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmod_hu(_1, _2); } ++// CHECK-LABEL: @xvmod_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvmod_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmod_wu(_1, _2); } ++// CHECK-LABEL: @xvmod_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmod_du(v4u64 _1, v4u64 _2) { return __lasx_xvmod_du(_1, _2); } ++// CHECK-LABEL: @xvrepl128vei_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvrepl128vei_b(v32i8 _1) { return __lasx_xvrepl128vei_b(_1, 1); } ++// CHECK-LABEL: @xvrepl128vei_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvrepl128vei_h(v16i16 _1) { return __lasx_xvrepl128vei_h(_1, 1); } ++// CHECK-LABEL: @xvrepl128vei_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvrepl128vei_w(v8i32 _1) { return __lasx_xvrepl128vei_w(_1, 1); } ++// CHECK-LABEL: @xvrepl128vei_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvrepl128vei_d(v4i64 _1) { return __lasx_xvrepl128vei_d(_1, 1); } ++// CHECK-LABEL: @xvpickev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvpickev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickev_b(_1, _2); } ++// CHECK-LABEL: @xvpickev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvpickev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickev_h(_1, _2); } ++// CHECK-LABEL: @xvpickev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvpickev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickev_w(_1, _2); } ++// CHECK-LABEL: @xvpickev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvpickev_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickev_d(_1, _2); } ++// CHECK-LABEL: @xvpickod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvpickod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickod_b(_1, _2); } ++// CHECK-LABEL: @xvpickod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvpickod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickod_h(_1, _2); } ++// CHECK-LABEL: @xvpickod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvpickod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickod_w(_1, _2); } ++// CHECK-LABEL: @xvpickod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvpickod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickod_d(_1, _2); } ++// CHECK-LABEL: @xvilvh_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvilvh_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvh_b(_1, _2); } ++// CHECK-LABEL: @xvilvh_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvilvh_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvh_h(_1, _2); } ++// CHECK-LABEL: @xvilvh_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvilvh_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvh_w(_1, _2); } ++// CHECK-LABEL: @xvilvh_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvilvh_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvh_d(_1, _2); } ++// CHECK-LABEL: @xvilvl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvilvl_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvl_b(_1, _2); } ++// CHECK-LABEL: @xvilvl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvilvl_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvl_h(_1, _2); } ++// CHECK-LABEL: @xvilvl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvilvl_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvl_w(_1, _2); } ++// CHECK-LABEL: @xvilvl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvilvl_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvl_d(_1, _2); } ++// CHECK-LABEL: @xvpackev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvpackev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackev_b(_1, _2); } ++// CHECK-LABEL: @xvpackev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvpackev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackev_h(_1, _2); } ++// CHECK-LABEL: @xvpackev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvpackev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackev_w(_1, _2); } ++// CHECK-LABEL: @xvpackev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvpackev_d(v4i64 _1, v4i64 _2) { return __lasx_xvpackev_d(_1, _2); } ++// CHECK-LABEL: @xvpackod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvpackod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackod_b(_1, _2); } ++// CHECK-LABEL: @xvpackod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvpackod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackod_h(_1, _2); } ++// CHECK-LABEL: @xvpackod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvpackod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackod_w(_1, _2); } ++// CHECK-LABEL: @xvpackod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvpackod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpackod_d(_1, _2); } ++// CHECK-LABEL: @xvshuf_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvshuf_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvshuf_b(_1, _2, _3); } ++// CHECK-LABEL: @xvshuf_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvshuf_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvshuf_h(_1, _2, _3); } ++// CHECK-LABEL: @xvshuf_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvshuf_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvshuf_w(_1, _2, _3); } ++// CHECK-LABEL: @xvshuf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvshuf_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvshuf_d(_1, _2, _3); } ++// CHECK-LABEL: @xvand_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvand_v(v32u8 _1, v32u8 _2) { return __lasx_xvand_v(_1, _2); } ++// CHECK-LABEL: @xvandi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvandi_b(v32u8 _1) { return __lasx_xvandi_b(_1, 1); } ++// CHECK-LABEL: @xvor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvor_v(v32u8 _1, v32u8 _2) { return __lasx_xvor_v(_1, _2); } ++// CHECK-LABEL: @xvori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvori_b(v32u8 _1) { return __lasx_xvori_b(_1, 1); } ++// CHECK-LABEL: @xvnor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvnor_v(v32u8 _1, v32u8 _2) { return __lasx_xvnor_v(_1, _2); } ++// CHECK-LABEL: @xvnori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvnori_b(v32u8 _1) { return __lasx_xvnori_b(_1, 1); } ++// CHECK-LABEL: @xvxor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvxor_v(v32u8 _1, v32u8 _2) { return __lasx_xvxor_v(_1, _2); } ++// CHECK-LABEL: @xvxori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvxori_b(v32u8 _1) { return __lasx_xvxori_b(_1, 1); } ++// CHECK-LABEL: @xvbitsel_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvbitsel_v(v32u8 _1, v32u8 _2, v32u8 _3) { return __lasx_xvbitsel_v(_1, _2, _3); } ++// CHECK-LABEL: @xvbitseli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvbitseli_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitseli_b(_1, _2, 1); } ++// CHECK-LABEL: @xvshuf4i_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvshuf4i_b(v32i8 _1) { return __lasx_xvshuf4i_b(_1, 1); } ++// CHECK-LABEL: @xvshuf4i_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvshuf4i_h(v16i16 _1) { return __lasx_xvshuf4i_h(_1, 1); } ++// CHECK-LABEL: @xvshuf4i_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvshuf4i_w(v8i32 _1) { return __lasx_xvshuf4i_w(_1, 1); } ++// CHECK-LABEL: @xvreplgr2vr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvreplgr2vr_b(int _1) { return __lasx_xvreplgr2vr_b(_1); } ++// CHECK-LABEL: @xvreplgr2vr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvreplgr2vr_h(int _1) { return __lasx_xvreplgr2vr_h(_1); } ++// CHECK-LABEL: @xvreplgr2vr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvreplgr2vr_w(int _1) { return __lasx_xvreplgr2vr_w(_1); } ++// CHECK-LABEL: @xvreplgr2vr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[_1:%.*]] to i64 ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 [[CONV]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvreplgr2vr_d(int _1) { return __lasx_xvreplgr2vr_d(_1); } ++// CHECK-LABEL: @xvpcnt_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvpcnt_b(v32i8 _1) { return __lasx_xvpcnt_b(_1); } ++// CHECK-LABEL: @xvpcnt_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvpcnt_h(v16i16 _1) { return __lasx_xvpcnt_h(_1); } ++// CHECK-LABEL: @xvpcnt_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvpcnt_w(v8i32 _1) { return __lasx_xvpcnt_w(_1); } ++// CHECK-LABEL: @xvpcnt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvpcnt_d(v4i64 _1) { return __lasx_xvpcnt_d(_1); } ++// CHECK-LABEL: @xvclo_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvclo_b(v32i8 _1) { return __lasx_xvclo_b(_1); } ++// CHECK-LABEL: @xvclo_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvclo_h(v16i16 _1) { return __lasx_xvclo_h(_1); } ++// CHECK-LABEL: @xvclo_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvclo_w(v8i32 _1) { return __lasx_xvclo_w(_1); } ++// CHECK-LABEL: @xvclo_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvclo_d(v4i64 _1) { return __lasx_xvclo_d(_1); } ++// CHECK-LABEL: @xvclz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvclz_b(v32i8 _1) { return __lasx_xvclz_b(_1); } ++// CHECK-LABEL: @xvclz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvclz_h(v16i16 _1) { return __lasx_xvclz_h(_1); } ++// CHECK-LABEL: @xvclz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvclz_w(v8i32 _1) { return __lasx_xvclz_w(_1); } ++// CHECK-LABEL: @xvclz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvclz_d(v4i64 _1) { return __lasx_xvclz_d(_1); } ++// CHECK-LABEL: @xvfadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfadd_s(v8f32 _1, v8f32 _2) { return __lasx_xvfadd_s(_1, _2); } ++// CHECK-LABEL: @xvfadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfadd_d(v4f64 _1, v4f64 _2) { return __lasx_xvfadd_d(_1, _2); } ++// CHECK-LABEL: @xvfsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfsub_s(v8f32 _1, v8f32 _2) { return __lasx_xvfsub_s(_1, _2); } ++// CHECK-LABEL: @xvfsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfsub_d(v4f64 _1, v4f64 _2) { return __lasx_xvfsub_d(_1, _2); } ++// CHECK-LABEL: @xvfmul_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfmul_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmul_s(_1, _2); } ++// CHECK-LABEL: @xvfmul_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfmul_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmul_d(_1, _2); } ++// CHECK-LABEL: @xvfdiv_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfdiv_s(v8f32 _1, v8f32 _2) { return __lasx_xvfdiv_s(_1, _2); } ++// CHECK-LABEL: @xvfdiv_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfdiv_d(v4f64 _1, v4f64 _2) { return __lasx_xvfdiv_d(_1, _2); } ++// CHECK-LABEL: @xvfcvt_h_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvfcvt_h_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcvt_h_s(_1, _2); } ++// CHECK-LABEL: @xvfcvt_s_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfcvt_s_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcvt_s_d(_1, _2); } ++// CHECK-LABEL: @xvfmin_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfmin_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmin_s(_1, _2); } ++// CHECK-LABEL: @xvfmin_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfmin_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmin_d(_1, _2); } ++// CHECK-LABEL: @xvfmina_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfmina_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmina_s(_1, _2); } ++// CHECK-LABEL: @xvfmina_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfmina_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmina_d(_1, _2); } ++// CHECK-LABEL: @xvfmax_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfmax_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmax_s(_1, _2); } ++// CHECK-LABEL: @xvfmax_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfmax_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmax_d(_1, _2); } ++// CHECK-LABEL: @xvfmaxa_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfmaxa_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmaxa_s(_1, _2); } ++// CHECK-LABEL: @xvfmaxa_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfmaxa_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmaxa_d(_1, _2); } ++// CHECK-LABEL: @xvfclass_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfclass_s(v8f32 _1) { return __lasx_xvfclass_s(_1); } ++// CHECK-LABEL: @xvfclass_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfclass_d(v4f64 _1) { return __lasx_xvfclass_d(_1); } ++// CHECK-LABEL: @xvfsqrt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfsqrt_s(v8f32 _1) { return __lasx_xvfsqrt_s(_1); } ++// CHECK-LABEL: @xvfsqrt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfsqrt_d(v4f64 _1) { return __lasx_xvfsqrt_d(_1); } ++// CHECK-LABEL: @xvfrecip_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfrecip_s(v8f32 _1) { return __lasx_xvfrecip_s(_1); } ++// CHECK-LABEL: @xvfrecip_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfrecip_d(v4f64 _1) { return __lasx_xvfrecip_d(_1); } ++// CHECK-LABEL: @xvfrint_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfrint_s(v8f32 _1) { return __lasx_xvfrint_s(_1); } ++// CHECK-LABEL: @xvfrint_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfrint_d(v4f64 _1) { return __lasx_xvfrint_d(_1); } ++// CHECK-LABEL: @xvfrsqrt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfrsqrt_s(v8f32 _1) { return __lasx_xvfrsqrt_s(_1); } ++// CHECK-LABEL: @xvfrsqrt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfrsqrt_d(v4f64 _1) { return __lasx_xvfrsqrt_d(_1); } ++// CHECK-LABEL: @xvflogb_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvflogb_s(v8f32 _1) { return __lasx_xvflogb_s(_1); } ++// CHECK-LABEL: @xvflogb_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvflogb_d(v4f64 _1) { return __lasx_xvflogb_d(_1); } ++// CHECK-LABEL: @xvfcvth_s_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfcvth_s_h(v16i16 _1) { return __lasx_xvfcvth_s_h(_1); } ++// CHECK-LABEL: @xvfcvth_d_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfcvth_d_s(v8f32 _1) { return __lasx_xvfcvth_d_s(_1); } ++// CHECK-LABEL: @xvfcvtl_s_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfcvtl_s_h(v16i16 _1) { return __lasx_xvfcvtl_s_h(_1); } ++// CHECK-LABEL: @xvfcvtl_d_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfcvtl_d_s(v8f32 _1) { return __lasx_xvfcvtl_d_s(_1); } ++// CHECK-LABEL: @xvftint_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftint_w_s(v8f32 _1) { return __lasx_xvftint_w_s(_1); } ++// CHECK-LABEL: @xvftint_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftint_l_d(v4f64 _1) { return __lasx_xvftint_l_d(_1); } ++// CHECK-LABEL: @xvftint_wu_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvftint_wu_s(v8f32 _1) { return __lasx_xvftint_wu_s(_1); } ++// CHECK-LABEL: @xvftint_lu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvftint_lu_d(v4f64 _1) { return __lasx_xvftint_lu_d(_1); } ++// CHECK-LABEL: @xvftintrz_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftintrz_w_s(v8f32 _1) { return __lasx_xvftintrz_w_s(_1); } ++// CHECK-LABEL: @xvftintrz_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrz_l_d(v4f64 _1) { return __lasx_xvftintrz_l_d(_1); } ++// CHECK-LABEL: @xvftintrz_wu_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvftintrz_wu_s(v8f32 _1) { return __lasx_xvftintrz_wu_s(_1); } ++// CHECK-LABEL: @xvftintrz_lu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvftintrz_lu_d(v4f64 _1) { return __lasx_xvftintrz_lu_d(_1); } ++// CHECK-LABEL: @xvffint_s_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvffint_s_w(v8i32 _1) { return __lasx_xvffint_s_w(_1); } ++// CHECK-LABEL: @xvffint_d_l( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvffint_d_l(v4i64 _1) { return __lasx_xvffint_d_l(_1); } ++// CHECK-LABEL: @xvffint_s_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvffint_s_wu(v8u32 _1) { return __lasx_xvffint_s_wu(_1); } ++// CHECK-LABEL: @xvffint_d_lu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvffint_d_lu(v4u64 _1) { return __lasx_xvffint_d_lu(_1); } ++// CHECK-LABEL: @xvreplve_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_1:%.*]], i32 [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvreplve_b(v32i8 _1, int _2) { return __lasx_xvreplve_b(_1, _2); } ++// CHECK-LABEL: @xvreplve_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_1:%.*]], i32 [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvreplve_h(v16i16 _1, int _2) { return __lasx_xvreplve_h(_1, _2); } ++// CHECK-LABEL: @xvreplve_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_1:%.*]], i32 [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvreplve_w(v8i32 _1, int _2) { return __lasx_xvreplve_w(_1, _2); } ++// CHECK-LABEL: @xvreplve_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1:%.*]], i32 [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvreplve_d(v4i64 _1, int _2) { return __lasx_xvreplve_d(_1, _2); } ++// CHECK-LABEL: @xvpermi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvpermi_w(v8i32 _1, v8i32 _2) { return __lasx_xvpermi_w(_1, _2, 1); } ++// CHECK-LABEL: @xvandn_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvandn_v(v32u8 _1, v32u8 _2) { return __lasx_xvandn_v(_1, _2); } ++// CHECK-LABEL: @xvneg_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvneg_b(v32i8 _1) { return __lasx_xvneg_b(_1); } ++// CHECK-LABEL: @xvneg_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvneg_h(v16i16 _1) { return __lasx_xvneg_h(_1); } ++// CHECK-LABEL: @xvneg_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvneg_w(v8i32 _1) { return __lasx_xvneg_w(_1); } ++// CHECK-LABEL: @xvneg_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvneg_d(v4i64 _1) { return __lasx_xvneg_d(_1); } ++// CHECK-LABEL: @xvmuh_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmuh_b(v32i8 _1, v32i8 _2) { return __lasx_xvmuh_b(_1, _2); } ++// CHECK-LABEL: @xvmuh_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmuh_h(v16i16 _1, v16i16 _2) { return __lasx_xvmuh_h(_1, _2); } ++// CHECK-LABEL: @xvmuh_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmuh_w(v8i32 _1, v8i32 _2) { return __lasx_xvmuh_w(_1, _2); } ++// CHECK-LABEL: @xvmuh_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmuh_d(v4i64 _1, v4i64 _2) { return __lasx_xvmuh_d(_1, _2); } ++// CHECK-LABEL: @xvmuh_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvmuh_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmuh_bu(_1, _2); } ++// CHECK-LABEL: @xvmuh_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvmuh_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmuh_hu(_1, _2); } ++// CHECK-LABEL: @xvmuh_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvmuh_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmuh_wu(_1, _2); } ++// CHECK-LABEL: @xvmuh_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmuh_du(v4u64 _1, v4u64 _2) { return __lasx_xvmuh_du(_1, _2); } ++// CHECK-LABEL: @xvsllwil_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsllwil_h_b(v32i8 _1) { return __lasx_xvsllwil_h_b(_1, 1); } ++// CHECK-LABEL: @xvsllwil_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsllwil_w_h(v16i16 _1) { return __lasx_xvsllwil_w_h(_1, 1); } ++// CHECK-LABEL: @xvsllwil_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsllwil_d_w(v8i32 _1) { return __lasx_xvsllwil_d_w(_1, 1); } ++// CHECK-LABEL: @xvsllwil_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvsllwil_hu_bu(v32u8 _1) { return __lasx_xvsllwil_hu_bu(_1, 1); } ++// CHECK-LABEL: @xvsllwil_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvsllwil_wu_hu(v16u16 _1) { return __lasx_xvsllwil_wu_hu(_1, 1); } ++// CHECK-LABEL: @xvsllwil_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvsllwil_du_wu(v8u32 _1) { return __lasx_xvsllwil_du_wu(_1, 1); } ++// CHECK-LABEL: @xvsran_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsran_b_h(_1, _2); } ++// CHECK-LABEL: @xvsran_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsran_h_w(_1, _2); } ++// CHECK-LABEL: @xvsran_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsran_w_d(_1, _2); } ++// CHECK-LABEL: @xvssran_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssran_b_h(_1, _2); } ++// CHECK-LABEL: @xvssran_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssran_h_w(_1, _2); } ++// CHECK-LABEL: @xvssran_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssran_w_d(_1, _2); } ++// CHECK-LABEL: @xvssran_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssran_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssran_bu_h(_1, _2); } ++// CHECK-LABEL: @xvssran_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssran_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssran_hu_w(_1, _2); } ++// CHECK-LABEL: @xvssran_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssran_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssran_wu_d(_1, _2); } ++// CHECK-LABEL: @xvsrarn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrarn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrarn_b_h(_1, _2); } ++// CHECK-LABEL: @xvsrarn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrarn_h_w(_1, _2); } ++// CHECK-LABEL: @xvsrarn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrarn_w_d(_1, _2); } ++// CHECK-LABEL: @xvssrarn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssrarn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrarn_b_h(_1, _2); } ++// CHECK-LABEL: @xvssrarn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrarn_h_w(_1, _2); } ++// CHECK-LABEL: @xvssrarn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrarn_w_d(_1, _2); } ++// CHECK-LABEL: @xvssrarn_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssrarn_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrarn_bu_h(_1, _2); } ++// CHECK-LABEL: @xvssrarn_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssrarn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrarn_hu_w(_1, _2); } ++// CHECK-LABEL: @xvssrarn_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssrarn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrarn_wu_d(_1, _2); } ++// CHECK-LABEL: @xvsrln_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrln_b_h(_1, _2); } ++// CHECK-LABEL: @xvsrln_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrln_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrln_h_w(_1, _2); } ++// CHECK-LABEL: @xvsrln_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrln_w_d(_1, _2); } ++// CHECK-LABEL: @xvssrln_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssrln_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrln_bu_h(_1, _2); } ++// CHECK-LABEL: @xvssrln_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssrln_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrln_hu_w(_1, _2); } ++// CHECK-LABEL: @xvssrln_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssrln_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrln_wu_d(_1, _2); } ++// CHECK-LABEL: @xvsrlrn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrn_b_h(_1, _2); } ++// CHECK-LABEL: @xvsrlrn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrn_h_w(_1, _2); } ++// CHECK-LABEL: @xvsrlrn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrn_w_d(_1, _2); } ++// CHECK-LABEL: @xvssrlrn_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssrlrn_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrlrn_bu_h(_1, _2); } ++// CHECK-LABEL: @xvssrlrn_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssrlrn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrlrn_hu_w(_1, _2); } ++// CHECK-LABEL: @xvssrlrn_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssrlrn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrlrn_wu_d(_1, _2); } ++// CHECK-LABEL: @xvfrstpi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2) { return __lasx_xvfrstpi_b(_1, _2, 1); } ++// CHECK-LABEL: @xvfrstpi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2) { return __lasx_xvfrstpi_h(_1, _2, 1); } ++// CHECK-LABEL: @xvfrstp_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvfrstp_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvfrstp_b(_1, _2, _3); } ++// CHECK-LABEL: @xvfrstp_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvfrstp_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvfrstp_h(_1, _2, _3); } ++// CHECK-LABEL: @xvshuf4i_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2) { return __lasx_xvshuf4i_d(_1, _2, 1); } ++// CHECK-LABEL: @xvbsrl_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvbsrl_v(v32i8 _1) { return __lasx_xvbsrl_v(_1, 1); } ++// CHECK-LABEL: @xvbsll_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvbsll_v(v32i8 _1) { return __lasx_xvbsll_v(_1, 1); } ++// CHECK-LABEL: @xvextrins_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvextrins_b(v32i8 _1, v32i8 _2) { return __lasx_xvextrins_b(_1, _2, 1); } ++// CHECK-LABEL: @xvextrins_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvextrins_h(v16i16 _1, v16i16 _2) { return __lasx_xvextrins_h(_1, _2, 1); } ++// CHECK-LABEL: @xvextrins_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvextrins_w(v8i32 _1, v8i32 _2) { return __lasx_xvextrins_w(_1, _2, 1); } ++// CHECK-LABEL: @xvextrins_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvextrins_d(v4i64 _1, v4i64 _2) { return __lasx_xvextrins_d(_1, _2, 1); } ++// CHECK-LABEL: @xvmskltz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmskltz_b(v32i8 _1) { return __lasx_xvmskltz_b(_1); } ++// CHECK-LABEL: @xvmskltz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmskltz_h(v16i16 _1) { return __lasx_xvmskltz_h(_1); } ++// CHECK-LABEL: @xvmskltz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmskltz_w(v8i32 _1) { return __lasx_xvmskltz_w(_1); } ++// CHECK-LABEL: @xvmskltz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmskltz_d(v4i64 _1) { return __lasx_xvmskltz_d(_1); } ++// CHECK-LABEL: @xvsigncov_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsigncov_b(v32i8 _1, v32i8 _2) { return __lasx_xvsigncov_b(_1, _2); } ++// CHECK-LABEL: @xvsigncov_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsigncov_h(v16i16 _1, v16i16 _2) { return __lasx_xvsigncov_h(_1, _2); } ++// CHECK-LABEL: @xvsigncov_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsigncov_w(v8i32 _1, v8i32 _2) { return __lasx_xvsigncov_w(_1, _2); } ++// CHECK-LABEL: @xvsigncov_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsigncov_d(v4i64 _1, v4i64 _2) { return __lasx_xvsigncov_d(_1, _2); } ++// CHECK-LABEL: @xvfmadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmadd_s(_1, _2, _3); } ++// CHECK-LABEL: @xvfmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfmadd_d(_1, _2, _3); } ++// CHECK-LABEL: @xvfmsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmsub_s(_1, _2, _3); } ++// CHECK-LABEL: @xvfmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfmsub_d(_1, _2, _3); } ++// CHECK-LABEL: @xvfnmadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfnmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmadd_s(_1, _2, _3); } ++// CHECK-LABEL: @xvfnmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfnmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmadd_d(_1, _2, _3); } ++// CHECK-LABEL: @xvfnmsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfnmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmsub_s(_1, _2, _3); } ++// CHECK-LABEL: @xvfnmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfnmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmsub_d(_1, _2, _3); } ++// CHECK-LABEL: @xvftintrne_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftintrne_w_s(v8f32 _1) { return __lasx_xvftintrne_w_s(_1); } ++// CHECK-LABEL: @xvftintrne_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrne_l_d(v4f64 _1) { return __lasx_xvftintrne_l_d(_1); } ++// CHECK-LABEL: @xvftintrp_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftintrp_w_s(v8f32 _1) { return __lasx_xvftintrp_w_s(_1); } ++// CHECK-LABEL: @xvftintrp_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrp_l_d(v4f64 _1) { return __lasx_xvftintrp_l_d(_1); } ++// CHECK-LABEL: @xvftintrm_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftintrm_w_s(v8f32 _1) { return __lasx_xvftintrm_w_s(_1); } ++// CHECK-LABEL: @xvftintrm_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrm_l_d(v4f64 _1) { return __lasx_xvftintrm_l_d(_1); } ++// CHECK-LABEL: @xvftint_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftint_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftint_w_d(_1, _2); } ++// CHECK-LABEL: @xvffint_s_l( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvffint_s_l(v4i64 _1, v4i64 _2) { return __lasx_xvffint_s_l(_1, _2); } ++// CHECK-LABEL: @xvftintrz_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftintrz_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrz_w_d(_1, _2); } ++// CHECK-LABEL: @xvftintrp_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftintrp_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrp_w_d(_1, _2); } ++// CHECK-LABEL: @xvftintrm_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftintrm_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrm_w_d(_1, _2); } ++// CHECK-LABEL: @xvftintrne_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftintrne_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrne_w_d(_1, _2); } ++// CHECK-LABEL: @xvftinth_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftinth_l_s(v8f32 _1) { return __lasx_xvftinth_l_s(_1); } ++// CHECK-LABEL: @xvftintl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintl_l_s(v8f32 _1) { return __lasx_xvftintl_l_s(_1); } ++// CHECK-LABEL: @xvffinth_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvffinth_d_w(v8i32 _1) { return __lasx_xvffinth_d_w(_1); } ++// CHECK-LABEL: @xvffintl_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvffintl_d_w(v8i32 _1) { return __lasx_xvffintl_d_w(_1); } ++// CHECK-LABEL: @xvftintrzh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrzh_l_s(v8f32 _1) { return __lasx_xvftintrzh_l_s(_1); } ++// CHECK-LABEL: @xvftintrzl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrzl_l_s(v8f32 _1) { return __lasx_xvftintrzl_l_s(_1); } ++// CHECK-LABEL: @xvftintrph_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrph_l_s(v8f32 _1) { return __lasx_xvftintrph_l_s(_1); } ++// CHECK-LABEL: @xvftintrpl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrpl_l_s(v8f32 _1) { return __lasx_xvftintrpl_l_s(_1); } ++// CHECK-LABEL: @xvftintrmh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrmh_l_s(v8f32 _1) { return __lasx_xvftintrmh_l_s(_1); } ++// CHECK-LABEL: @xvftintrml_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrml_l_s(v8f32 _1) { return __lasx_xvftintrml_l_s(_1); } ++// CHECK-LABEL: @xvftintrneh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrneh_l_s(v8f32 _1) { return __lasx_xvftintrneh_l_s(_1); } ++// CHECK-LABEL: @xvftintrnel_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrnel_l_s(v8f32 _1) { return __lasx_xvftintrnel_l_s(_1); } ++// CHECK-LABEL: @xvfrintrne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> ++// CHECK-NEXT: ret <8 x i32> [[TMP1]] ++// ++v8i32 xvfrintrne_s(v8f32 _1) { return __lasx_xvfrintrne_s(_1); } ++// CHECK-LABEL: @xvfrintrne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> ++// CHECK-NEXT: ret <4 x i64> [[TMP1]] ++// ++v4i64 xvfrintrne_d(v4f64 _1) { return __lasx_xvfrintrne_d(_1); } ++// CHECK-LABEL: @xvfrintrz_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> ++// CHECK-NEXT: ret <8 x i32> [[TMP1]] ++// ++v8i32 xvfrintrz_s(v8f32 _1) { return __lasx_xvfrintrz_s(_1); } ++// CHECK-LABEL: @xvfrintrz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> ++// CHECK-NEXT: ret <4 x i64> [[TMP1]] ++// ++v4i64 xvfrintrz_d(v4f64 _1) { return __lasx_xvfrintrz_d(_1); } ++// CHECK-LABEL: @xvfrintrp_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> ++// CHECK-NEXT: ret <8 x i32> [[TMP1]] ++// ++v8i32 xvfrintrp_s(v8f32 _1) { return __lasx_xvfrintrp_s(_1); } ++// CHECK-LABEL: @xvfrintrp_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> ++// CHECK-NEXT: ret <4 x i64> [[TMP1]] ++// ++v4i64 xvfrintrp_d(v4f64 _1) { return __lasx_xvfrintrp_d(_1); } ++// CHECK-LABEL: @xvfrintrm_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> ++// CHECK-NEXT: ret <8 x i32> [[TMP1]] ++// ++v8i32 xvfrintrm_s(v8f32 _1) { return __lasx_xvfrintrm_s(_1); } ++// CHECK-LABEL: @xvfrintrm_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> ++// CHECK-NEXT: ret <4 x i64> [[TMP1]] ++// ++v4i64 xvfrintrm_d(v4f64 _1) { return __lasx_xvfrintrm_d(_1); } ++// CHECK-LABEL: @xvld( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvld(void * _1) { return __lasx_xvld(_1, 1); } ++// CHECK-LABEL: @xvst( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret void ++// ++void xvst(v32i8 _1, void * _2) { return __lasx_xvst(_1, _2, 1); } ++// CHECK-LABEL: @xvstelm_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1, i32 1) ++// CHECK-NEXT: ret void ++// ++void xvstelm_b(v32i8 _1, void * _2) { return __lasx_xvstelm_b(_1, _2, 1, 1); } ++// CHECK-LABEL: @xvstelm_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1:%.*]], ptr [[_2:%.*]], i32 2, i32 1) ++// CHECK-NEXT: ret void ++// ++void xvstelm_h(v16i16 _1, void * _2) { return __lasx_xvstelm_h(_1, _2, 2, 1); } ++// CHECK-LABEL: @xvstelm_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1:%.*]], ptr [[_2:%.*]], i32 4, i32 1) ++// CHECK-NEXT: ret void ++// ++void xvstelm_w(v8i32 _1, void * _2) { return __lasx_xvstelm_w(_1, _2, 4, 1); } ++// CHECK-LABEL: @xvstelm_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1:%.*]], ptr [[_2:%.*]], i32 8, i32 1) ++// CHECK-NEXT: ret void ++// ++void xvstelm_d(v4i64 _1, void * _2) { return __lasx_xvstelm_d(_1, _2, 8, 1); } ++// CHECK-LABEL: @xvinsve0_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvinsve0_w(v8i32 _1, v8i32 _2) { return __lasx_xvinsve0_w(_1, _2, 1); } ++// CHECK-LABEL: @xvinsve0_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvinsve0_d(v4i64 _1, v4i64 _2) { return __lasx_xvinsve0_d(_1, _2, 1); } ++// CHECK-LABEL: @xvpickve_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvpickve_w(v8i32 _1) { return __lasx_xvpickve_w(_1, 1); } ++// CHECK-LABEL: @xvpickve_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvpickve_d(v4i64 _1) { return __lasx_xvpickve_d(_1, 1); } ++// CHECK-LABEL: @xvssrlrn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrlrn_b_h(_1, _2); } ++// CHECK-LABEL: @xvssrlrn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrn_h_w(_1, _2); } ++// CHECK-LABEL: @xvssrlrn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrn_w_d(_1, _2); } ++// CHECK-LABEL: @xvssrln_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrln_b_h(_1, _2); } ++// CHECK-LABEL: @xvssrln_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssrln_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrln_h_w(_1, _2); } ++// CHECK-LABEL: @xvssrln_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrln_w_d(_1, _2); } ++// CHECK-LABEL: @xvorn_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvorn_v(v32i8 _1, v32i8 _2) { return __lasx_xvorn_v(_1, _2); } ++// CHECK-LABEL: @xvldi( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvldi() { return __lasx_xvldi(1); } ++// CHECK-LABEL: @xvldx( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1:%.*]], i64 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvldx(void * _1) { return __lasx_xvldx(_1, 1); } ++// CHECK-LABEL: @xvstx( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i64 1) ++// CHECK-NEXT: ret void ++// ++void xvstx(v32i8 _1, void * _2) { return __lasx_xvstx(_1, _2, 1); } ++// CHECK-LABEL: @xvextl_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvextl_qu_du(v4u64 _1) { return __lasx_xvextl_qu_du(_1); } ++// CHECK-LABEL: @xvinsgr2vr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1:%.*]], i32 1, i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvinsgr2vr_w(v8i32 _1) { return __lasx_xvinsgr2vr_w(_1, 1, 1); } ++// CHECK-LABEL: @xvinsgr2vr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1:%.*]], i64 1, i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvinsgr2vr_d(v4i64 _1) { return __lasx_xvinsgr2vr_d(_1, 1, 1); } ++// CHECK-LABEL: @xvreplve0_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvreplve0_b(v32i8 _1) { return __lasx_xvreplve0_b(_1); } ++// CHECK-LABEL: @xvreplve0_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvreplve0_h(v16i16 _1) { return __lasx_xvreplve0_h(_1); } ++// CHECK-LABEL: @xvreplve0_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvreplve0_w(v8i32 _1) { return __lasx_xvreplve0_w(_1); } ++// CHECK-LABEL: @xvreplve0_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvreplve0_d(v4i64 _1) { return __lasx_xvreplve0_d(_1); } ++// CHECK-LABEL: @xvreplve0_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvreplve0_q(v32i8 _1) { return __lasx_xvreplve0_q(_1); } ++// CHECK-LABEL: @vext2xv_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 vext2xv_h_b(v32i8 _1) { return __lasx_vext2xv_h_b(_1); } ++// CHECK-LABEL: @vext2xv_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 vext2xv_w_h(v16i16 _1) { return __lasx_vext2xv_w_h(_1); } ++// CHECK-LABEL: @vext2xv_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 vext2xv_d_w(v8i32 _1) { return __lasx_vext2xv_d_w(_1); } ++// CHECK-LABEL: @vext2xv_w_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 vext2xv_w_b(v32i8 _1) { return __lasx_vext2xv_w_b(_1); } ++// CHECK-LABEL: @vext2xv_d_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 vext2xv_d_h(v16i16 _1) { return __lasx_vext2xv_d_h(_1); } ++// CHECK-LABEL: @vext2xv_d_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 vext2xv_d_b(v32i8 _1) { return __lasx_vext2xv_d_b(_1); } ++// CHECK-LABEL: @vext2xv_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 vext2xv_hu_bu(v32i8 _1) { return __lasx_vext2xv_hu_bu(_1); } ++// CHECK-LABEL: @vext2xv_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 vext2xv_wu_hu(v16i16 _1) { return __lasx_vext2xv_wu_hu(_1); } ++// CHECK-LABEL: @vext2xv_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 vext2xv_du_wu(v8i32 _1) { return __lasx_vext2xv_du_wu(_1); } ++// CHECK-LABEL: @vext2xv_wu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 vext2xv_wu_bu(v32i8 _1) { return __lasx_vext2xv_wu_bu(_1); } ++// CHECK-LABEL: @vext2xv_du_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 vext2xv_du_hu(v16i16 _1) { return __lasx_vext2xv_du_hu(_1); } ++// CHECK-LABEL: @vext2xv_du_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 vext2xv_du_bu(v32i8 _1) { return __lasx_vext2xv_du_bu(_1); } ++// CHECK-LABEL: @xvpermi_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvpermi_q(v32i8 _1, v32i8 _2) { return __lasx_xvpermi_q(_1, _2, 1); } ++// CHECK-LABEL: @xvpermi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvpermi_d(v4i64 _1) { return __lasx_xvpermi_d(_1, 1); } ++// CHECK-LABEL: @xvperm_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __lasx_xvperm_w(_1, _2); } ++// CHECK-LABEL: @xvldrepl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvldrepl_b(void * _1) { return __lasx_xvldrepl_b(_1, 1); } ++// CHECK-LABEL: @xvldrepl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr [[_1:%.*]], i32 2) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvldrepl_h(void * _1) { return __lasx_xvldrepl_h(_1, 2); } ++// CHECK-LABEL: @xvldrepl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr [[_1:%.*]], i32 4) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvldrepl_w(void * _1) { return __lasx_xvldrepl_w(_1, 4); } ++// CHECK-LABEL: @xvldrepl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr [[_1:%.*]], i32 8) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvldrepl_d(void * _1) { return __lasx_xvldrepl_d(_1, 8); } ++// CHECK-LABEL: @xvpickve2gr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xvpickve2gr_w(v8i32 _1) { return __lasx_xvpickve2gr_w(_1, 1); } ++// CHECK-LABEL: @xvpickve2gr_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++unsigned int xvpickve2gr_wu(v8i32 _1) { return __lasx_xvpickve2gr_wu(_1, 1); } ++// CHECK-LABEL: @xvpickve2gr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i64 [[TMP0]] ++// ++long xvpickve2gr_d(v4i64 _1) { return __lasx_xvpickve2gr_d(_1, 1); } ++// CHECK-LABEL: @xvpickve2gr_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i64 [[TMP0]] ++// ++unsigned long int xvpickve2gr_du(v4i64 _1) { return __lasx_xvpickve2gr_du(_1, 1); } ++// CHECK-LABEL: @xvaddwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvaddwev_q_d(_1, _2); } ++// CHECK-LABEL: @xvaddwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvaddwev_d_w(_1, _2); } ++// CHECK-LABEL: @xvaddwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvaddwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvaddwev_w_h(_1, _2); } ++// CHECK-LABEL: @xvaddwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvaddwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwev_h_b(_1, _2); } ++// CHECK-LABEL: @xvaddwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwev_q_du(_1, _2); } ++// CHECK-LABEL: @xvaddwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwev_d_wu(_1, _2); } ++// CHECK-LABEL: @xvaddwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvaddwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwev_w_hu(_1, _2); } ++// CHECK-LABEL: @xvaddwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvaddwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvaddwev_h_bu(_1, _2); } ++// CHECK-LABEL: @xvsubwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwev_q_d(_1, _2); } ++// CHECK-LABEL: @xvsubwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwev_d_w(_1, _2); } ++// CHECK-LABEL: @xvsubwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsubwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwev_w_h(_1, _2); } ++// CHECK-LABEL: @xvsubwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsubwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwev_h_b(_1, _2); } ++// CHECK-LABEL: @xvsubwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwev_q_du(_1, _2); } ++// CHECK-LABEL: @xvsubwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsubwev_d_wu(_1, _2); } ++// CHECK-LABEL: @xvsubwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsubwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwev_w_hu(_1, _2); } ++// CHECK-LABEL: @xvsubwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsubwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwev_h_bu(_1, _2); } ++// CHECK-LABEL: @xvmulwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwev_q_d(_1, _2); } ++// CHECK-LABEL: @xvmulwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvmulwev_d_w(_1, _2); } ++// CHECK-LABEL: @xvmulwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmulwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwev_w_h(_1, _2); } ++// CHECK-LABEL: @xvmulwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmulwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwev_h_b(_1, _2); } ++// CHECK-LABEL: @xvmulwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwev_q_du(_1, _2); } ++// CHECK-LABEL: @xvmulwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwev_d_wu(_1, _2); } ++// CHECK-LABEL: @xvmulwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmulwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwev_w_hu(_1, _2); } ++// CHECK-LABEL: @xvmulwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmulwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwev_h_bu(_1, _2); } ++// CHECK-LABEL: @xvaddwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvaddwod_q_d(_1, _2); } ++// CHECK-LABEL: @xvaddwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvaddwod_d_w(_1, _2); } ++// CHECK-LABEL: @xvaddwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvaddwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvaddwod_w_h(_1, _2); } ++// CHECK-LABEL: @xvaddwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvaddwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwod_h_b(_1, _2); } ++// CHECK-LABEL: @xvaddwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwod_q_du(_1, _2); } ++// CHECK-LABEL: @xvaddwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwod_d_wu(_1, _2); } ++// CHECK-LABEL: @xvaddwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvaddwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwod_w_hu(_1, _2); } ++// CHECK-LABEL: @xvaddwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvaddwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvaddwod_h_bu(_1, _2); } ++// CHECK-LABEL: @xvsubwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwod_q_d(_1, _2); } ++// CHECK-LABEL: @xvsubwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwod_d_w(_1, _2); } ++// CHECK-LABEL: @xvsubwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsubwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwod_w_h(_1, _2); } ++// CHECK-LABEL: @xvsubwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsubwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwod_h_b(_1, _2); } ++// CHECK-LABEL: @xvsubwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwod_q_du(_1, _2); } ++// CHECK-LABEL: @xvsubwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsubwod_d_wu(_1, _2); } ++// CHECK-LABEL: @xvsubwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsubwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwod_w_hu(_1, _2); } ++// CHECK-LABEL: @xvsubwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsubwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwod_h_bu(_1, _2); } ++// CHECK-LABEL: @xvmulwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwod_q_d(_1, _2); } ++// CHECK-LABEL: @xvmulwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvmulwod_d_w(_1, _2); } ++// CHECK-LABEL: @xvmulwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmulwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwod_w_h(_1, _2); } ++// CHECK-LABEL: @xvmulwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmulwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwod_h_b(_1, _2); } ++// CHECK-LABEL: @xvmulwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwod_q_du(_1, _2); } ++// CHECK-LABEL: @xvmulwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwod_d_wu(_1, _2); } ++// CHECK-LABEL: @xvmulwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmulwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwod_w_hu(_1, _2); } ++// CHECK-LABEL: @xvmulwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmulwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwod_h_bu(_1, _2); } ++// CHECK-LABEL: @xvaddwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwev_d_wu_w(_1, _2); } ++// CHECK-LABEL: @xvaddwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvaddwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwev_w_hu_h(_1, _2); } ++// CHECK-LABEL: @xvaddwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvaddwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwev_h_bu_b(_1, _2); } ++// CHECK-LABEL: @xvmulwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwev_d_wu_w(_1, _2); } ++// CHECK-LABEL: @xvmulwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmulwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvmulwev_w_hu_h(_1, _2); } ++// CHECK-LABEL: @xvmulwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmulwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwev_h_bu_b(_1, _2); } ++// CHECK-LABEL: @xvaddwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwod_d_wu_w(_1, _2); } ++// CHECK-LABEL: @xvaddwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvaddwod_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwod_w_hu_h(_1, _2); } ++// CHECK-LABEL: @xvaddwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvaddwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwod_h_bu_b(_1, _2); } ++// CHECK-LABEL: @xvmulwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwod_d_wu_w(_1, _2); } ++// CHECK-LABEL: @xvmulwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmulwod_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvmulwod_w_hu_h(_1, _2); } ++// CHECK-LABEL: @xvmulwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmulwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwod_h_bu_b(_1, _2); } ++// CHECK-LABEL: @xvhaddw_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvhaddw_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvhaddw_q_d(_1, _2); } ++// CHECK-LABEL: @xvhaddw_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvhaddw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhaddw_qu_du(_1, _2); } ++// CHECK-LABEL: @xvhsubw_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvhsubw_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvhsubw_q_d(_1, _2); } ++// CHECK-LABEL: @xvhsubw_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvhsubw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhsubw_qu_du(_1, _2); } ++// CHECK-LABEL: @xvmaddwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaddwev_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_d(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaddwev_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmaddwev_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmaddwev_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_b(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmaddwev_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwev_q_du(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmaddwev_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __lasx_xvmaddwev_d_wu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvmaddwev_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __lasx_xvmaddwev_w_hu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvmaddwev_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __lasx_xvmaddwev_h_bu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaddwod_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_d(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaddwod_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmaddwod_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmaddwod_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_b(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmaddwod_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwod_q_du(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmaddwod_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __lasx_xvmaddwod_d_wu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvmaddwod_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __lasx_xvmaddwod_w_hu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvmaddwod_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __lasx_xvmaddwod_h_bu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaddwev_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_du_d(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaddwev_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_wu_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmaddwev_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_hu_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmaddwev_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_bu_b(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaddwod_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_du_d(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaddwod_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_wu_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmaddwod_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_hu_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmaddwod_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_bu_b(_1, _2, _3); } ++// CHECK-LABEL: @xvrotr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvrotr_b(v32i8 _1, v32i8 _2) { return __lasx_xvrotr_b(_1, _2); } ++// CHECK-LABEL: @xvrotr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvrotr_h(v16i16 _1, v16i16 _2) { return __lasx_xvrotr_h(_1, _2); } ++// CHECK-LABEL: @xvrotr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvrotr_w(v8i32 _1, v8i32 _2) { return __lasx_xvrotr_w(_1, _2); } ++// CHECK-LABEL: @xvrotr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvrotr_d(v4i64 _1, v4i64 _2) { return __lasx_xvrotr_d(_1, _2); } ++// CHECK-LABEL: @xvadd_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvadd_q(v4i64 _1, v4i64 _2) { return __lasx_xvadd_q(_1, _2); } ++// CHECK-LABEL: @xvsub_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsub_q(v4i64 _1, v4i64 _2) { return __lasx_xvsub_q(_1, _2); } ++// CHECK-LABEL: @xvaddwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvaddwev_q_du_d(_1, _2); } ++// CHECK-LABEL: @xvaddwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwod_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvaddwod_q_du_d(_1, _2); } ++// CHECK-LABEL: @xvmulwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwev_q_du_d(_1, _2); } ++// CHECK-LABEL: @xvmulwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwod_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwod_q_du_d(_1, _2); } ++// CHECK-LABEL: @xvmskgez_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmskgez_b(v32i8 _1) { return __lasx_xvmskgez_b(_1); } ++// CHECK-LABEL: @xvmsknz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmsknz_b(v32i8 _1) { return __lasx_xvmsknz_b(_1); } ++// CHECK-LABEL: @xvexth_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvexth_h_b(v32i8 _1) { return __lasx_xvexth_h_b(_1); } ++// CHECK-LABEL: @xvexth_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvexth_w_h(v16i16 _1) { return __lasx_xvexth_w_h(_1); } ++// CHECK-LABEL: @xvexth_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvexth_d_w(v8i32 _1) { return __lasx_xvexth_d_w(_1); } ++// CHECK-LABEL: @xvexth_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvexth_q_d(v4i64 _1) { return __lasx_xvexth_q_d(_1); } ++// CHECK-LABEL: @xvexth_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvexth_hu_bu(v32u8 _1) { return __lasx_xvexth_hu_bu(_1); } ++// CHECK-LABEL: @xvexth_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvexth_wu_hu(v16u16 _1) { return __lasx_xvexth_wu_hu(_1); } ++// CHECK-LABEL: @xvexth_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvexth_du_wu(v8u32 _1) { return __lasx_xvexth_du_wu(_1); } ++// CHECK-LABEL: @xvexth_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvexth_qu_du(v4u64 _1) { return __lasx_xvexth_qu_du(_1); } ++// CHECK-LABEL: @xvrotri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvrotri_b(v32i8 _1) { return __lasx_xvrotri_b(_1, 1); } ++// CHECK-LABEL: @xvrotri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvrotri_h(v16i16 _1) { return __lasx_xvrotri_h(_1, 1); } ++// CHECK-LABEL: @xvrotri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvrotri_w(v8i32 _1) { return __lasx_xvrotri_w(_1, 1); } ++// CHECK-LABEL: @xvrotri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvrotri_d(v4i64 _1) { return __lasx_xvrotri_d(_1, 1); } ++// CHECK-LABEL: @xvextl_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvextl_q_d(v4i64 _1) { return __lasx_xvextl_q_d(_1); } ++// CHECK-LABEL: @xvsrlni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlrni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlrni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlrni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlrni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlrni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrlni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrlni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrlni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrlni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlni_bu_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlni_hu_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrlni_wu_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlni_du_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrlrni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrlrni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlrni_bu_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlrni_hu_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrlrni_wu_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlrni_du_q(_1, _2, 1); } ++// CHECK-LABEL: @xvsrani_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrani_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvsrani_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrani_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvsrani_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrani_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvsrani_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrani_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvsrarni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrarni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvsrarni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrarni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvsrarni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrarni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvsrarni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrarni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrani_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrani_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrani_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrani_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrani_bu_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrani_hu_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrani_wu_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrani_du_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrarni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrarni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrarni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrarni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrarni_bu_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrarni_hu_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrarni_wu_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrarni_du_q(_1, _2, 1); } ++// CHECK-LABEL: @xbnz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbnz_b(v32u8 _1) { return __lasx_xbnz_b(_1); } ++// CHECK-LABEL: @xbnz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbnz_d(v4u64 _1) { return __lasx_xbnz_d(_1); } ++// CHECK-LABEL: @xbnz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbnz_h(v16u16 _1) { return __lasx_xbnz_h(_1); } ++// CHECK-LABEL: @xbnz_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbnz_v(v32u8 _1) { return __lasx_xbnz_v(_1); } ++// CHECK-LABEL: @xbnz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbnz_w(v8u32 _1) { return __lasx_xbnz_w(_1); } ++// CHECK-LABEL: @xbz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbz_b(v32u8 _1) { return __lasx_xbz_b(_1); } ++// CHECK-LABEL: @xbz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbz_d(v4u64 _1) { return __lasx_xbz_d(_1); } ++// CHECK-LABEL: @xbz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbz_h(v16u16 _1) { return __lasx_xbz_h(_1); } ++// CHECK-LABEL: @xbz_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbz_v(v32u8 _1) { return __lasx_xbz_v(_1); } ++// CHECK-LABEL: @xbz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbz_w(v8u32 _1) { return __lasx_xbz_w(_1); } ++// CHECK-LABEL: @xvfcmp_caf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_caf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_caf_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_caf_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_caf_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_caf_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_ceq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_ceq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_ceq_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_ceq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_ceq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_ceq_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_cle_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cle_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cle_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_cle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cle_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_clt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_clt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_clt_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_clt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_clt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_clt_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_cne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cne_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_cne_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cne_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cor_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_cor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cor_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cor_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_cor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cor_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cueq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_cueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cueq_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cueq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_cueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cueq_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cule_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_cule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cule_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cule_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_cule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cule_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cult_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_cult_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cult_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cult_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_cult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cult_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cun_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_cun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cun_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cune_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_cune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cune_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cune_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_cune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cune_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cun_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_cun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cun_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_saf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_saf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_saf_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_saf_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_saf_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_saf_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_seq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_seq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_seq_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_seq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_seq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_seq_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_sle_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sle_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sle_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_sle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sle_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_slt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_slt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_slt_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_slt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_slt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_slt_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_sne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sne_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_sne_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sne_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sor_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_sor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sor_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sor_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_sor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sor_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sueq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_sueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sueq_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sueq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_sueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sueq_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sule_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_sule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sule_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sule_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_sule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sule_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sult_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_sult_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sult_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sult_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_sult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sult_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sun_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_sun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sun_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sune_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_sune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sune_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sune_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_sune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sune_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sun_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_sun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sun_s(_1, _2); } ++// CHECK-LABEL: @xvpickve_d_f( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvpickve_d_f(v4f64 _1) { return __lasx_xvpickve_d_f(_1, 1); } ++// CHECK-LABEL: @xvpickve_w_f( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvpickve_w_f(v8f32 _1) { return __lasx_xvpickve_w_f(_1, 1); } ++// CHECK-LABEL: @xvrepli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvrepli_b() { return __lasx_xvrepli_b(1); } ++// CHECK-LABEL: @xvrepli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvrepli_d() { return __lasx_xvrepli_d(1); } ++// CHECK-LABEL: @xvrepli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvrepli_h() { return __lasx_xvrepli_h(1); } ++// CHECK-LABEL: @xvrepli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvrepli_w() { return __lasx_xvrepli_w(1); } +diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-error.c b/clang/test/CodeGen/LoongArch/lasx/builtin-error.c +new file mode 100644 +index 000000000000..724484465769 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/lasx/builtin-error.c +@@ -0,0 +1,1392 @@ ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -verify %s ++ ++typedef signed char v32i8 __attribute__((vector_size(32), aligned(32))); ++typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1))); ++typedef unsigned char v32u8 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned char v32u8_b __attribute__((vector_size(32), aligned(1))); ++typedef short v16i16 __attribute__((vector_size(32), aligned(32))); ++typedef short v16i16_h __attribute__((vector_size(32), aligned(2))); ++typedef unsigned short v16u16 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned short v16u16_h __attribute__((vector_size(32), aligned(2))); ++typedef int v8i32 __attribute__((vector_size(32), aligned(32))); ++typedef int v8i32_w __attribute__((vector_size(32), aligned(4))); ++typedef unsigned int v8u32 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4))); ++typedef long long v4i64 __attribute__((vector_size(32), aligned(32))); ++typedef long long v4i64_d __attribute__((vector_size(32), aligned(8))); ++typedef unsigned long long v4u64 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned long long v4u64_d __attribute__((vector_size(32), aligned(8))); ++typedef float v8f32 __attribute__((vector_size(32), aligned(32))); ++typedef float v8f32_w __attribute__((vector_size(32), aligned(4))); ++typedef double v4f64 __attribute__((vector_size(32), aligned(32))); ++typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); ++ ++v32i8 xvslli_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvslli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvslli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvslli_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvslli_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvslli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvslli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvslli_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvslli_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvslli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslli_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvslli_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvslli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvslli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvslli_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrai_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvsrai_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvsrai_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvsrai_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrai_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvsrai_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsrai_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsrai_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrai_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvsrai_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsrai_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsrai_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrai_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvsrai_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsrai_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsrai_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrari_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvsrari_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvsrari_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvsrari_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrari_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvsrari_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsrari_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsrari_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrari_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvsrari_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsrari_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsrari_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrari_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvsrari_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsrari_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsrari_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrli_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvsrli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvsrli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvsrli_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrli_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvsrli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsrli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsrli_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrli_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvsrli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsrli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsrli_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrli_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvsrli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsrli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsrli_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrlri_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvsrlri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvsrlri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvsrlri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrlri_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvsrlri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsrlri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsrlri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrlri_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvsrlri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsrlri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsrlri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrlri_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvsrlri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsrlri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsrlri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvbitclri_b(v32u8 _1, int var) { ++ v32u8 res = __builtin_lasx_xvbitclri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvbitclri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvbitclri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_b' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvbitclri_h(v16u16 _1, int var) { ++ v16u16 res = __builtin_lasx_xvbitclri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvbitclri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvbitclri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_h' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvbitclri_w(v8u32 _1, int var) { ++ v8u32 res = __builtin_lasx_xvbitclri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvbitclri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvbitclri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_w' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvbitclri_d(v4u64 _1, int var) { ++ v4u64 res = __builtin_lasx_xvbitclri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvbitclri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvbitclri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvbitseti_b(v32u8 _1, int var) { ++ v32u8 res = __builtin_lasx_xvbitseti_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvbitseti_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvbitseti_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_b' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvbitseti_h(v16u16 _1, int var) { ++ v16u16 res = __builtin_lasx_xvbitseti_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvbitseti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvbitseti_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_h' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvbitseti_w(v8u32 _1, int var) { ++ v8u32 res = __builtin_lasx_xvbitseti_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvbitseti_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvbitseti_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_w' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvbitseti_d(v4u64 _1, int var) { ++ v4u64 res = __builtin_lasx_xvbitseti_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvbitseti_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvbitseti_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvbitrevi_b(v32u8 _1, int var) { ++ v32u8 res = __builtin_lasx_xvbitrevi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvbitrevi_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvbitrevi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_b' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvbitrevi_h(v16u16 _1, int var) { ++ v16u16 res = __builtin_lasx_xvbitrevi_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvbitrevi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvbitrevi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_h' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvbitrevi_w(v8u32 _1, int var) { ++ v8u32 res = __builtin_lasx_xvbitrevi_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvbitrevi_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvbitrevi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_w' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvbitrevi_d(v4u64 _1, int var) { ++ v4u64 res = __builtin_lasx_xvbitrevi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvbitrevi_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvbitrevi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvaddi_bu(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvaddi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvaddi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvaddi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_bu' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvaddi_hu(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvaddi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvaddi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvaddi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_hu' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvaddi_wu(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvaddi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvaddi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvaddi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_wu' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvaddi_du(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvaddi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvaddi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvaddi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsubi_bu(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvsubi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsubi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsubi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_bu' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsubi_hu(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvsubi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsubi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsubi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_hu' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsubi_wu(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvsubi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsubi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsubi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_wu' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsubi_du(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvsubi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsubi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsubi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvmaxi_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvmaxi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvmaxi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvmaxi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvmaxi_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvmaxi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvmaxi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvmaxi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvmaxi_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvmaxi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvmaxi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvmaxi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvmaxi_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvmaxi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvmaxi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvmaxi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvmaxi_bu(v32u8 _1, int var) { ++ v32u8 res = __builtin_lasx_xvmaxi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvmaxi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvmaxi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_bu' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvmaxi_hu(v16u16 _1, int var) { ++ v16u16 res = __builtin_lasx_xvmaxi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvmaxi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvmaxi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_hu' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvmaxi_wu(v8u32 _1, int var) { ++ v8u32 res = __builtin_lasx_xvmaxi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvmaxi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvmaxi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_wu' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvmaxi_du(v4u64 _1, int var) { ++ v4u64 res = __builtin_lasx_xvmaxi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvmaxi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvmaxi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvmini_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvmini_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvmini_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvmini_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvmini_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvmini_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvmini_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvmini_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_h' must be a constant integer}}} ++ return res; ++} ++ ++v8i32 xvmini_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvmini_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvmini_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvmini_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvmini_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvmini_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvmini_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvmini_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvmini_bu(v32u8 _1, int var) { ++ v32u8 res = __builtin_lasx_xvmini_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvmini_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvmini_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_bu' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvmini_hu(v16u16 _1, int var) { ++ v16u16 res = __builtin_lasx_xvmini_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvmini_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvmini_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_hu' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvmini_wu(v8u32 _1, int var) { ++ v8u32 res = __builtin_lasx_xvmini_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvmini_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvmini_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_wu' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvmini_du(v4u64 _1, int var) { ++ v4u64 res = __builtin_lasx_xvmini_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvmini_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvmini_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvseqi_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvseqi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvseqi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvseqi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvseqi_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvseqi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvseqi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvseqi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvseqi_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvseqi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvseqi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvseqi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvseqi_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvseqi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvseqi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvseqi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvslti_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvslti_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvslti_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvslti_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvslti_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvslti_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvslti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvslti_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvslti_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvslti_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvslti_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvslti_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvslti_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvslti_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvslti_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvslti_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvslti_bu(v32u8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvslti_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslti_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslti_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_bu' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvslti_hu(v16u16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvslti_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslti_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslti_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_hu' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvslti_wu(v8u32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvslti_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslti_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslti_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_wu' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvslti_du(v4u64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvslti_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslti_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslti_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvslei_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvslei_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvslei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvslei_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvslei_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvslei_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvslei_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvslei_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvslei_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvslei_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvslei_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvslei_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvslei_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvslei_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvslei_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} ++ res |= __builtin_lasx_xvslei_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvslei_bu(v32u8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvslei_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslei_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslei_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_bu' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvslei_hu(v16u16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvslei_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslei_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslei_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_hu' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvslei_wu(v8u32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvslei_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslei_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslei_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_wu' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvslei_du(v4u64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvslei_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslei_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvslei_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsat_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvsat_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvsat_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvsat_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsat_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvsat_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsat_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsat_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsat_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvsat_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsat_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsat_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsat_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvsat_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsat_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsat_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvsat_bu(v32u8 _1, int var) { ++ v32u8 res = __builtin_lasx_xvsat_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvsat_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvsat_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_bu' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvsat_hu(v16u16 _1, int var) { ++ v16u16 res = __builtin_lasx_xvsat_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsat_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsat_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_hu' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvsat_wu(v8u32 _1, int var) { ++ v8u32 res = __builtin_lasx_xvsat_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsat_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsat_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_wu' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvsat_du(v4u64 _1, int var) { ++ v4u64 res = __builtin_lasx_xvsat_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsat_du(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsat_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvrepl128vei_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvrepl128vei_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvrepl128vei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvrepl128vei_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvrepl128vei_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvrepl128vei_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvrepl128vei_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvrepl128vei_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvrepl128vei_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvrepl128vei_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __builtin_lasx_xvrepl128vei_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __builtin_lasx_xvrepl128vei_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvrepl128vei_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvrepl128vei_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} ++ res |= __builtin_lasx_xvrepl128vei_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ res |= __builtin_lasx_xvrepl128vei_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvandi_b(v32u8 _1, int var) { ++ v32u8 res = __builtin_lasx_xvandi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvandi_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvandi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvandi_b' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvori_b(v32u8 _1, int var) { ++ v32u8 res = __builtin_lasx_xvori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvori_b' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvnori_b(v32u8 _1, int var) { ++ v32u8 res = __builtin_lasx_xvnori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvnori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvnori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvnori_b' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvxori_b(v32u8 _1, int var) { ++ v32u8 res = __builtin_lasx_xvxori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvxori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvxori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvxori_b' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvbitseli_b(v32u8 _1, v32u8 _2, int var) { ++ v32u8 res = __builtin_lasx_xvbitseli_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvbitseli_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvbitseli_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvbitseli_b' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvshuf4i_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvshuf4i_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvshuf4i_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvshuf4i_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvshuf4i_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvshuf4i_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvshuf4i_h(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvshuf4i_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvshuf4i_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvshuf4i_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvshuf4i_w(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvshuf4i_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __builtin_lasx_xvshuf4i_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvshuf4i_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvshuf4i_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_d' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvpermi_w(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __builtin_lasx_xvpermi_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvpermi_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvpermi_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvpermi_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvpermi_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvpermi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvpermi_d(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvpermi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpermi_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvpermi_q(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __builtin_lasx_xvpermi_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvpermi_q(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvpermi_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvpermi_q' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsllwil_h_b(v32i8 _1, int var) { ++ v16i16 res = __builtin_lasx_xvsllwil_h_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvsllwil_h_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvsllwil_h_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_h_b' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsllwil_w_h(v16i16 _1, int var) { ++ v8i32 res = __builtin_lasx_xvsllwil_w_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsllwil_w_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsllwil_w_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_w_h' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsllwil_d_w(v8i32 _1, int var) { ++ v4i64 res = __builtin_lasx_xvsllwil_d_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsllwil_d_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsllwil_d_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_d_w' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvsllwil_hu_bu(v32u8 _1, int var) { ++ v16u16 res = __builtin_lasx_xvsllwil_hu_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvsllwil_hu_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvsllwil_hu_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_hu_bu' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvsllwil_wu_hu(v16u16 _1, int var) { ++ v8u32 res = __builtin_lasx_xvsllwil_wu_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsllwil_wu_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsllwil_wu_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_wu_hu' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvsllwil_du_wu(v8u32 _1, int var) { ++ v4u64 res = __builtin_lasx_xvsllwil_du_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsllwil_du_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsllwil_du_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_du_wu' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __builtin_lasx_xvfrstpi_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvfrstpi_b(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvfrstpi_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvfrstpi_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __builtin_lasx_xvfrstpi_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvfrstpi_h(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvfrstpi_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvfrstpi_h' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvbsrl_v(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvbsrl_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvbsrl_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvbsrl_v(_1, var); // expected-error {{argument to '__builtin_lasx_xvbsrl_v' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvbsll_v(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvbsll_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvbsll_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvbsll_v(_1, var); // expected-error {{argument to '__builtin_lasx_xvbsll_v' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvextrins_b(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __builtin_lasx_xvextrins_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvextrins_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvextrins_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvextrins_h(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __builtin_lasx_xvextrins_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvextrins_h(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvextrins_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvextrins_w(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __builtin_lasx_xvextrins_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvextrins_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvextrins_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvextrins_d(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __builtin_lasx_xvextrins_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvextrins_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ res |= __builtin_lasx_xvextrins_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvld(void *_1, int var) { ++ v32i8 res = __builtin_lasx_xvld(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ res |= __builtin_lasx_xvld(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} ++ res |= __builtin_lasx_xvld(_1, var); // expected-error {{argument to '__builtin_lasx_xvld' must be a constant integer}} ++ return res; ++} ++ ++void xvst(v32i8 _1, void *_2, int var) { ++ __builtin_lasx_xvst(_1, _2, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ __builtin_lasx_xvst(_1, _2, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} ++ __builtin_lasx_xvst(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvst' must be a constant integer}} ++} ++ ++void xvstelm_b(v32i8 _1, void * _2, int var) { ++ __builtin_lasx_xvstelm_b(_1, _2, -129, 1); // expected-error {{argument value -129 is outside the valid range [-128, 127]}} ++ __builtin_lasx_xvstelm_b(_1, _2, 128, 1); // expected-error {{argument value 128 is outside the valid range [-128, 127]}} ++ __builtin_lasx_xvstelm_b(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_b' must be a constant integer}} ++} ++ ++void xvstelm_h(v16i16 _1, void * _2, int var) { ++ __builtin_lasx_xvstelm_h(_1, _2, -258, 1); // expected-error {{argument value -258 is outside the valid range [-256, 254]}} ++ __builtin_lasx_xvstelm_h(_1, _2, 256, 1); // expected-error {{argument value 256 is outside the valid range [-256, 254]}} ++ __builtin_lasx_xvstelm_h(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_h' must be a constant integer}} ++} ++ ++void xvstelm_w(v8i32 _1, void * _2, int var) { ++ __builtin_lasx_xvstelm_w(_1, _2, -516, 1); // expected-error {{argument value -516 is outside the valid range [-512, 508]}} ++ __builtin_lasx_xvstelm_w(_1, _2, 512, 1); // expected-error {{argument value 512 is outside the valid range [-512, 508]}} ++ __builtin_lasx_xvstelm_w(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_w' must be a constant integer}} ++} ++ ++void xvstelm_d(v4i64 _1, void * _2, int var) { ++ __builtin_lasx_xvstelm_d(_1, _2, -1032, 1); // expected-error {{argument value -1032 is outside the valid range [-1024, 1016]}} ++ __builtin_lasx_xvstelm_d(_1, _2, 1024, 1); // expected-error {{argument value 1024 is outside the valid range [-1024, 1016]}} ++ __builtin_lasx_xvstelm_d(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_d' must be a constant integer}} ++} ++ ++void xvstelm_b_idx(v32i8 _1, void * _2, int var) { ++ __builtin_lasx_xvstelm_b(_1, _2, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ __builtin_lasx_xvstelm_b(_1, _2, 1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ __builtin_lasx_xvstelm_b(_1, _2, 1, var); // expected-error {{argument to '__builtin_lasx_xvstelm_b' must be a constant integer}} ++} ++ ++void xvstelm_h_idx(v16i16 _1, void * _2, int var) { ++ __builtin_lasx_xvstelm_h(_1, _2, 2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ __builtin_lasx_xvstelm_h(_1, _2, 2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ __builtin_lasx_xvstelm_h(_1, _2, 2, var); // expected-error {{argument to '__builtin_lasx_xvstelm_h' must be a constant integer}} ++} ++ ++void xvstelm_w_idx(v8i32 _1, void * _2, int var) { ++ __builtin_lasx_xvstelm_w(_1, _2, 4, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ __builtin_lasx_xvstelm_w(_1, _2, 4, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ __builtin_lasx_xvstelm_w(_1, _2, 4, var); // expected-error {{argument to '__builtin_lasx_xvstelm_w' must be a constant integer}} ++} ++ ++void xvstelm_d_idx(v4i64 _1, void * _2, int var) { ++ __builtin_lasx_xvstelm_d(_1, _2, 8, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ __builtin_lasx_xvstelm_d(_1, _2, 8, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ __builtin_lasx_xvstelm_d(_1, _2, 8, var); // expected-error {{argument to '__builtin_lasx_xvstelm_d' must be a constant integer}} ++} ++ ++v8i32 xvinsve0_w(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __builtin_lasx_xvinsve0_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvinsve0_w(_1, _2, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvinsve0_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvinsve0_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvinsve0_d(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __builtin_lasx_xvinsve0_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __builtin_lasx_xvinsve0_d(_1, _2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __builtin_lasx_xvinsve0_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvinsve0_d' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvpickve_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvpickve_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvpickve_w(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvpickve_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvpickve_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvpickve_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __builtin_lasx_xvpickve_d(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __builtin_lasx_xvpickve_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvldi(int var) { ++ v4i64 res = __builtin_lasx_xvldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}} ++ res |= __builtin_lasx_xvldi(4096); // expected-error {{argument value 4096 is outside the valid range [-4096, 4095]}} ++ res |= __builtin_lasx_xvldi(var); // expected-error {{argument to '__builtin_lasx_xvldi' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvinsgr2vr_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvinsgr2vr_w(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvinsgr2vr_w(_1, 1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvinsgr2vr_w(_1, 1, var); // expected-error {{argument to '__builtin_lasx_xvinsgr2vr_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvinsgr2vr_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvinsgr2vr_d(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __builtin_lasx_xvinsgr2vr_d(_1, 1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __builtin_lasx_xvinsgr2vr_d(_1, 1, var); // expected-error {{argument to '__builtin_lasx_xvinsgr2vr_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvldrepl_b(void *_1, int var) { ++ v32i8 res = __builtin_lasx_xvldrepl_b(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ res |= __builtin_lasx_xvldrepl_b(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} ++ res |= __builtin_lasx_xvldrepl_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvldrepl_h(void *_1, int var) { ++ v16i16 res = __builtin_lasx_xvldrepl_h(_1, -2050); // expected-error {{argument value -2050 is outside the valid range [-2048, 2046]}} ++ res |= __builtin_lasx_xvldrepl_h(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2046]}} ++ res |= __builtin_lasx_xvldrepl_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvldrepl_w(void *_1, int var) { ++ v8i32 res = __builtin_lasx_xvldrepl_w(_1, -2052); // expected-error {{argument value -2052 is outside the valid range [-2048, 2044]}} ++ res |= __builtin_lasx_xvldrepl_w(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2044]}} ++ res |= __builtin_lasx_xvldrepl_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvldrepl_d(void *_1, int var) { ++ v4i64 res = __builtin_lasx_xvldrepl_d(_1, -2056); // expected-error {{argument value -2056 is outside the valid range [-2048, 2040]}} ++ res |= __builtin_lasx_xvldrepl_d(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2040]}} ++ res |= __builtin_lasx_xvldrepl_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_d' must be a constant integer}} ++ return res; ++} ++ ++int xvpickve2gr_w(v8i32 _1, int var) { ++ int res = __builtin_lasx_xvpickve2gr_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvpickve2gr_w(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvpickve2gr_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_w' must be a constant integer}} ++ return res; ++} ++ ++unsigned int xvpickve2gr_wu(v8i32 _1, int var) { ++ unsigned int res = __builtin_lasx_xvpickve2gr_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvpickve2gr_wu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvpickve2gr_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_wu' must be a constant integer}} ++ return res; ++} ++ ++long xvpickve2gr_d(v4i64 _1, int var) { ++ long res = __builtin_lasx_xvpickve2gr_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __builtin_lasx_xvpickve2gr_d(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __builtin_lasx_xvpickve2gr_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_d' must be a constant integer}} ++ return res; ++} ++ ++unsigned long int xvpickve2gr_du(v4i64 _1, int var) { ++ unsigned long int res = __builtin_lasx_xvpickve2gr_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res |= __builtin_lasx_xvpickve2gr_du(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res |= __builtin_lasx_xvpickve2gr_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvrotri_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvrotri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvrotri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res |= __builtin_lasx_xvrotri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvrotri_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvrotri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvrotri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvrotri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvrotri_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvrotri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvrotri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvrotri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvrotri_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvrotri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvrotri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvrotri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __builtin_lasx_xvsrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __builtin_lasx_xvsrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __builtin_lasx_xvsrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __builtin_lasx_xvsrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvsrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvsrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __builtin_lasx_xvsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __builtin_lasx_xvsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __builtin_lasx_xvsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __builtin_lasx_xvsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvsrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __builtin_lasx_xvssrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvssrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvssrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __builtin_lasx_xvssrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvssrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvssrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __builtin_lasx_xvssrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvssrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvssrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __builtin_lasx_xvssrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvssrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvssrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2, int var) { ++ v32u8 res = __builtin_lasx_xvssrlni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvssrlni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvssrlni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2, int var) { ++ v16u16 res = __builtin_lasx_xvssrlni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvssrlni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvssrlni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2, int var) { ++ v8u32 res = __builtin_lasx_xvssrlni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvssrlni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvssrlni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2, int var) { ++ v4u64 res = __builtin_lasx_xvssrlni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvssrlni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvssrlni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __builtin_lasx_xvssrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvssrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvssrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __builtin_lasx_xvssrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvssrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvssrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __builtin_lasx_xvssrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvssrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvssrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __builtin_lasx_xvssrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvssrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvssrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2, int var) { ++ v32u8 res = __builtin_lasx_xvssrlrni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvssrlrni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvssrlrni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2, int var) { ++ v16u16 res = __builtin_lasx_xvssrlrni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvssrlrni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvssrlrni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2, int var) { ++ v8u32 res = __builtin_lasx_xvssrlrni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvssrlrni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvssrlrni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2, int var) { ++ v4u64 res = __builtin_lasx_xvssrlrni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvssrlrni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvssrlrni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __builtin_lasx_xvsrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __builtin_lasx_xvsrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __builtin_lasx_xvsrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __builtin_lasx_xvsrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvsrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvsrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __builtin_lasx_xvsrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvsrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __builtin_lasx_xvsrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvsrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __builtin_lasx_xvsrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvsrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __builtin_lasx_xvsrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvsrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvsrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __builtin_lasx_xvssrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvssrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvssrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __builtin_lasx_xvssrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvssrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvssrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __builtin_lasx_xvssrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvssrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvssrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __builtin_lasx_xvssrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvssrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvssrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2, int var) { ++ v32u8 res = __builtin_lasx_xvssrani_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvssrani_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvssrani_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2, int var) { ++ v16u16 res = __builtin_lasx_xvssrani_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvssrani_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvssrani_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2, int var) { ++ v8u32 res = __builtin_lasx_xvssrani_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvssrani_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvssrani_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2, int var) { ++ v4u64 res = __builtin_lasx_xvssrani_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvssrani_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvssrani_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_du_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __builtin_lasx_xvssrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvssrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvssrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __builtin_lasx_xvssrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvssrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvssrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __builtin_lasx_xvssrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvssrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvssrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __builtin_lasx_xvssrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvssrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvssrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2, int var) { ++ v32u8 res = __builtin_lasx_xvssrarni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvssrarni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lasx_xvssrarni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2, int var) { ++ v16u16 res = __builtin_lasx_xvssrarni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvssrarni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lasx_xvssrarni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2, int var) { ++ v8u32 res = __builtin_lasx_xvssrarni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvssrarni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lasx_xvssrarni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2, int var) { ++ v4u64 res = __builtin_lasx_xvssrarni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvssrarni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lasx_xvssrarni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v4f64 xvpickve_d_f(v4f64 _1, int var) { ++ v4f64 res = __builtin_lasx_xvpickve_d_f(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} ++ res += __builtin_lasx_xvpickve_d_f(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ res += __builtin_lasx_xvpickve_d_f(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_d_f' must be a constant integer}} ++ return res; ++} ++ ++v8f32 xvpickve_w_f(v8f32 _1, int var) { ++ v8f32 res = __builtin_lasx_xvpickve_w_f(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} ++ res += __builtin_lasx_xvpickve_w_f(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ res += __builtin_lasx_xvpickve_w_f(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_w_f' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvrepli_b(int var) { ++ v32i8 res = __builtin_lasx_xvrepli_b(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __builtin_lasx_xvrepli_b(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __builtin_lasx_xvrepli_b(var); // expected-error {{argument to '__builtin_lasx_xvrepli_b' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvrepli_d(int var) { ++ v4i64 res = __builtin_lasx_xvrepli_d(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __builtin_lasx_xvrepli_d(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __builtin_lasx_xvrepli_d(var); // expected-error {{argument to '__builtin_lasx_xvrepli_d' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvrepli_h(int var) { ++ v16i16 res = __builtin_lasx_xvrepli_h(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __builtin_lasx_xvrepli_h(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __builtin_lasx_xvrepli_h(var); // expected-error {{argument to '__builtin_lasx_xvrepli_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvrepli_w(int var) { ++ v8i32 res = __builtin_lasx_xvrepli_w(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __builtin_lasx_xvrepli_w(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __builtin_lasx_xvrepli_w(var); // expected-error {{argument to '__builtin_lasx_xvrepli_w' must be a constant integer}} ++ return res; ++} +diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin.c b/clang/test/CodeGen/LoongArch/lasx/builtin.c +new file mode 100644 +index 000000000000..0185f2004d52 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/lasx/builtin.c +@@ -0,0 +1,4452 @@ ++// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -O2 -emit-llvm %s -o - | FileCheck %s ++ ++typedef signed char v32i8 __attribute__((vector_size(32), aligned(32))); ++typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1))); ++typedef unsigned char v32u8 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned char v32u8_b __attribute__((vector_size(32), aligned(1))); ++typedef short v16i16 __attribute__((vector_size(32), aligned(32))); ++typedef short v16i16_h __attribute__((vector_size(32), aligned(2))); ++typedef unsigned short v16u16 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned short v16u16_h __attribute__((vector_size(32), aligned(2))); ++typedef int v8i32 __attribute__((vector_size(32), aligned(32))); ++typedef int v8i32_w __attribute__((vector_size(32), aligned(4))); ++typedef unsigned int v8u32 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4))); ++typedef long long v4i64 __attribute__((vector_size(32), aligned(32))); ++typedef long long v4i64_d __attribute__((vector_size(32), aligned(8))); ++typedef unsigned long long v4u64 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned long long v4u64_d __attribute__((vector_size(32), aligned(8))); ++typedef float v8f32 __attribute__((vector_size(32), aligned(32))); ++typedef float v8f32_w __attribute__((vector_size(32), aligned(4))); ++typedef double v4f64 __attribute__((vector_size(32), aligned(32))); ++typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); ++ ++typedef double v4f64 __attribute__((vector_size(32), aligned(32))); ++typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); ++ ++// CHECK-LABEL: @xvsll_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsll_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsll_b(_1, _2); } ++// CHECK-LABEL: @xvsll_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsll_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsll_h(_1, _2); } ++// CHECK-LABEL: @xvsll_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsll_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsll_w(_1, _2); } ++// CHECK-LABEL: @xvsll_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsll_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsll_d(_1, _2); } ++// CHECK-LABEL: @xvslli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvslli_b(v32i8 _1) { return __builtin_lasx_xvslli_b(_1, 1); } ++// CHECK-LABEL: @xvslli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvslli_h(v16i16 _1) { return __builtin_lasx_xvslli_h(_1, 1); } ++// CHECK-LABEL: @xvslli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvslli_w(v8i32 _1) { return __builtin_lasx_xvslli_w(_1, 1); } ++// CHECK-LABEL: @xvslli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvslli_d(v4i64 _1) { return __builtin_lasx_xvslli_d(_1, 1); } ++// CHECK-LABEL: @xvsra_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsra_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsra_b(_1, _2); } ++// CHECK-LABEL: @xvsra_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsra_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsra_h(_1, _2); } ++// CHECK-LABEL: @xvsra_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsra_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsra_w(_1, _2); } ++// CHECK-LABEL: @xvsra_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsra_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsra_d(_1, _2); } ++// CHECK-LABEL: @xvsrai_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrai_b(v32i8 _1) { return __builtin_lasx_xvsrai_b(_1, 1); } ++// CHECK-LABEL: @xvsrai_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrai_h(v16i16 _1) { return __builtin_lasx_xvsrai_h(_1, 1); } ++// CHECK-LABEL: @xvsrai_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrai_w(v8i32 _1) { return __builtin_lasx_xvsrai_w(_1, 1); } ++// CHECK-LABEL: @xvsrai_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrai_d(v4i64 _1) { return __builtin_lasx_xvsrai_d(_1, 1); } ++// CHECK-LABEL: @xvsrar_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrar_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrar_b(_1, _2); } ++// CHECK-LABEL: @xvsrar_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrar_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrar_h(_1, _2); } ++// CHECK-LABEL: @xvsrar_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrar_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrar_w(_1, _2); } ++// CHECK-LABEL: @xvsrar_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrar_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrar_d(_1, _2); } ++// CHECK-LABEL: @xvsrari_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrari_b(v32i8 _1) { return __builtin_lasx_xvsrari_b(_1, 1); } ++// CHECK-LABEL: @xvsrari_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrari_h(v16i16 _1) { return __builtin_lasx_xvsrari_h(_1, 1); } ++// CHECK-LABEL: @xvsrari_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrari_w(v8i32 _1) { return __builtin_lasx_xvsrari_w(_1, 1); } ++// CHECK-LABEL: @xvsrari_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrari_d(v4i64 _1) { return __builtin_lasx_xvsrari_d(_1, 1); } ++// CHECK-LABEL: @xvsrl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrl_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrl_b(_1, _2); } ++// CHECK-LABEL: @xvsrl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrl_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrl_h(_1, _2); } ++// CHECK-LABEL: @xvsrl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrl_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrl_w(_1, _2); } ++// CHECK-LABEL: @xvsrl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrl_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrl_d(_1, _2); } ++// CHECK-LABEL: @xvsrli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrli_b(v32i8 _1) { return __builtin_lasx_xvsrli_b(_1, 1); } ++// CHECK-LABEL: @xvsrli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrli_h(v16i16 _1) { return __builtin_lasx_xvsrli_h(_1, 1); } ++// CHECK-LABEL: @xvsrli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrli_w(v8i32 _1) { return __builtin_lasx_xvsrli_w(_1, 1); } ++// CHECK-LABEL: @xvsrli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrli_d(v4i64 _1) { return __builtin_lasx_xvsrli_d(_1, 1); } ++// CHECK-LABEL: @xvsrlr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrlr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlr_b(_1, _2); } ++// CHECK-LABEL: @xvsrlr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrlr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlr_h(_1, _2); } ++// CHECK-LABEL: @xvsrlr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrlr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlr_w(_1, _2); } ++// CHECK-LABEL: @xvsrlr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrlr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlr_d(_1, _2); } ++// CHECK-LABEL: @xvsrlri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrlri_b(v32i8 _1) { return __builtin_lasx_xvsrlri_b(_1, 1); } ++// CHECK-LABEL: @xvsrlri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrlri_h(v16i16 _1) { return __builtin_lasx_xvsrlri_h(_1, 1); } ++// CHECK-LABEL: @xvsrlri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrlri_w(v8i32 _1) { return __builtin_lasx_xvsrlri_w(_1, 1); } ++// CHECK-LABEL: @xvsrlri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrlri_d(v4i64 _1) { return __builtin_lasx_xvsrlri_d(_1, 1); } ++// CHECK-LABEL: @xvbitclr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvbitclr_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitclr_b(_1, _2); } ++// CHECK-LABEL: @xvbitclr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvbitclr_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitclr_h(_1, _2); } ++// CHECK-LABEL: @xvbitclr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvbitclr_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitclr_w(_1, _2); } ++// CHECK-LABEL: @xvbitclr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvbitclr_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitclr_d(_1, _2); } ++// CHECK-LABEL: @xvbitclri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvbitclri_b(v32u8 _1) { return __builtin_lasx_xvbitclri_b(_1, 1); } ++// CHECK-LABEL: @xvbitclri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvbitclri_h(v16u16 _1) { return __builtin_lasx_xvbitclri_h(_1, 1); } ++// CHECK-LABEL: @xvbitclri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvbitclri_w(v8u32 _1) { return __builtin_lasx_xvbitclri_w(_1, 1); } ++// CHECK-LABEL: @xvbitclri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvbitclri_d(v4u64 _1) { return __builtin_lasx_xvbitclri_d(_1, 1); } ++// CHECK-LABEL: @xvbitset_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvbitset_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitset_b(_1, _2); } ++// CHECK-LABEL: @xvbitset_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvbitset_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitset_h(_1, _2); } ++// CHECK-LABEL: @xvbitset_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvbitset_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitset_w(_1, _2); } ++// CHECK-LABEL: @xvbitset_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvbitset_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitset_d(_1, _2); } ++// CHECK-LABEL: @xvbitseti_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvbitseti_b(v32u8 _1) { return __builtin_lasx_xvbitseti_b(_1, 1); } ++// CHECK-LABEL: @xvbitseti_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvbitseti_h(v16u16 _1) { return __builtin_lasx_xvbitseti_h(_1, 1); } ++// CHECK-LABEL: @xvbitseti_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvbitseti_w(v8u32 _1) { return __builtin_lasx_xvbitseti_w(_1, 1); } ++// CHECK-LABEL: @xvbitseti_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvbitseti_d(v4u64 _1) { return __builtin_lasx_xvbitseti_d(_1, 1); } ++// CHECK-LABEL: @xvbitrev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvbitrev_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitrev_b(_1, _2); } ++// CHECK-LABEL: @xvbitrev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvbitrev_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitrev_h(_1, _2); } ++// CHECK-LABEL: @xvbitrev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvbitrev_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitrev_w(_1, _2); } ++// CHECK-LABEL: @xvbitrev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvbitrev_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitrev_d(_1, _2); } ++// CHECK-LABEL: @xvbitrevi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvbitrevi_b(v32u8 _1) { return __builtin_lasx_xvbitrevi_b(_1, 1); } ++// CHECK-LABEL: @xvbitrevi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvbitrevi_h(v16u16 _1) { return __builtin_lasx_xvbitrevi_h(_1, 1); } ++// CHECK-LABEL: @xvbitrevi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvbitrevi_w(v8u32 _1) { return __builtin_lasx_xvbitrevi_w(_1, 1); } ++// CHECK-LABEL: @xvbitrevi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvbitrevi_d(v4u64 _1) { return __builtin_lasx_xvbitrevi_d(_1, 1); } ++// CHECK-LABEL: @xvadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvadd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvadd_b(_1, _2); } ++// CHECK-LABEL: @xvadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvadd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvadd_h(_1, _2); } ++// CHECK-LABEL: @xvadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvadd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvadd_w(_1, _2); } ++// CHECK-LABEL: @xvadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvadd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadd_d(_1, _2); } ++// CHECK-LABEL: @xvaddi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvaddi_bu(v32i8 _1) { return __builtin_lasx_xvaddi_bu(_1, 1); } ++// CHECK-LABEL: @xvaddi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvaddi_hu(v16i16 _1) { return __builtin_lasx_xvaddi_hu(_1, 1); } ++// CHECK-LABEL: @xvaddi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvaddi_wu(v8i32 _1) { return __builtin_lasx_xvaddi_wu(_1, 1); } ++// CHECK-LABEL: @xvaddi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddi_du(v4i64 _1) { return __builtin_lasx_xvaddi_du(_1, 1); } ++// CHECK-LABEL: @xvsub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsub_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsub_b(_1, _2); } ++// CHECK-LABEL: @xvsub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsub_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsub_h(_1, _2); } ++// CHECK-LABEL: @xvsub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsub_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsub_w(_1, _2); } ++// CHECK-LABEL: @xvsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsub_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsub_d(_1, _2); } ++// CHECK-LABEL: @xvsubi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsubi_bu(v32i8 _1) { return __builtin_lasx_xvsubi_bu(_1, 1); } ++// CHECK-LABEL: @xvsubi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsubi_hu(v16i16 _1) { return __builtin_lasx_xvsubi_hu(_1, 1); } ++// CHECK-LABEL: @xvsubi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsubi_wu(v8i32 _1) { return __builtin_lasx_xvsubi_wu(_1, 1); } ++// CHECK-LABEL: @xvsubi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubi_du(v4i64 _1) { return __builtin_lasx_xvsubi_du(_1, 1); } ++// CHECK-LABEL: @xvmax_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmax_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmax_b(_1, _2); } ++// CHECK-LABEL: @xvmax_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmax_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmax_h(_1, _2); } ++// CHECK-LABEL: @xvmax_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmax_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmax_w(_1, _2); } ++// CHECK-LABEL: @xvmax_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmax_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmax_d(_1, _2); } ++// CHECK-LABEL: @xvmaxi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmaxi_b(v32i8 _1) { return __builtin_lasx_xvmaxi_b(_1, 1); } ++// CHECK-LABEL: @xvmaxi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmaxi_h(v16i16 _1) { return __builtin_lasx_xvmaxi_h(_1, 1); } ++// CHECK-LABEL: @xvmaxi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmaxi_w(v8i32 _1) { return __builtin_lasx_xvmaxi_w(_1, 1); } ++// CHECK-LABEL: @xvmaxi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaxi_d(v4i64 _1) { return __builtin_lasx_xvmaxi_d(_1, 1); } ++// CHECK-LABEL: @xvmax_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvmax_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmax_bu(_1, _2); } ++// CHECK-LABEL: @xvmax_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvmax_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmax_hu(_1, _2); } ++// CHECK-LABEL: @xvmax_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvmax_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmax_wu(_1, _2); } ++// CHECK-LABEL: @xvmax_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmax_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmax_du(_1, _2); } ++// CHECK-LABEL: @xvmaxi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvmaxi_bu(v32u8 _1) { return __builtin_lasx_xvmaxi_bu(_1, 1); } ++// CHECK-LABEL: @xvmaxi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvmaxi_hu(v16u16 _1) { return __builtin_lasx_xvmaxi_hu(_1, 1); } ++// CHECK-LABEL: @xvmaxi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvmaxi_wu(v8u32 _1) { return __builtin_lasx_xvmaxi_wu(_1, 1); } ++// CHECK-LABEL: @xvmaxi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmaxi_du(v4u64 _1) { return __builtin_lasx_xvmaxi_du(_1, 1); } ++// CHECK-LABEL: @xvmin_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmin_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmin_b(_1, _2); } ++// CHECK-LABEL: @xvmin_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmin_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmin_h(_1, _2); } ++// CHECK-LABEL: @xvmin_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmin_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmin_w(_1, _2); } ++// CHECK-LABEL: @xvmin_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmin_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmin_d(_1, _2); } ++// CHECK-LABEL: @xvmini_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmini_b(v32i8 _1) { return __builtin_lasx_xvmini_b(_1, 1); } ++// CHECK-LABEL: @xvmini_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmini_h(v16i16 _1) { return __builtin_lasx_xvmini_h(_1, 1); } ++// CHECK-LABEL: @xvmini_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmini_w(v8i32 _1) { return __builtin_lasx_xvmini_w(_1, 1); } ++// CHECK-LABEL: @xvmini_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmini_d(v4i64 _1) { return __builtin_lasx_xvmini_d(_1, 1); } ++// CHECK-LABEL: @xvmin_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvmin_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmin_bu(_1, _2); } ++// CHECK-LABEL: @xvmin_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvmin_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmin_hu(_1, _2); } ++// CHECK-LABEL: @xvmin_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvmin_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmin_wu(_1, _2); } ++// CHECK-LABEL: @xvmin_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmin_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmin_du(_1, _2); } ++// CHECK-LABEL: @xvmini_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvmini_bu(v32u8 _1) { return __builtin_lasx_xvmini_bu(_1, 1); } ++// CHECK-LABEL: @xvmini_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvmini_hu(v16u16 _1) { return __builtin_lasx_xvmini_hu(_1, 1); } ++// CHECK-LABEL: @xvmini_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvmini_wu(v8u32 _1) { return __builtin_lasx_xvmini_wu(_1, 1); } ++// CHECK-LABEL: @xvmini_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmini_du(v4u64 _1) { return __builtin_lasx_xvmini_du(_1, 1); } ++// CHECK-LABEL: @xvseq_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvseq_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvseq_b(_1, _2); } ++// CHECK-LABEL: @xvseq_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvseq_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvseq_h(_1, _2); } ++// CHECK-LABEL: @xvseq_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvseq_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvseq_w(_1, _2); } ++// CHECK-LABEL: @xvseq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvseq_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvseq_d(_1, _2); } ++// CHECK-LABEL: @xvseqi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvseqi_b(v32i8 _1) { return __builtin_lasx_xvseqi_b(_1, 1); } ++// CHECK-LABEL: @xvseqi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvseqi_h(v16i16 _1) { return __builtin_lasx_xvseqi_h(_1, 1); } ++// CHECK-LABEL: @xvseqi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvseqi_w(v8i32 _1) { return __builtin_lasx_xvseqi_w(_1, 1); } ++// CHECK-LABEL: @xvseqi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvseqi_d(v4i64 _1) { return __builtin_lasx_xvseqi_d(_1, 1); } ++// CHECK-LABEL: @xvslt_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvslt_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvslt_b(_1, _2); } ++// CHECK-LABEL: @xvslt_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvslt_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvslt_h(_1, _2); } ++// CHECK-LABEL: @xvslt_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvslt_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvslt_w(_1, _2); } ++// CHECK-LABEL: @xvslt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvslt_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvslt_d(_1, _2); } ++// CHECK-LABEL: @xvslti_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvslti_b(v32i8 _1) { return __builtin_lasx_xvslti_b(_1, 1); } ++// CHECK-LABEL: @xvslti_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvslti_h(v16i16 _1) { return __builtin_lasx_xvslti_h(_1, 1); } ++// CHECK-LABEL: @xvslti_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvslti_w(v8i32 _1) { return __builtin_lasx_xvslti_w(_1, 1); } ++// CHECK-LABEL: @xvslti_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvslti_d(v4i64 _1) { return __builtin_lasx_xvslti_d(_1, 1); } ++// CHECK-LABEL: @xvslt_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvslt_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvslt_bu(_1, _2); } ++// CHECK-LABEL: @xvslt_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvslt_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvslt_hu(_1, _2); } ++// CHECK-LABEL: @xvslt_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvslt_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvslt_wu(_1, _2); } ++// CHECK-LABEL: @xvslt_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvslt_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvslt_du(_1, _2); } ++// CHECK-LABEL: @xvslti_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvslti_bu(v32u8 _1) { return __builtin_lasx_xvslti_bu(_1, 1); } ++// CHECK-LABEL: @xvslti_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvslti_hu(v16u16 _1) { return __builtin_lasx_xvslti_hu(_1, 1); } ++// CHECK-LABEL: @xvslti_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvslti_wu(v8u32 _1) { return __builtin_lasx_xvslti_wu(_1, 1); } ++// CHECK-LABEL: @xvslti_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvslti_du(v4u64 _1) { return __builtin_lasx_xvslti_du(_1, 1); } ++// CHECK-LABEL: @xvsle_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsle_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsle_b(_1, _2); } ++// CHECK-LABEL: @xvsle_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsle_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsle_h(_1, _2); } ++// CHECK-LABEL: @xvsle_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsle_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsle_w(_1, _2); } ++// CHECK-LABEL: @xvsle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsle_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsle_d(_1, _2); } ++// CHECK-LABEL: @xvslei_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvslei_b(v32i8 _1) { return __builtin_lasx_xvslei_b(_1, 1); } ++// CHECK-LABEL: @xvslei_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvslei_h(v16i16 _1) { return __builtin_lasx_xvslei_h(_1, 1); } ++// CHECK-LABEL: @xvslei_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvslei_w(v8i32 _1) { return __builtin_lasx_xvslei_w(_1, 1); } ++// CHECK-LABEL: @xvslei_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvslei_d(v4i64 _1) { return __builtin_lasx_xvslei_d(_1, 1); } ++// CHECK-LABEL: @xvsle_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsle_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsle_bu(_1, _2); } ++// CHECK-LABEL: @xvsle_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsle_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsle_hu(_1, _2); } ++// CHECK-LABEL: @xvsle_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsle_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsle_wu(_1, _2); } ++// CHECK-LABEL: @xvsle_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsle_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsle_du(_1, _2); } ++// CHECK-LABEL: @xvslei_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvslei_bu(v32u8 _1) { return __builtin_lasx_xvslei_bu(_1, 1); } ++// CHECK-LABEL: @xvslei_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvslei_hu(v16u16 _1) { return __builtin_lasx_xvslei_hu(_1, 1); } ++// CHECK-LABEL: @xvslei_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvslei_wu(v8u32 _1) { return __builtin_lasx_xvslei_wu(_1, 1); } ++// CHECK-LABEL: @xvslei_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvslei_du(v4u64 _1) { return __builtin_lasx_xvslei_du(_1, 1); } ++// CHECK-LABEL: @xvsat_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsat_b(v32i8 _1) { return __builtin_lasx_xvsat_b(_1, 1); } ++// CHECK-LABEL: @xvsat_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsat_h(v16i16 _1) { return __builtin_lasx_xvsat_h(_1, 1); } ++// CHECK-LABEL: @xvsat_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsat_w(v8i32 _1) { return __builtin_lasx_xvsat_w(_1, 1); } ++// CHECK-LABEL: @xvsat_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsat_d(v4i64 _1) { return __builtin_lasx_xvsat_d(_1, 1); } ++// CHECK-LABEL: @xvsat_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvsat_bu(v32u8 _1) { return __builtin_lasx_xvsat_bu(_1, 1); } ++// CHECK-LABEL: @xvsat_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvsat_hu(v16u16 _1) { return __builtin_lasx_xvsat_hu(_1, 1); } ++// CHECK-LABEL: @xvsat_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvsat_wu(v8u32 _1) { return __builtin_lasx_xvsat_wu(_1, 1); } ++// CHECK-LABEL: @xvsat_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvsat_du(v4u64 _1) { return __builtin_lasx_xvsat_du(_1, 1); } ++// CHECK-LABEL: @xvadda_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvadda_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvadda_b(_1, _2); } ++// CHECK-LABEL: @xvadda_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvadda_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvadda_h(_1, _2); } ++// CHECK-LABEL: @xvadda_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvadda_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvadda_w(_1, _2); } ++// CHECK-LABEL: @xvadda_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvadda_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadda_d(_1, _2); } ++// CHECK-LABEL: @xvsadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsadd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsadd_b(_1, _2); } ++// CHECK-LABEL: @xvsadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsadd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsadd_h(_1, _2); } ++// CHECK-LABEL: @xvsadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsadd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsadd_w(_1, _2); } ++// CHECK-LABEL: @xvsadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsadd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsadd_d(_1, _2); } ++// CHECK-LABEL: @xvsadd_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvsadd_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsadd_bu(_1, _2); } ++// CHECK-LABEL: @xvsadd_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvsadd_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsadd_hu(_1, _2); } ++// CHECK-LABEL: @xvsadd_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvsadd_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsadd_wu(_1, _2); } ++// CHECK-LABEL: @xvsadd_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvsadd_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsadd_du(_1, _2); } ++// CHECK-LABEL: @xvavg_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvavg_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvavg_b(_1, _2); } ++// CHECK-LABEL: @xvavg_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvavg_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvavg_h(_1, _2); } ++// CHECK-LABEL: @xvavg_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvavg_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvavg_w(_1, _2); } ++// CHECK-LABEL: @xvavg_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvavg_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvavg_d(_1, _2); } ++// CHECK-LABEL: @xvavg_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvavg_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvavg_bu(_1, _2); } ++// CHECK-LABEL: @xvavg_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvavg_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvavg_hu(_1, _2); } ++// CHECK-LABEL: @xvavg_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvavg_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvavg_wu(_1, _2); } ++// CHECK-LABEL: @xvavg_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvavg_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvavg_du(_1, _2); } ++// CHECK-LABEL: @xvavgr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvavgr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvavgr_b(_1, _2); } ++// CHECK-LABEL: @xvavgr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvavgr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvavgr_h(_1, _2); } ++// CHECK-LABEL: @xvavgr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvavgr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvavgr_w(_1, _2); } ++// CHECK-LABEL: @xvavgr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvavgr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvavgr_d(_1, _2); } ++// CHECK-LABEL: @xvavgr_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvavgr_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvavgr_bu(_1, _2); } ++// CHECK-LABEL: @xvavgr_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvavgr_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvavgr_hu(_1, _2); } ++// CHECK-LABEL: @xvavgr_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvavgr_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvavgr_wu(_1, _2); } ++// CHECK-LABEL: @xvavgr_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvavgr_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvavgr_du(_1, _2); } ++// CHECK-LABEL: @xvssub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssub_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssub_b(_1, _2); } ++// CHECK-LABEL: @xvssub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssub_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssub_h(_1, _2); } ++// CHECK-LABEL: @xvssub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssub_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssub_w(_1, _2); } ++// CHECK-LABEL: @xvssub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvssub_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssub_d(_1, _2); } ++// CHECK-LABEL: @xvssub_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssub_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvssub_bu(_1, _2); } ++// CHECK-LABEL: @xvssub_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssub_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssub_hu(_1, _2); } ++// CHECK-LABEL: @xvssub_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssub_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssub_wu(_1, _2); } ++// CHECK-LABEL: @xvssub_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvssub_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssub_du(_1, _2); } ++// CHECK-LABEL: @xvabsd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvabsd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvabsd_b(_1, _2); } ++// CHECK-LABEL: @xvabsd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvabsd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvabsd_h(_1, _2); } ++// CHECK-LABEL: @xvabsd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvabsd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvabsd_w(_1, _2); } ++// CHECK-LABEL: @xvabsd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvabsd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvabsd_d(_1, _2); } ++// CHECK-LABEL: @xvabsd_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvabsd_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvabsd_bu(_1, _2); } ++// CHECK-LABEL: @xvabsd_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvabsd_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvabsd_hu(_1, _2); } ++// CHECK-LABEL: @xvabsd_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvabsd_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvabsd_wu(_1, _2); } ++// CHECK-LABEL: @xvabsd_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvabsd_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvabsd_du(_1, _2); } ++// CHECK-LABEL: @xvmul_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmul_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmul_b(_1, _2); } ++// CHECK-LABEL: @xvmul_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmul_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmul_h(_1, _2); } ++// CHECK-LABEL: @xvmul_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmul_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmul_w(_1, _2); } ++// CHECK-LABEL: @xvmul_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmul_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmul_d(_1, _2); } ++// CHECK-LABEL: @xvmadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmadd_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmadd_b(_1, _2, _3); } ++// CHECK-LABEL: @xvmadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmadd_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmadd_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmadd_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmadd_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmadd_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmadd_d(_1, _2, _3); } ++// CHECK-LABEL: @xvmsub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmsub_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmsub_b(_1, _2, _3); } ++// CHECK-LABEL: @xvmsub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmsub_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmsub_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmsub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmsub_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmsub_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmsub_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmsub_d(_1, _2, _3); } ++// CHECK-LABEL: @xvdiv_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvdiv_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvdiv_b(_1, _2); } ++// CHECK-LABEL: @xvdiv_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvdiv_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvdiv_h(_1, _2); } ++// CHECK-LABEL: @xvdiv_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvdiv_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvdiv_w(_1, _2); } ++// CHECK-LABEL: @xvdiv_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvdiv_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvdiv_d(_1, _2); } ++// CHECK-LABEL: @xvdiv_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvdiv_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvdiv_bu(_1, _2); } ++// CHECK-LABEL: @xvdiv_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvdiv_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvdiv_hu(_1, _2); } ++// CHECK-LABEL: @xvdiv_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvdiv_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvdiv_wu(_1, _2); } ++// CHECK-LABEL: @xvdiv_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvdiv_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvdiv_du(_1, _2); } ++// CHECK-LABEL: @xvhaddw_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvhaddw_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvhaddw_h_b(_1, _2); } ++// CHECK-LABEL: @xvhaddw_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvhaddw_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvhaddw_w_h(_1, _2); } ++// CHECK-LABEL: @xvhaddw_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvhaddw_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvhaddw_d_w(_1, _2); } ++// CHECK-LABEL: @xvhaddw_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvhaddw_hu_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvhaddw_hu_bu(_1, _2); } ++// CHECK-LABEL: @xvhaddw_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvhaddw_wu_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvhaddw_wu_hu(_1, _2); } ++// CHECK-LABEL: @xvhaddw_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvhaddw_du_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvhaddw_du_wu(_1, _2); } ++// CHECK-LABEL: @xvhsubw_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvhsubw_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvhsubw_h_b(_1, _2); } ++// CHECK-LABEL: @xvhsubw_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvhsubw_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvhsubw_w_h(_1, _2); } ++// CHECK-LABEL: @xvhsubw_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvhsubw_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvhsubw_d_w(_1, _2); } ++// CHECK-LABEL: @xvhsubw_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvhsubw_hu_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvhsubw_hu_bu(_1, _2); } ++// CHECK-LABEL: @xvhsubw_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvhsubw_wu_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvhsubw_wu_hu(_1, _2); } ++// CHECK-LABEL: @xvhsubw_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvhsubw_du_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvhsubw_du_wu(_1, _2); } ++// CHECK-LABEL: @xvmod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmod_b(_1, _2); } ++// CHECK-LABEL: @xvmod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmod_h(_1, _2); } ++// CHECK-LABEL: @xvmod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmod_w(_1, _2); } ++// CHECK-LABEL: @xvmod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmod_d(_1, _2); } ++// CHECK-LABEL: @xvmod_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvmod_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmod_bu(_1, _2); } ++// CHECK-LABEL: @xvmod_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvmod_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmod_hu(_1, _2); } ++// CHECK-LABEL: @xvmod_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvmod_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmod_wu(_1, _2); } ++// CHECK-LABEL: @xvmod_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmod_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmod_du(_1, _2); } ++// CHECK-LABEL: @xvrepl128vei_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvrepl128vei_b(v32i8 _1) { return __builtin_lasx_xvrepl128vei_b(_1, 1); } ++// CHECK-LABEL: @xvrepl128vei_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvrepl128vei_h(v16i16 _1) { return __builtin_lasx_xvrepl128vei_h(_1, 1); } ++// CHECK-LABEL: @xvrepl128vei_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvrepl128vei_w(v8i32 _1) { return __builtin_lasx_xvrepl128vei_w(_1, 1); } ++// CHECK-LABEL: @xvrepl128vei_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvrepl128vei_d(v4i64 _1) { return __builtin_lasx_xvrepl128vei_d(_1, 1); } ++// CHECK-LABEL: @xvpickev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvpickev_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpickev_b(_1, _2); } ++// CHECK-LABEL: @xvpickev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvpickev_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpickev_h(_1, _2); } ++// CHECK-LABEL: @xvpickev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvpickev_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpickev_w(_1, _2); } ++// CHECK-LABEL: @xvpickev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvpickev_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpickev_d(_1, _2); } ++// CHECK-LABEL: @xvpickod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvpickod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpickod_b(_1, _2); } ++// CHECK-LABEL: @xvpickod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvpickod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpickod_h(_1, _2); } ++// CHECK-LABEL: @xvpickod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvpickod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpickod_w(_1, _2); } ++// CHECK-LABEL: @xvpickod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvpickod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpickod_d(_1, _2); } ++// CHECK-LABEL: @xvilvh_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvilvh_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvilvh_b(_1, _2); } ++// CHECK-LABEL: @xvilvh_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvilvh_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvilvh_h(_1, _2); } ++// CHECK-LABEL: @xvilvh_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvilvh_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvilvh_w(_1, _2); } ++// CHECK-LABEL: @xvilvh_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvilvh_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvilvh_d(_1, _2); } ++// CHECK-LABEL: @xvilvl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvilvl_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvilvl_b(_1, _2); } ++// CHECK-LABEL: @xvilvl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvilvl_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvilvl_h(_1, _2); } ++// CHECK-LABEL: @xvilvl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvilvl_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvilvl_w(_1, _2); } ++// CHECK-LABEL: @xvilvl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvilvl_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvilvl_d(_1, _2); } ++// CHECK-LABEL: @xvpackev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvpackev_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpackev_b(_1, _2); } ++// CHECK-LABEL: @xvpackev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvpackev_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpackev_h(_1, _2); } ++// CHECK-LABEL: @xvpackev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvpackev_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpackev_w(_1, _2); } ++// CHECK-LABEL: @xvpackev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvpackev_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpackev_d(_1, _2); } ++// CHECK-LABEL: @xvpackod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvpackod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpackod_b(_1, _2); } ++// CHECK-LABEL: @xvpackod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvpackod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpackod_h(_1, _2); } ++// CHECK-LABEL: @xvpackod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvpackod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpackod_w(_1, _2); } ++// CHECK-LABEL: @xvpackod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvpackod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpackod_d(_1, _2); } ++// CHECK-LABEL: @xvshuf_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvshuf_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvshuf_b(_1, _2, _3); } ++// CHECK-LABEL: @xvshuf_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvshuf_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvshuf_h(_1, _2, _3); } ++// CHECK-LABEL: @xvshuf_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvshuf_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvshuf_w(_1, _2, _3); } ++// CHECK-LABEL: @xvshuf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvshuf_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvshuf_d(_1, _2, _3); } ++// CHECK-LABEL: @xvand_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvand_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvand_v(_1, _2); } ++// CHECK-LABEL: @xvandi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvandi_b(v32u8 _1) { return __builtin_lasx_xvandi_b(_1, 1); } ++// CHECK-LABEL: @xvor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvor_v(_1, _2); } ++// CHECK-LABEL: @xvori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvori_b(v32u8 _1) { return __builtin_lasx_xvori_b(_1, 1); } ++// CHECK-LABEL: @xvnor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvnor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvnor_v(_1, _2); } ++// CHECK-LABEL: @xvnori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvnori_b(v32u8 _1) { return __builtin_lasx_xvnori_b(_1, 1); } ++// CHECK-LABEL: @xvxor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvxor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvxor_v(_1, _2); } ++// CHECK-LABEL: @xvxori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvxori_b(v32u8 _1) { return __builtin_lasx_xvxori_b(_1, 1); } ++// CHECK-LABEL: @xvbitsel_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvbitsel_v(v32u8 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvbitsel_v(_1, _2, _3); } ++// CHECK-LABEL: @xvbitseli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvbitseli_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitseli_b(_1, _2, 1); } ++// CHECK-LABEL: @xvshuf4i_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvshuf4i_b(v32i8 _1) { return __builtin_lasx_xvshuf4i_b(_1, 1); } ++// CHECK-LABEL: @xvshuf4i_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvshuf4i_h(v16i16 _1) { return __builtin_lasx_xvshuf4i_h(_1, 1); } ++// CHECK-LABEL: @xvshuf4i_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvshuf4i_w(v8i32 _1) { return __builtin_lasx_xvshuf4i_w(_1, 1); } ++// CHECK-LABEL: @xvreplgr2vr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvreplgr2vr_b(int _1) { return __builtin_lasx_xvreplgr2vr_b(_1); } ++// CHECK-LABEL: @xvreplgr2vr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvreplgr2vr_h(int _1) { return __builtin_lasx_xvreplgr2vr_h(_1); } ++// CHECK-LABEL: @xvreplgr2vr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvreplgr2vr_w(int _1) { return __builtin_lasx_xvreplgr2vr_w(_1); } ++// CHECK-LABEL: @xvreplgr2vr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[_1:%.*]] to i64 ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 [[CONV]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvreplgr2vr_d(int _1) { return __builtin_lasx_xvreplgr2vr_d(_1); } ++// CHECK-LABEL: @xvpcnt_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvpcnt_b(v32i8 _1) { return __builtin_lasx_xvpcnt_b(_1); } ++// CHECK-LABEL: @xvpcnt_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvpcnt_h(v16i16 _1) { return __builtin_lasx_xvpcnt_h(_1); } ++// CHECK-LABEL: @xvpcnt_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvpcnt_w(v8i32 _1) { return __builtin_lasx_xvpcnt_w(_1); } ++// CHECK-LABEL: @xvpcnt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvpcnt_d(v4i64 _1) { return __builtin_lasx_xvpcnt_d(_1); } ++// CHECK-LABEL: @xvclo_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvclo_b(v32i8 _1) { return __builtin_lasx_xvclo_b(_1); } ++// CHECK-LABEL: @xvclo_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvclo_h(v16i16 _1) { return __builtin_lasx_xvclo_h(_1); } ++// CHECK-LABEL: @xvclo_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvclo_w(v8i32 _1) { return __builtin_lasx_xvclo_w(_1); } ++// CHECK-LABEL: @xvclo_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvclo_d(v4i64 _1) { return __builtin_lasx_xvclo_d(_1); } ++// CHECK-LABEL: @xvclz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvclz_b(v32i8 _1) { return __builtin_lasx_xvclz_b(_1); } ++// CHECK-LABEL: @xvclz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvclz_h(v16i16 _1) { return __builtin_lasx_xvclz_h(_1); } ++// CHECK-LABEL: @xvclz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvclz_w(v8i32 _1) { return __builtin_lasx_xvclz_w(_1); } ++// CHECK-LABEL: @xvclz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvclz_d(v4i64 _1) { return __builtin_lasx_xvclz_d(_1); } ++// CHECK-LABEL: @xvfadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfadd_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfadd_s(_1, _2); } ++// CHECK-LABEL: @xvfadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfadd_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfadd_d(_1, _2); } ++// CHECK-LABEL: @xvfsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfsub_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfsub_s(_1, _2); } ++// CHECK-LABEL: @xvfsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfsub_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfsub_d(_1, _2); } ++// CHECK-LABEL: @xvfmul_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfmul_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmul_s(_1, _2); } ++// CHECK-LABEL: @xvfmul_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfmul_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmul_d(_1, _2); } ++// CHECK-LABEL: @xvfdiv_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfdiv_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfdiv_s(_1, _2); } ++// CHECK-LABEL: @xvfdiv_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfdiv_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfdiv_d(_1, _2); } ++// CHECK-LABEL: @xvfcvt_h_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvfcvt_h_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcvt_h_s(_1, _2); } ++// CHECK-LABEL: @xvfcvt_s_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfcvt_s_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcvt_s_d(_1, _2); } ++// CHECK-LABEL: @xvfmin_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfmin_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmin_s(_1, _2); } ++// CHECK-LABEL: @xvfmin_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfmin_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmin_d(_1, _2); } ++// CHECK-LABEL: @xvfmina_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfmina_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmina_s(_1, _2); } ++// CHECK-LABEL: @xvfmina_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfmina_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmina_d(_1, _2); } ++// CHECK-LABEL: @xvfmax_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfmax_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmax_s(_1, _2); } ++// CHECK-LABEL: @xvfmax_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfmax_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmax_d(_1, _2); } ++// CHECK-LABEL: @xvfmaxa_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfmaxa_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmaxa_s(_1, _2); } ++// CHECK-LABEL: @xvfmaxa_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfmaxa_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmaxa_d(_1, _2); } ++// CHECK-LABEL: @xvfclass_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfclass_s(v8f32 _1) { return __builtin_lasx_xvfclass_s(_1); } ++// CHECK-LABEL: @xvfclass_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfclass_d(v4f64 _1) { return __builtin_lasx_xvfclass_d(_1); } ++// CHECK-LABEL: @xvfsqrt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfsqrt_s(v8f32 _1) { return __builtin_lasx_xvfsqrt_s(_1); } ++// CHECK-LABEL: @xvfsqrt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfsqrt_d(v4f64 _1) { return __builtin_lasx_xvfsqrt_d(_1); } ++// CHECK-LABEL: @xvfrecip_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfrecip_s(v8f32 _1) { return __builtin_lasx_xvfrecip_s(_1); } ++// CHECK-LABEL: @xvfrecip_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfrecip_d(v4f64 _1) { return __builtin_lasx_xvfrecip_d(_1); } ++// CHECK-LABEL: @xvfrint_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfrint_s(v8f32 _1) { return __builtin_lasx_xvfrint_s(_1); } ++// CHECK-LABEL: @xvfrint_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfrint_d(v4f64 _1) { return __builtin_lasx_xvfrint_d(_1); } ++// CHECK-LABEL: @xvfrsqrt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfrsqrt_s(v8f32 _1) { return __builtin_lasx_xvfrsqrt_s(_1); } ++// CHECK-LABEL: @xvfrsqrt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfrsqrt_d(v4f64 _1) { return __builtin_lasx_xvfrsqrt_d(_1); } ++// CHECK-LABEL: @xvflogb_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvflogb_s(v8f32 _1) { return __builtin_lasx_xvflogb_s(_1); } ++// CHECK-LABEL: @xvflogb_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvflogb_d(v4f64 _1) { return __builtin_lasx_xvflogb_d(_1); } ++// CHECK-LABEL: @xvfcvth_s_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfcvth_s_h(v16i16 _1) { return __builtin_lasx_xvfcvth_s_h(_1); } ++// CHECK-LABEL: @xvfcvth_d_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfcvth_d_s(v8f32 _1) { return __builtin_lasx_xvfcvth_d_s(_1); } ++// CHECK-LABEL: @xvfcvtl_s_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfcvtl_s_h(v16i16 _1) { return __builtin_lasx_xvfcvtl_s_h(_1); } ++// CHECK-LABEL: @xvfcvtl_d_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfcvtl_d_s(v8f32 _1) { return __builtin_lasx_xvfcvtl_d_s(_1); } ++// CHECK-LABEL: @xvftint_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftint_w_s(v8f32 _1) { return __builtin_lasx_xvftint_w_s(_1); } ++// CHECK-LABEL: @xvftint_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftint_l_d(v4f64 _1) { return __builtin_lasx_xvftint_l_d(_1); } ++// CHECK-LABEL: @xvftint_wu_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvftint_wu_s(v8f32 _1) { return __builtin_lasx_xvftint_wu_s(_1); } ++// CHECK-LABEL: @xvftint_lu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvftint_lu_d(v4f64 _1) { return __builtin_lasx_xvftint_lu_d(_1); } ++// CHECK-LABEL: @xvftintrz_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftintrz_w_s(v8f32 _1) { return __builtin_lasx_xvftintrz_w_s(_1); } ++// CHECK-LABEL: @xvftintrz_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrz_l_d(v4f64 _1) { return __builtin_lasx_xvftintrz_l_d(_1); } ++// CHECK-LABEL: @xvftintrz_wu_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvftintrz_wu_s(v8f32 _1) { return __builtin_lasx_xvftintrz_wu_s(_1); } ++// CHECK-LABEL: @xvftintrz_lu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvftintrz_lu_d(v4f64 _1) { return __builtin_lasx_xvftintrz_lu_d(_1); } ++// CHECK-LABEL: @xvffint_s_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvffint_s_w(v8i32 _1) { return __builtin_lasx_xvffint_s_w(_1); } ++// CHECK-LABEL: @xvffint_d_l( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvffint_d_l(v4i64 _1) { return __builtin_lasx_xvffint_d_l(_1); } ++// CHECK-LABEL: @xvffint_s_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvffint_s_wu(v8u32 _1) { return __builtin_lasx_xvffint_s_wu(_1); } ++// CHECK-LABEL: @xvffint_d_lu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvffint_d_lu(v4u64 _1) { return __builtin_lasx_xvffint_d_lu(_1); } ++// CHECK-LABEL: @xvreplve_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_1:%.*]], i32 [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvreplve_b(v32i8 _1, int _2) { return __builtin_lasx_xvreplve_b(_1, _2); } ++// CHECK-LABEL: @xvreplve_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_1:%.*]], i32 [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvreplve_h(v16i16 _1, int _2) { return __builtin_lasx_xvreplve_h(_1, _2); } ++// CHECK-LABEL: @xvreplve_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_1:%.*]], i32 [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvreplve_w(v8i32 _1, int _2) { return __builtin_lasx_xvreplve_w(_1, _2); } ++// CHECK-LABEL: @xvreplve_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1:%.*]], i32 [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvreplve_d(v4i64 _1, int _2) { return __builtin_lasx_xvreplve_d(_1, _2); } ++// CHECK-LABEL: @xvpermi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvpermi_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpermi_w(_1, _2, 1); } ++// CHECK-LABEL: @xvandn_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvandn_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvandn_v(_1, _2); } ++// CHECK-LABEL: @xvneg_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvneg_b(v32i8 _1) { return __builtin_lasx_xvneg_b(_1); } ++// CHECK-LABEL: @xvneg_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvneg_h(v16i16 _1) { return __builtin_lasx_xvneg_h(_1); } ++// CHECK-LABEL: @xvneg_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvneg_w(v8i32 _1) { return __builtin_lasx_xvneg_w(_1); } ++// CHECK-LABEL: @xvneg_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvneg_d(v4i64 _1) { return __builtin_lasx_xvneg_d(_1); } ++// CHECK-LABEL: @xvmuh_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmuh_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmuh_b(_1, _2); } ++// CHECK-LABEL: @xvmuh_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmuh_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmuh_h(_1, _2); } ++// CHECK-LABEL: @xvmuh_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmuh_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmuh_w(_1, _2); } ++// CHECK-LABEL: @xvmuh_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmuh_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmuh_d(_1, _2); } ++// CHECK-LABEL: @xvmuh_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvmuh_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmuh_bu(_1, _2); } ++// CHECK-LABEL: @xvmuh_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvmuh_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmuh_hu(_1, _2); } ++// CHECK-LABEL: @xvmuh_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvmuh_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmuh_wu(_1, _2); } ++// CHECK-LABEL: @xvmuh_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmuh_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmuh_du(_1, _2); } ++// CHECK-LABEL: @xvsllwil_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsllwil_h_b(v32i8 _1) { return __builtin_lasx_xvsllwil_h_b(_1, 1); } ++// CHECK-LABEL: @xvsllwil_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsllwil_w_h(v16i16 _1) { return __builtin_lasx_xvsllwil_w_h(_1, 1); } ++// CHECK-LABEL: @xvsllwil_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsllwil_d_w(v8i32 _1) { return __builtin_lasx_xvsllwil_d_w(_1, 1); } ++// CHECK-LABEL: @xvsllwil_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvsllwil_hu_bu(v32u8 _1) { return __builtin_lasx_xvsllwil_hu_bu(_1, 1); } ++// CHECK-LABEL: @xvsllwil_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvsllwil_wu_hu(v16u16 _1) { return __builtin_lasx_xvsllwil_wu_hu(_1, 1); } ++// CHECK-LABEL: @xvsllwil_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvsllwil_du_wu(v8u32 _1) { return __builtin_lasx_xvsllwil_du_wu(_1, 1); } ++// CHECK-LABEL: @xvsran_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsran_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsran_b_h(_1, _2); } ++// CHECK-LABEL: @xvsran_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsran_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsran_h_w(_1, _2); } ++// CHECK-LABEL: @xvsran_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsran_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsran_w_d(_1, _2); } ++// CHECK-LABEL: @xvssran_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssran_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssran_b_h(_1, _2); } ++// CHECK-LABEL: @xvssran_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssran_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssran_h_w(_1, _2); } ++// CHECK-LABEL: @xvssran_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssran_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssran_w_d(_1, _2); } ++// CHECK-LABEL: @xvssran_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssran_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssran_bu_h(_1, _2); } ++// CHECK-LABEL: @xvssran_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssran_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssran_hu_w(_1, _2); } ++// CHECK-LABEL: @xvssran_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssran_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssran_wu_d(_1, _2); } ++// CHECK-LABEL: @xvsrarn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrarn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrarn_b_h(_1, _2); } ++// CHECK-LABEL: @xvsrarn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrarn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrarn_h_w(_1, _2); } ++// CHECK-LABEL: @xvsrarn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrarn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrarn_w_d(_1, _2); } ++// CHECK-LABEL: @xvssrarn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssrarn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrarn_b_h(_1, _2); } ++// CHECK-LABEL: @xvssrarn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssrarn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrarn_h_w(_1, _2); } ++// CHECK-LABEL: @xvssrarn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssrarn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrarn_w_d(_1, _2); } ++// CHECK-LABEL: @xvssrarn_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssrarn_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrarn_bu_h(_1, _2); } ++// CHECK-LABEL: @xvssrarn_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssrarn_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrarn_hu_w(_1, _2); } ++// CHECK-LABEL: @xvssrarn_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssrarn_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrarn_wu_d(_1, _2); } ++// CHECK-LABEL: @xvsrln_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrln_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrln_b_h(_1, _2); } ++// CHECK-LABEL: @xvsrln_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrln_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrln_h_w(_1, _2); } ++// CHECK-LABEL: @xvsrln_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrln_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrln_w_d(_1, _2); } ++// CHECK-LABEL: @xvssrln_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssrln_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrln_bu_h(_1, _2); } ++// CHECK-LABEL: @xvssrln_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssrln_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrln_hu_w(_1, _2); } ++// CHECK-LABEL: @xvssrln_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssrln_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrln_wu_d(_1, _2); } ++// CHECK-LABEL: @xvsrlrn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrlrn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlrn_b_h(_1, _2); } ++// CHECK-LABEL: @xvsrlrn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrlrn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlrn_h_w(_1, _2); } ++// CHECK-LABEL: @xvsrlrn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrlrn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlrn_w_d(_1, _2); } ++// CHECK-LABEL: @xvssrlrn_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssrlrn_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrlrn_bu_h(_1, _2); } ++// CHECK-LABEL: @xvssrlrn_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssrlrn_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrlrn_hu_w(_1, _2); } ++// CHECK-LABEL: @xvssrlrn_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssrlrn_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrlrn_wu_d(_1, _2); } ++// CHECK-LABEL: @xvfrstpi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvfrstpi_b(_1, _2, 1); } ++// CHECK-LABEL: @xvfrstpi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvfrstpi_h(_1, _2, 1); } ++// CHECK-LABEL: @xvfrstp_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvfrstp_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvfrstp_b(_1, _2, _3); } ++// CHECK-LABEL: @xvfrstp_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvfrstp_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvfrstp_h(_1, _2, _3); } ++// CHECK-LABEL: @xvshuf4i_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvshuf4i_d(_1, _2, 1); } ++// CHECK-LABEL: @xvbsrl_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvbsrl_v(v32i8 _1) { return __builtin_lasx_xvbsrl_v(_1, 1); } ++// CHECK-LABEL: @xvbsll_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvbsll_v(v32i8 _1) { return __builtin_lasx_xvbsll_v(_1, 1); } ++// CHECK-LABEL: @xvextrins_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvextrins_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvextrins_b(_1, _2, 1); } ++// CHECK-LABEL: @xvextrins_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvextrins_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvextrins_h(_1, _2, 1); } ++// CHECK-LABEL: @xvextrins_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvextrins_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvextrins_w(_1, _2, 1); } ++// CHECK-LABEL: @xvextrins_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvextrins_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvextrins_d(_1, _2, 1); } ++// CHECK-LABEL: @xvmskltz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmskltz_b(v32i8 _1) { return __builtin_lasx_xvmskltz_b(_1); } ++// CHECK-LABEL: @xvmskltz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmskltz_h(v16i16 _1) { return __builtin_lasx_xvmskltz_h(_1); } ++// CHECK-LABEL: @xvmskltz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmskltz_w(v8i32 _1) { return __builtin_lasx_xvmskltz_w(_1); } ++// CHECK-LABEL: @xvmskltz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmskltz_d(v4i64 _1) { return __builtin_lasx_xvmskltz_d(_1); } ++// CHECK-LABEL: @xvsigncov_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsigncov_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsigncov_b(_1, _2); } ++// CHECK-LABEL: @xvsigncov_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsigncov_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsigncov_h(_1, _2); } ++// CHECK-LABEL: @xvsigncov_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsigncov_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsigncov_w(_1, _2); } ++// CHECK-LABEL: @xvsigncov_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsigncov_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsigncov_d(_1, _2); } ++// CHECK-LABEL: @xvfmadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfmadd_s(_1, _2, _3); } ++// CHECK-LABEL: @xvfmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfmadd_d(_1, _2, _3); } ++// CHECK-LABEL: @xvfmsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfmsub_s(_1, _2, _3); } ++// CHECK-LABEL: @xvfmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfmsub_d(_1, _2, _3); } ++// CHECK-LABEL: @xvfnmadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfnmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfnmadd_s(_1, _2, _3); } ++// CHECK-LABEL: @xvfnmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfnmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfnmadd_d(_1, _2, _3); } ++// CHECK-LABEL: @xvfnmsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvfnmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfnmsub_s(_1, _2, _3); } ++// CHECK-LABEL: @xvfnmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvfnmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfnmsub_d(_1, _2, _3); } ++// CHECK-LABEL: @xvftintrne_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftintrne_w_s(v8f32 _1) { return __builtin_lasx_xvftintrne_w_s(_1); } ++// CHECK-LABEL: @xvftintrne_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrne_l_d(v4f64 _1) { return __builtin_lasx_xvftintrne_l_d(_1); } ++// CHECK-LABEL: @xvftintrp_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftintrp_w_s(v8f32 _1) { return __builtin_lasx_xvftintrp_w_s(_1); } ++// CHECK-LABEL: @xvftintrp_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrp_l_d(v4f64 _1) { return __builtin_lasx_xvftintrp_l_d(_1); } ++// CHECK-LABEL: @xvftintrm_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftintrm_w_s(v8f32 _1) { return __builtin_lasx_xvftintrm_w_s(_1); } ++// CHECK-LABEL: @xvftintrm_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrm_l_d(v4f64 _1) { return __builtin_lasx_xvftintrm_l_d(_1); } ++// CHECK-LABEL: @xvftint_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftint_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftint_w_d(_1, _2); } ++// CHECK-LABEL: @xvffint_s_l( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvffint_s_l(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvffint_s_l(_1, _2); } ++// CHECK-LABEL: @xvftintrz_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftintrz_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrz_w_d(_1, _2); } ++// CHECK-LABEL: @xvftintrp_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftintrp_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrp_w_d(_1, _2); } ++// CHECK-LABEL: @xvftintrm_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftintrm_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrm_w_d(_1, _2); } ++// CHECK-LABEL: @xvftintrne_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvftintrne_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrne_w_d(_1, _2); } ++// CHECK-LABEL: @xvftinth_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftinth_l_s(v8f32 _1) { return __builtin_lasx_xvftinth_l_s(_1); } ++// CHECK-LABEL: @xvftintl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintl_l_s(v8f32 _1) { return __builtin_lasx_xvftintl_l_s(_1); } ++// CHECK-LABEL: @xvffinth_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvffinth_d_w(v8i32 _1) { return __builtin_lasx_xvffinth_d_w(_1); } ++// CHECK-LABEL: @xvffintl_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvffintl_d_w(v8i32 _1) { return __builtin_lasx_xvffintl_d_w(_1); } ++// CHECK-LABEL: @xvftintrzh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrzh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrzh_l_s(_1); } ++// CHECK-LABEL: @xvftintrzl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrzl_l_s(v8f32 _1) { return __builtin_lasx_xvftintrzl_l_s(_1); } ++// CHECK-LABEL: @xvftintrph_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrph_l_s(v8f32 _1) { return __builtin_lasx_xvftintrph_l_s(_1); } ++// CHECK-LABEL: @xvftintrpl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrpl_l_s(v8f32 _1) { return __builtin_lasx_xvftintrpl_l_s(_1); } ++// CHECK-LABEL: @xvftintrmh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrmh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrmh_l_s(_1); } ++// CHECK-LABEL: @xvftintrml_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrml_l_s(v8f32 _1) { return __builtin_lasx_xvftintrml_l_s(_1); } ++// CHECK-LABEL: @xvftintrneh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrneh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrneh_l_s(_1); } ++// CHECK-LABEL: @xvftintrnel_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvftintrnel_l_s(v8f32 _1) { return __builtin_lasx_xvftintrnel_l_s(_1); } ++// CHECK-LABEL: @xvfrintrne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> ++// CHECK-NEXT: ret <8 x i32> [[TMP1]] ++// ++v8i32 xvfrintrne_s(v8f32 _1) { return __builtin_lasx_xvfrintrne_s(_1); } ++// CHECK-LABEL: @xvfrintrne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> ++// CHECK-NEXT: ret <4 x i64> [[TMP1]] ++// ++v4i64 xvfrintrne_d(v4f64 _1) { return __builtin_lasx_xvfrintrne_d(_1); } ++// CHECK-LABEL: @xvfrintrz_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> ++// CHECK-NEXT: ret <8 x i32> [[TMP1]] ++// ++v8i32 xvfrintrz_s(v8f32 _1) { return __builtin_lasx_xvfrintrz_s(_1); } ++// CHECK-LABEL: @xvfrintrz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> ++// CHECK-NEXT: ret <4 x i64> [[TMP1]] ++// ++v4i64 xvfrintrz_d(v4f64 _1) { return __builtin_lasx_xvfrintrz_d(_1); } ++// CHECK-LABEL: @xvfrintrp_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> ++// CHECK-NEXT: ret <8 x i32> [[TMP1]] ++// ++v8i32 xvfrintrp_s(v8f32 _1) { return __builtin_lasx_xvfrintrp_s(_1); } ++// CHECK-LABEL: @xvfrintrp_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> ++// CHECK-NEXT: ret <4 x i64> [[TMP1]] ++// ++v4i64 xvfrintrp_d(v4f64 _1) { return __builtin_lasx_xvfrintrp_d(_1); } ++// CHECK-LABEL: @xvfrintrm_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> ++// CHECK-NEXT: ret <8 x i32> [[TMP1]] ++// ++v8i32 xvfrintrm_s(v8f32 _1) { return __builtin_lasx_xvfrintrm_s(_1); } ++// CHECK-LABEL: @xvfrintrm_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> ++// CHECK-NEXT: ret <4 x i64> [[TMP1]] ++// ++v4i64 xvfrintrm_d(v4f64 _1) { return __builtin_lasx_xvfrintrm_d(_1); } ++// CHECK-LABEL: @xvld( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvld(void *_1) { return __builtin_lasx_xvld(_1, 1); } ++// CHECK-LABEL: @xvst( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret void ++// ++void xvst(v32i8 _1, void *_2) { return __builtin_lasx_xvst(_1, _2, 1); } ++// CHECK-LABEL: @xvstelm_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1, i32 1) ++// CHECK-NEXT: ret void ++// ++void xvstelm_b(v32i8 _1, void * _2) { return __builtin_lasx_xvstelm_b(_1, _2, 1, 1); } ++// CHECK-LABEL: @xvstelm_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1:%.*]], ptr [[_2:%.*]], i32 2, i32 1) ++// CHECK-NEXT: ret void ++// ++void xvstelm_h(v16i16 _1, void * _2) { return __builtin_lasx_xvstelm_h(_1, _2, 2, 1); } ++// CHECK-LABEL: @xvstelm_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1:%.*]], ptr [[_2:%.*]], i32 4, i32 1) ++// CHECK-NEXT: ret void ++// ++void xvstelm_w(v8i32 _1, void * _2) { return __builtin_lasx_xvstelm_w(_1, _2, 4, 1); } ++// CHECK-LABEL: @xvstelm_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1:%.*]], ptr [[_2:%.*]], i32 8, i32 1) ++// CHECK-NEXT: ret void ++// ++void xvstelm_d(v4i64 _1, void * _2) { return __builtin_lasx_xvstelm_d(_1, _2, 8, 1); } ++// CHECK-LABEL: @xvinsve0_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvinsve0_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvinsve0_w(_1, _2, 1); } ++// CHECK-LABEL: @xvinsve0_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvinsve0_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvinsve0_d(_1, _2, 1); } ++// CHECK-LABEL: @xvpickve_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvpickve_w(v8i32 _1) { return __builtin_lasx_xvpickve_w(_1, 1); } ++// CHECK-LABEL: @xvpickve_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvpickve_d(v4i64 _1) { return __builtin_lasx_xvpickve_d(_1, 1); } ++// CHECK-LABEL: @xvssrlrn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssrlrn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrn_b_h(_1, _2); } ++// CHECK-LABEL: @xvssrlrn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssrlrn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrn_h_w(_1, _2); } ++// CHECK-LABEL: @xvssrlrn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssrlrn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrn_w_d(_1, _2); } ++// CHECK-LABEL: @xvssrln_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssrln_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrln_b_h(_1, _2); } ++// CHECK-LABEL: @xvssrln_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssrln_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrln_h_w(_1, _2); } ++// CHECK-LABEL: @xvssrln_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssrln_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrln_w_d(_1, _2); } ++// CHECK-LABEL: @xvorn_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvorn_v(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvorn_v(_1, _2); } ++// CHECK-LABEL: @xvldi( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvldi() { return __builtin_lasx_xvldi(1); } ++// CHECK-LABEL: @xvldx( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1:%.*]], i64 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvldx(void *_1) { return __builtin_lasx_xvldx(_1, 1); } ++// CHECK-LABEL: @xvstx( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i64 1) ++// CHECK-NEXT: ret void ++// ++void xvstx(v32i8 _1, void *_2) { return __builtin_lasx_xvstx(_1, _2, 1); } ++// CHECK-LABEL: @xvextl_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvextl_qu_du(v4u64 _1) { return __builtin_lasx_xvextl_qu_du(_1); } ++// CHECK-LABEL: @xvinsgr2vr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1:%.*]], i32 1, i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvinsgr2vr_w(v8i32 _1) { return __builtin_lasx_xvinsgr2vr_w(_1, 1, 1); } ++// CHECK-LABEL: @xvinsgr2vr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1:%.*]], i64 1, i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvinsgr2vr_d(v4i64 _1) { return __builtin_lasx_xvinsgr2vr_d(_1, 1, 1); } ++// CHECK-LABEL: @xvreplve0_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvreplve0_b(v32i8 _1) { return __builtin_lasx_xvreplve0_b(_1); } ++// CHECK-LABEL: @xvreplve0_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvreplve0_h(v16i16 _1) { return __builtin_lasx_xvreplve0_h(_1); } ++// CHECK-LABEL: @xvreplve0_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvreplve0_w(v8i32 _1) { return __builtin_lasx_xvreplve0_w(_1); } ++// CHECK-LABEL: @xvreplve0_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvreplve0_d(v4i64 _1) { return __builtin_lasx_xvreplve0_d(_1); } ++// CHECK-LABEL: @xvreplve0_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvreplve0_q(v32i8 _1) { return __builtin_lasx_xvreplve0_q(_1); } ++// CHECK-LABEL: @vext2xv_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 vext2xv_h_b(v32i8 _1) { return __builtin_lasx_vext2xv_h_b(_1); } ++// CHECK-LABEL: @vext2xv_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 vext2xv_w_h(v16i16 _1) { return __builtin_lasx_vext2xv_w_h(_1); } ++// CHECK-LABEL: @vext2xv_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 vext2xv_d_w(v8i32 _1) { return __builtin_lasx_vext2xv_d_w(_1); } ++// CHECK-LABEL: @vext2xv_w_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 vext2xv_w_b(v32i8 _1) { return __builtin_lasx_vext2xv_w_b(_1); } ++// CHECK-LABEL: @vext2xv_d_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 vext2xv_d_h(v16i16 _1) { return __builtin_lasx_vext2xv_d_h(_1); } ++// CHECK-LABEL: @vext2xv_d_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 vext2xv_d_b(v32i8 _1) { return __builtin_lasx_vext2xv_d_b(_1); } ++// CHECK-LABEL: @vext2xv_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 vext2xv_hu_bu(v32i8 _1) { return __builtin_lasx_vext2xv_hu_bu(_1); } ++// CHECK-LABEL: @vext2xv_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 vext2xv_wu_hu(v16i16 _1) { return __builtin_lasx_vext2xv_wu_hu(_1); } ++// CHECK-LABEL: @vext2xv_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 vext2xv_du_wu(v8i32 _1) { return __builtin_lasx_vext2xv_du_wu(_1); } ++// CHECK-LABEL: @vext2xv_wu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 vext2xv_wu_bu(v32i8 _1) { return __builtin_lasx_vext2xv_wu_bu(_1); } ++// CHECK-LABEL: @vext2xv_du_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 vext2xv_du_hu(v16i16 _1) { return __builtin_lasx_vext2xv_du_hu(_1); } ++// CHECK-LABEL: @vext2xv_du_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 vext2xv_du_bu(v32i8 _1) { return __builtin_lasx_vext2xv_du_bu(_1); } ++// CHECK-LABEL: @xvpermi_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvpermi_q(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpermi_q(_1, _2, 1); } ++// CHECK-LABEL: @xvpermi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvpermi_d(v4i64 _1) { return __builtin_lasx_xvpermi_d(_1, 1); } ++// CHECK-LABEL: @xvperm_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvperm_w(_1, _2); } ++// CHECK-LABEL: @xvldrepl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvldrepl_b(void *_1) { return __builtin_lasx_xvldrepl_b(_1, 1); } ++// CHECK-LABEL: @xvldrepl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr [[_1:%.*]], i32 2) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvldrepl_h(void *_1) { return __builtin_lasx_xvldrepl_h(_1, 2); } ++// CHECK-LABEL: @xvldrepl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr [[_1:%.*]], i32 4) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvldrepl_w(void *_1) { return __builtin_lasx_xvldrepl_w(_1, 4); } ++// CHECK-LABEL: @xvldrepl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr [[_1:%.*]], i32 8) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvldrepl_d(void *_1) { return __builtin_lasx_xvldrepl_d(_1, 8); } ++// CHECK-LABEL: @xvpickve2gr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xvpickve2gr_w(v8i32 _1) { return __builtin_lasx_xvpickve2gr_w(_1, 1); } ++// CHECK-LABEL: @xvpickve2gr_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++unsigned int xvpickve2gr_wu(v8i32 _1) { return __builtin_lasx_xvpickve2gr_wu(_1, 1); } ++// CHECK-LABEL: @xvpickve2gr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i64 [[TMP0]] ++// ++long xvpickve2gr_d(v4i64 _1) { return __builtin_lasx_xvpickve2gr_d(_1, 1); } ++// CHECK-LABEL: @xvpickve2gr_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret i64 [[TMP0]] ++// ++unsigned long int xvpickve2gr_du(v4i64 _1) { return __builtin_lasx_xvpickve2gr_du(_1, 1); } ++// CHECK-LABEL: @xvaddwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvaddwev_q_d(_1, _2); } ++// CHECK-LABEL: @xvaddwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvaddwev_d_w(_1, _2); } ++// CHECK-LABEL: @xvaddwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvaddwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvaddwev_w_h(_1, _2); } ++// CHECK-LABEL: @xvaddwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvaddwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvaddwev_h_b(_1, _2); } ++// CHECK-LABEL: @xvaddwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvaddwev_q_du(_1, _2); } ++// CHECK-LABEL: @xvaddwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvaddwev_d_wu(_1, _2); } ++// CHECK-LABEL: @xvaddwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvaddwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvaddwev_w_hu(_1, _2); } ++// CHECK-LABEL: @xvaddwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvaddwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvaddwev_h_bu(_1, _2); } ++// CHECK-LABEL: @xvsubwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsubwev_q_d(_1, _2); } ++// CHECK-LABEL: @xvsubwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsubwev_d_w(_1, _2); } ++// CHECK-LABEL: @xvsubwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsubwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsubwev_w_h(_1, _2); } ++// CHECK-LABEL: @xvsubwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsubwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsubwev_h_b(_1, _2); } ++// CHECK-LABEL: @xvsubwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsubwev_q_du(_1, _2); } ++// CHECK-LABEL: @xvsubwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsubwev_d_wu(_1, _2); } ++// CHECK-LABEL: @xvsubwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsubwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsubwev_w_hu(_1, _2); } ++// CHECK-LABEL: @xvsubwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsubwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsubwev_h_bu(_1, _2); } ++// CHECK-LABEL: @xvmulwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmulwev_q_d(_1, _2); } ++// CHECK-LABEL: @xvmulwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmulwev_d_w(_1, _2); } ++// CHECK-LABEL: @xvmulwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmulwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmulwev_w_h(_1, _2); } ++// CHECK-LABEL: @xvmulwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmulwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmulwev_h_b(_1, _2); } ++// CHECK-LABEL: @xvmulwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmulwev_q_du(_1, _2); } ++// CHECK-LABEL: @xvmulwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmulwev_d_wu(_1, _2); } ++// CHECK-LABEL: @xvmulwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmulwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmulwev_w_hu(_1, _2); } ++// CHECK-LABEL: @xvmulwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmulwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmulwev_h_bu(_1, _2); } ++// CHECK-LABEL: @xvaddwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvaddwod_q_d(_1, _2); } ++// CHECK-LABEL: @xvaddwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvaddwod_d_w(_1, _2); } ++// CHECK-LABEL: @xvaddwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvaddwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvaddwod_w_h(_1, _2); } ++// CHECK-LABEL: @xvaddwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvaddwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvaddwod_h_b(_1, _2); } ++// CHECK-LABEL: @xvaddwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvaddwod_q_du(_1, _2); } ++// CHECK-LABEL: @xvaddwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvaddwod_d_wu(_1, _2); } ++// CHECK-LABEL: @xvaddwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvaddwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvaddwod_w_hu(_1, _2); } ++// CHECK-LABEL: @xvaddwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvaddwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvaddwod_h_bu(_1, _2); } ++// CHECK-LABEL: @xvsubwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsubwod_q_d(_1, _2); } ++// CHECK-LABEL: @xvsubwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsubwod_d_w(_1, _2); } ++// CHECK-LABEL: @xvsubwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsubwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsubwod_w_h(_1, _2); } ++// CHECK-LABEL: @xvsubwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsubwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsubwod_h_b(_1, _2); } ++// CHECK-LABEL: @xvsubwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsubwod_q_du(_1, _2); } ++// CHECK-LABEL: @xvsubwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsubwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsubwod_d_wu(_1, _2); } ++// CHECK-LABEL: @xvsubwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsubwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsubwod_w_hu(_1, _2); } ++// CHECK-LABEL: @xvsubwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsubwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsubwod_h_bu(_1, _2); } ++// CHECK-LABEL: @xvmulwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmulwod_q_d(_1, _2); } ++// CHECK-LABEL: @xvmulwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmulwod_d_w(_1, _2); } ++// CHECK-LABEL: @xvmulwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmulwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmulwod_w_h(_1, _2); } ++// CHECK-LABEL: @xvmulwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmulwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmulwod_h_b(_1, _2); } ++// CHECK-LABEL: @xvmulwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmulwod_q_du(_1, _2); } ++// CHECK-LABEL: @xvmulwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmulwod_d_wu(_1, _2); } ++// CHECK-LABEL: @xvmulwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmulwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmulwod_w_hu(_1, _2); } ++// CHECK-LABEL: @xvmulwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmulwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmulwod_h_bu(_1, _2); } ++// CHECK-LABEL: @xvaddwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwev_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvaddwev_d_wu_w(_1, _2); } ++// CHECK-LABEL: @xvaddwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvaddwev_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvaddwev_w_hu_h(_1, _2); } ++// CHECK-LABEL: @xvaddwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvaddwev_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvaddwev_h_bu_b(_1, _2); } ++// CHECK-LABEL: @xvmulwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwev_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvmulwev_d_wu_w(_1, _2); } ++// CHECK-LABEL: @xvmulwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmulwev_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvmulwev_w_hu_h(_1, _2); } ++// CHECK-LABEL: @xvmulwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmulwev_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvmulwev_h_bu_b(_1, _2); } ++// CHECK-LABEL: @xvaddwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwod_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvaddwod_d_wu_w(_1, _2); } ++// CHECK-LABEL: @xvaddwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvaddwod_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvaddwod_w_hu_h(_1, _2); } ++// CHECK-LABEL: @xvaddwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvaddwod_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvaddwod_h_bu_b(_1, _2); } ++// CHECK-LABEL: @xvmulwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwod_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvmulwod_d_wu_w(_1, _2); } ++// CHECK-LABEL: @xvmulwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmulwod_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvmulwod_w_hu_h(_1, _2); } ++// CHECK-LABEL: @xvmulwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmulwod_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvmulwod_h_bu_b(_1, _2); } ++// CHECK-LABEL: @xvhaddw_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvhaddw_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvhaddw_q_d(_1, _2); } ++// CHECK-LABEL: @xvhaddw_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvhaddw_qu_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvhaddw_qu_du(_1, _2); } ++// CHECK-LABEL: @xvhsubw_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvhsubw_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvhsubw_q_d(_1, _2); } ++// CHECK-LABEL: @xvhsubw_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvhsubw_qu_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvhsubw_qu_du(_1, _2); } ++// CHECK-LABEL: @xvmaddwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaddwev_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwev_q_d(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaddwev_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwev_d_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmaddwev_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwev_w_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmaddwev_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwev_h_b(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmaddwev_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __builtin_lasx_xvmaddwev_q_du(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmaddwev_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __builtin_lasx_xvmaddwev_d_wu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvmaddwev_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __builtin_lasx_xvmaddwev_w_hu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvmaddwev_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvmaddwev_h_bu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaddwod_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwod_q_d(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaddwod_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwod_d_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmaddwod_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwod_w_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmaddwod_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwod_h_b(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmaddwod_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __builtin_lasx_xvmaddwod_q_du(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvmaddwod_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __builtin_lasx_xvmaddwod_d_wu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvmaddwod_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __builtin_lasx_xvmaddwod_w_hu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvmaddwod_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvmaddwod_h_bu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaddwev_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwev_q_du_d(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaddwev_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwev_d_wu_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmaddwev_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwev_w_hu_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmaddwev_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwev_h_bu_b(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaddwod_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwod_q_du_d(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmaddwod_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwod_d_wu_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvmaddwod_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwod_w_hu_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvmaddwod_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwod_h_bu_b(_1, _2, _3); } ++// CHECK-LABEL: @xvrotr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvrotr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvrotr_b(_1, _2); } ++// CHECK-LABEL: @xvrotr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvrotr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvrotr_h(_1, _2); } ++// CHECK-LABEL: @xvrotr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvrotr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvrotr_w(_1, _2); } ++// CHECK-LABEL: @xvrotr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvrotr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvrotr_d(_1, _2); } ++// CHECK-LABEL: @xvadd_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvadd_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadd_q(_1, _2); } ++// CHECK-LABEL: @xvsub_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsub_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsub_q(_1, _2); } ++// CHECK-LABEL: @xvaddwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwev_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvaddwev_q_du_d(_1, _2); } ++// CHECK-LABEL: @xvaddwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvaddwod_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvaddwod_q_du_d(_1, _2); } ++// CHECK-LABEL: @xvmulwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwev_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvmulwev_q_du_d(_1, _2); } ++// CHECK-LABEL: @xvmulwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvmulwod_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvmulwod_q_du_d(_1, _2); } ++// CHECK-LABEL: @xvmskgez_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmskgez_b(v32i8 _1) { return __builtin_lasx_xvmskgez_b(_1); } ++// CHECK-LABEL: @xvmsknz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvmsknz_b(v32i8 _1) { return __builtin_lasx_xvmsknz_b(_1); } ++// CHECK-LABEL: @xvexth_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvexth_h_b(v32i8 _1) { return __builtin_lasx_xvexth_h_b(_1); } ++// CHECK-LABEL: @xvexth_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvexth_w_h(v16i16 _1) { return __builtin_lasx_xvexth_w_h(_1); } ++// CHECK-LABEL: @xvexth_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvexth_d_w(v8i32 _1) { return __builtin_lasx_xvexth_d_w(_1); } ++// CHECK-LABEL: @xvexth_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvexth_q_d(v4i64 _1) { return __builtin_lasx_xvexth_q_d(_1); } ++// CHECK-LABEL: @xvexth_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvexth_hu_bu(v32u8 _1) { return __builtin_lasx_xvexth_hu_bu(_1); } ++// CHECK-LABEL: @xvexth_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvexth_wu_hu(v16u16 _1) { return __builtin_lasx_xvexth_wu_hu(_1); } ++// CHECK-LABEL: @xvexth_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvexth_du_wu(v8u32 _1) { return __builtin_lasx_xvexth_du_wu(_1); } ++// CHECK-LABEL: @xvexth_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvexth_qu_du(v4u64 _1) { return __builtin_lasx_xvexth_qu_du(_1); } ++// CHECK-LABEL: @xvrotri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvrotri_b(v32i8 _1) { return __builtin_lasx_xvrotri_b(_1, 1); } ++// CHECK-LABEL: @xvrotri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvrotri_h(v16i16 _1) { return __builtin_lasx_xvrotri_h(_1, 1); } ++// CHECK-LABEL: @xvrotri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvrotri_w(v8i32 _1) { return __builtin_lasx_xvrotri_w(_1, 1); } ++// CHECK-LABEL: @xvrotri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvrotri_d(v4i64 _1) { return __builtin_lasx_xvrotri_d(_1, 1); } ++// CHECK-LABEL: @xvextl_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvextl_q_d(v4i64 _1) { return __builtin_lasx_xvextl_q_d(_1); } ++// CHECK-LABEL: @xvsrlni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlrni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlrni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlrni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlrni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlrni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlrni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlrni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlrni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrlni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrlni_bu_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrlni_hu_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrlni_wu_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrlni_du_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrlrni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrlrni_bu_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrni_hu_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrni_wu_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrni_du_q(_1, _2, 1); } ++// CHECK-LABEL: @xvsrani_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrani_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvsrani_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrani_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvsrani_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrani_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvsrani_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrani_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvsrarni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrarni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvsrarni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrarni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvsrarni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrarni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvsrarni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrarni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrani_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrani_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrani_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrani_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrani_bu_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrani_hu_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrani_wu_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrani_du_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrarni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrarni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrarni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrarni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrarni_bu_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrarni_hu_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrarni_wu_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrarni_du_q(_1, _2, 1); } ++// CHECK-LABEL: @xbnz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbnz_b(v32u8 _1) { return __builtin_lasx_xbnz_b(_1); } ++// CHECK-LABEL: @xbnz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbnz_d(v4u64 _1) { return __builtin_lasx_xbnz_d(_1); } ++// CHECK-LABEL: @xbnz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbnz_h(v16u16 _1) { return __builtin_lasx_xbnz_h(_1); } ++// CHECK-LABEL: @xbnz_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbnz_v(v32u8 _1) { return __builtin_lasx_xbnz_v(_1); } ++// CHECK-LABEL: @xbnz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbnz_w(v8u32 _1) { return __builtin_lasx_xbnz_w(_1); } ++// CHECK-LABEL: @xbz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbz_b(v32u8 _1) { return __builtin_lasx_xbz_b(_1); } ++// CHECK-LABEL: @xbz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbz_d(v4u64 _1) { return __builtin_lasx_xbz_d(_1); } ++// CHECK-LABEL: @xbz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbz_h(v16u16 _1) { return __builtin_lasx_xbz_h(_1); } ++// CHECK-LABEL: @xbz_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbz_v(v32u8 _1) { return __builtin_lasx_xbz_v(_1); } ++// CHECK-LABEL: @xbz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1:%.*]]) ++// CHECK-NEXT: ret i32 [[TMP0]] ++// ++int xbz_w(v8u32 _1) { return __builtin_lasx_xbz_w(_1); } ++// CHECK-LABEL: @xvfcmp_caf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_caf_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_caf_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_caf_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_caf_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_caf_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_ceq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_ceq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_ceq_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_ceq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_ceq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_ceq_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_cle_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cle_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cle_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_cle_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cle_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_clt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_clt_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_clt_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_clt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_clt_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_clt_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_cne_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cne_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_cne_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cne_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cor_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_cor_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cor_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cor_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_cor_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cor_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cueq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_cueq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cueq_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cueq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_cueq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cueq_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cule_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_cule_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cule_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cule_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_cule_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cule_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cult_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_cult_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cult_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cult_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_cult_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cult_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cun_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_cun_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cun_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cune_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_cune_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cune_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cune_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_cune_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cune_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cun_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_cun_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cun_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_saf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_saf_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_saf_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_saf_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_saf_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_saf_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_seq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_seq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_seq_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_seq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_seq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_seq_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_sle_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sle_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sle_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_sle_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sle_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_slt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_slt_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_slt_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_slt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_slt_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_slt_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_sne_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sne_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_sne_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sne_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sor_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_sor_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sor_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sor_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_sor_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sor_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sueq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_sueq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sueq_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sueq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_sueq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sueq_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sule_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_sule_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sule_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sule_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_sule_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sule_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sult_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_sult_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sult_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sult_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_sult_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sult_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sun_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_sun_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sun_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sune_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvfcmp_sune_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sune_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sune_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_sune_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sune_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sun_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvfcmp_sun_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sun_s(_1, _2); } ++// CHECK-LABEL: @xvpickve_d_f( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// ++v4f64 xvpickve_d_f(v4f64 _1) { return __builtin_lasx_xvpickve_d_f(_1, 1); } ++// CHECK-LABEL: @xvpickve_w_f( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1:%.*]], i32 1) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// ++v8f32 xvpickve_w_f(v8f32 _1) { return __builtin_lasx_xvpickve_w_f(_1, 1); } ++// CHECK-LABEL: @xvrepli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1) ++// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// ++v32i8 xvrepli_b() { return __builtin_lasx_xvrepli_b(1); } ++// CHECK-LABEL: @xvrepli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1) ++// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// ++v4i64 xvrepli_d() { return __builtin_lasx_xvrepli_d(1); } ++// CHECK-LABEL: @xvrepli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1) ++// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// ++v16i16 xvrepli_h() { return __builtin_lasx_xvrepli_h(1); } ++// CHECK-LABEL: @xvrepli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1) ++// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// ++v8i32 xvrepli_w() { return __builtin_lasx_xvrepli_w(1); } +-- +2.20.1 + diff --git a/0008-Clang-LoongArch-Do-not-pass-vector-arguments-via-vec.patch b/0008-Clang-LoongArch-Do-not-pass-vector-arguments-via-vec.patch new file mode 100644 index 0000000..436ab7c --- /dev/null +++ b/0008-Clang-LoongArch-Do-not-pass-vector-arguments-via-vec.patch @@ -0,0 +1,32010 @@ +From 90be01b428b95f0396db80eb5d12c9cea2a6fe20 Mon Sep 17 00:00:00 2001 +From: yjijd +Date: Tue, 2 Jan 2024 11:46:00 +0800 +Subject: [PATCH 08/17] [Clang][LoongArch] Do not pass vector arguments via + vector registers (#74990) + +psABI v2.30 clarifies that vector arguments are passed according to the +base ABI by default. + +(cherry picked from commit 0e01c72c5645259d9a08a1a7ed39cb5cc41ce311) + +Change-Id: I46ae90b6b1f552647812b36fb7aff8bf4724c75f +--- + clang/lib/CodeGen/Targets/LoongArch.cpp | 7 - + .../CodeGen/LoongArch/lasx/builtin-alias.c | 4876 ++++++++++++----- + clang/test/CodeGen/LoongArch/lasx/builtin.c | 4874 +++++++++++----- + .../CodeGen/LoongArch/lsx/builtin-alias.c | 4746 +++++++++++----- + clang/test/CodeGen/LoongArch/lsx/builtin.c | 4746 +++++++++++----- + 5 files changed, 13485 insertions(+), 5764 deletions(-) + +diff --git a/clang/lib/CodeGen/Targets/LoongArch.cpp b/clang/lib/CodeGen/Targets/LoongArch.cpp +index 26c68c3583b2..7483bf6d6d1e 100644 +--- a/clang/lib/CodeGen/Targets/LoongArch.cpp ++++ b/clang/lib/CodeGen/Targets/LoongArch.cpp +@@ -321,13 +321,6 @@ ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, + return ABIArgInfo::getDirect(); + } + +- // Pass 128-bit/256-bit vector values via vector registers directly. +- if (Ty->isVectorType() && (((getContext().getTypeSize(Ty) == 128) && +- (getTarget().hasFeature("lsx"))) || +- ((getContext().getTypeSize(Ty) == 256) && +- getTarget().hasFeature("lasx")))) +- return ABIArgInfo::getDirect(); +- + // Complex types for the *f or *d ABI must be passed directly rather than + // using CoerceAndExpand. + if (IsFixed && Ty->isComplexType() && FRLen && FARsLeft >= 2) { +diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c b/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c +index 09b2d5fcacf5..9a8ce224bcfd 100644 +--- a/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c ++++ b/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c +@@ -5,4426 +5,6382 @@ + + // CHECK-LABEL: @xvsll_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsll_b(v32i8 _1, v32i8 _2) { return __lasx_xvsll_b(_1, _2); } + // CHECK-LABEL: @xvsll_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsll_h(v16i16 _1, v16i16 _2) { return __lasx_xvsll_h(_1, _2); } + // CHECK-LABEL: @xvsll_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsll_w(v8i32 _1, v8i32 _2) { return __lasx_xvsll_w(_1, _2); } + // CHECK-LABEL: @xvsll_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsll_d(v4i64 _1, v4i64 _2) { return __lasx_xvsll_d(_1, _2); } + // CHECK-LABEL: @xvslli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvslli_b(v32i8 _1) { return __lasx_xvslli_b(_1, 1); } + // CHECK-LABEL: @xvslli_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvslli_h(v16i16 _1) { return __lasx_xvslli_h(_1, 1); } + // CHECK-LABEL: @xvslli_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvslli_w(v8i32 _1) { return __lasx_xvslli_w(_1, 1); } + // CHECK-LABEL: @xvslli_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvslli_d(v4i64 _1) { return __lasx_xvslli_d(_1, 1); } + // CHECK-LABEL: @xvsra_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsra_b(v32i8 _1, v32i8 _2) { return __lasx_xvsra_b(_1, _2); } + // CHECK-LABEL: @xvsra_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsra_h(v16i16 _1, v16i16 _2) { return __lasx_xvsra_h(_1, _2); } + // CHECK-LABEL: @xvsra_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsra_w(v8i32 _1, v8i32 _2) { return __lasx_xvsra_w(_1, _2); } + // CHECK-LABEL: @xvsra_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsra_d(v4i64 _1, v4i64 _2) { return __lasx_xvsra_d(_1, _2); } + // CHECK-LABEL: @xvsrai_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrai_b(v32i8 _1) { return __lasx_xvsrai_b(_1, 1); } + // CHECK-LABEL: @xvsrai_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrai_h(v16i16 _1) { return __lasx_xvsrai_h(_1, 1); } + // CHECK-LABEL: @xvsrai_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrai_w(v8i32 _1) { return __lasx_xvsrai_w(_1, 1); } + // CHECK-LABEL: @xvsrai_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrai_d(v4i64 _1) { return __lasx_xvsrai_d(_1, 1); } + // CHECK-LABEL: @xvsrar_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrar_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrar_b(_1, _2); } + // CHECK-LABEL: @xvsrar_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrar_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrar_h(_1, _2); } + // CHECK-LABEL: @xvsrar_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrar_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrar_w(_1, _2); } + // CHECK-LABEL: @xvsrar_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrar_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrar_d(_1, _2); } + // CHECK-LABEL: @xvsrari_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrari_b(v32i8 _1) { return __lasx_xvsrari_b(_1, 1); } + // CHECK-LABEL: @xvsrari_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrari_h(v16i16 _1) { return __lasx_xvsrari_h(_1, 1); } + // CHECK-LABEL: @xvsrari_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrari_w(v8i32 _1) { return __lasx_xvsrari_w(_1, 1); } + // CHECK-LABEL: @xvsrari_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrari_d(v4i64 _1) { return __lasx_xvsrari_d(_1, 1); } + // CHECK-LABEL: @xvsrl_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrl_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrl_b(_1, _2); } + // CHECK-LABEL: @xvsrl_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrl_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrl_h(_1, _2); } + // CHECK-LABEL: @xvsrl_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrl_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrl_w(_1, _2); } + // CHECK-LABEL: @xvsrl_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrl_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrl_d(_1, _2); } + // CHECK-LABEL: @xvsrli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrli_b(v32i8 _1) { return __lasx_xvsrli_b(_1, 1); } + // CHECK-LABEL: @xvsrli_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrli_h(v16i16 _1) { return __lasx_xvsrli_h(_1, 1); } + // CHECK-LABEL: @xvsrli_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrli_w(v8i32 _1) { return __lasx_xvsrli_w(_1, 1); } + // CHECK-LABEL: @xvsrli_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrli_d(v4i64 _1) { return __lasx_xvsrli_d(_1, 1); } + // CHECK-LABEL: @xvsrlr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrlr_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrlr_b(_1, _2); } + // CHECK-LABEL: @xvsrlr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrlr_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlr_h(_1, _2); } + // CHECK-LABEL: @xvsrlr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrlr_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlr_w(_1, _2); } + // CHECK-LABEL: @xvsrlr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrlr_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlr_d(_1, _2); } + // CHECK-LABEL: @xvsrlri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrlri_b(v32i8 _1) { return __lasx_xvsrlri_b(_1, 1); } + // CHECK-LABEL: @xvsrlri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrlri_h(v16i16 _1) { return __lasx_xvsrlri_h(_1, 1); } + // CHECK-LABEL: @xvsrlri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrlri_w(v8i32 _1) { return __lasx_xvsrlri_w(_1, 1); } + // CHECK-LABEL: @xvsrlri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrlri_d(v4i64 _1) { return __lasx_xvsrlri_d(_1, 1); } + // CHECK-LABEL: @xvbitclr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvbitclr_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitclr_b(_1, _2); } + // CHECK-LABEL: @xvbitclr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvbitclr_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitclr_h(_1, _2); } + // CHECK-LABEL: @xvbitclr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvbitclr_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitclr_w(_1, _2); } + // CHECK-LABEL: @xvbitclr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvbitclr_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitclr_d(_1, _2); } + // CHECK-LABEL: @xvbitclri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvbitclri_b(v32u8 _1) { return __lasx_xvbitclri_b(_1, 1); } + // CHECK-LABEL: @xvbitclri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvbitclri_h(v16u16 _1) { return __lasx_xvbitclri_h(_1, 1); } + // CHECK-LABEL: @xvbitclri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvbitclri_w(v8u32 _1) { return __lasx_xvbitclri_w(_1, 1); } + // CHECK-LABEL: @xvbitclri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvbitclri_d(v4u64 _1) { return __lasx_xvbitclri_d(_1, 1); } + // CHECK-LABEL: @xvbitset_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvbitset_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitset_b(_1, _2); } + // CHECK-LABEL: @xvbitset_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvbitset_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitset_h(_1, _2); } + // CHECK-LABEL: @xvbitset_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvbitset_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitset_w(_1, _2); } + // CHECK-LABEL: @xvbitset_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvbitset_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitset_d(_1, _2); } + // CHECK-LABEL: @xvbitseti_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvbitseti_b(v32u8 _1) { return __lasx_xvbitseti_b(_1, 1); } + // CHECK-LABEL: @xvbitseti_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvbitseti_h(v16u16 _1) { return __lasx_xvbitseti_h(_1, 1); } + // CHECK-LABEL: @xvbitseti_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvbitseti_w(v8u32 _1) { return __lasx_xvbitseti_w(_1, 1); } + // CHECK-LABEL: @xvbitseti_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvbitseti_d(v4u64 _1) { return __lasx_xvbitseti_d(_1, 1); } + // CHECK-LABEL: @xvbitrev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvbitrev_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitrev_b(_1, _2); } + // CHECK-LABEL: @xvbitrev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvbitrev_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitrev_h(_1, _2); } + // CHECK-LABEL: @xvbitrev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvbitrev_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitrev_w(_1, _2); } + // CHECK-LABEL: @xvbitrev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvbitrev_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitrev_d(_1, _2); } + // CHECK-LABEL: @xvbitrevi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvbitrevi_b(v32u8 _1) { return __lasx_xvbitrevi_b(_1, 1); } + // CHECK-LABEL: @xvbitrevi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvbitrevi_h(v16u16 _1) { return __lasx_xvbitrevi_h(_1, 1); } + // CHECK-LABEL: @xvbitrevi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvbitrevi_w(v8u32 _1) { return __lasx_xvbitrevi_w(_1, 1); } + // CHECK-LABEL: @xvbitrevi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvbitrevi_d(v4u64 _1) { return __lasx_xvbitrevi_d(_1, 1); } + // CHECK-LABEL: @xvadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvadd_b(_1, _2); } + // CHECK-LABEL: @xvadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvadd_h(_1, _2); } + // CHECK-LABEL: @xvadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvadd_w(v8i32 _1, v8i32 _2) { return __lasx_xvadd_w(_1, _2); } + // CHECK-LABEL: @xvadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvadd_d(_1, _2); } + // CHECK-LABEL: @xvaddi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvaddi_bu(v32i8 _1) { return __lasx_xvaddi_bu(_1, 1); } + // CHECK-LABEL: @xvaddi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvaddi_hu(v16i16 _1) { return __lasx_xvaddi_hu(_1, 1); } + // CHECK-LABEL: @xvaddi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvaddi_wu(v8i32 _1) { return __lasx_xvaddi_wu(_1, 1); } + // CHECK-LABEL: @xvaddi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddi_du(v4i64 _1) { return __lasx_xvaddi_du(_1, 1); } + // CHECK-LABEL: @xvsub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsub_b(v32i8 _1, v32i8 _2) { return __lasx_xvsub_b(_1, _2); } + // CHECK-LABEL: @xvsub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsub_h(v16i16 _1, v16i16 _2) { return __lasx_xvsub_h(_1, _2); } + // CHECK-LABEL: @xvsub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsub_w(v8i32 _1, v8i32 _2) { return __lasx_xvsub_w(_1, _2); } + // CHECK-LABEL: @xvsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsub_d(v4i64 _1, v4i64 _2) { return __lasx_xvsub_d(_1, _2); } + // CHECK-LABEL: @xvsubi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsubi_bu(v32i8 _1) { return __lasx_xvsubi_bu(_1, 1); } + // CHECK-LABEL: @xvsubi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsubi_hu(v16i16 _1) { return __lasx_xvsubi_hu(_1, 1); } + // CHECK-LABEL: @xvsubi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsubi_wu(v8i32 _1) { return __lasx_xvsubi_wu(_1, 1); } + // CHECK-LABEL: @xvsubi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubi_du(v4i64 _1) { return __lasx_xvsubi_du(_1, 1); } + // CHECK-LABEL: @xvmax_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmax_b(v32i8 _1, v32i8 _2) { return __lasx_xvmax_b(_1, _2); } + // CHECK-LABEL: @xvmax_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmax_h(v16i16 _1, v16i16 _2) { return __lasx_xvmax_h(_1, _2); } + // CHECK-LABEL: @xvmax_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmax_w(v8i32 _1, v8i32 _2) { return __lasx_xvmax_w(_1, _2); } + // CHECK-LABEL: @xvmax_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmax_d(v4i64 _1, v4i64 _2) { return __lasx_xvmax_d(_1, _2); } + // CHECK-LABEL: @xvmaxi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmaxi_b(v32i8 _1) { return __lasx_xvmaxi_b(_1, 1); } + // CHECK-LABEL: @xvmaxi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmaxi_h(v16i16 _1) { return __lasx_xvmaxi_h(_1, 1); } + // CHECK-LABEL: @xvmaxi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmaxi_w(v8i32 _1) { return __lasx_xvmaxi_w(_1, 1); } + // CHECK-LABEL: @xvmaxi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaxi_d(v4i64 _1) { return __lasx_xvmaxi_d(_1, 1); } + // CHECK-LABEL: @xvmax_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvmax_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmax_bu(_1, _2); } + // CHECK-LABEL: @xvmax_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvmax_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmax_hu(_1, _2); } + // CHECK-LABEL: @xvmax_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvmax_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmax_wu(_1, _2); } + // CHECK-LABEL: @xvmax_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmax_du(v4u64 _1, v4u64 _2) { return __lasx_xvmax_du(_1, _2); } + // CHECK-LABEL: @xvmaxi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvmaxi_bu(v32u8 _1) { return __lasx_xvmaxi_bu(_1, 1); } + // CHECK-LABEL: @xvmaxi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvmaxi_hu(v16u16 _1) { return __lasx_xvmaxi_hu(_1, 1); } + // CHECK-LABEL: @xvmaxi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvmaxi_wu(v8u32 _1) { return __lasx_xvmaxi_wu(_1, 1); } + // CHECK-LABEL: @xvmaxi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmaxi_du(v4u64 _1) { return __lasx_xvmaxi_du(_1, 1); } + // CHECK-LABEL: @xvmin_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmin_b(v32i8 _1, v32i8 _2) { return __lasx_xvmin_b(_1, _2); } + // CHECK-LABEL: @xvmin_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmin_h(v16i16 _1, v16i16 _2) { return __lasx_xvmin_h(_1, _2); } + // CHECK-LABEL: @xvmin_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmin_w(v8i32 _1, v8i32 _2) { return __lasx_xvmin_w(_1, _2); } + // CHECK-LABEL: @xvmin_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmin_d(v4i64 _1, v4i64 _2) { return __lasx_xvmin_d(_1, _2); } + // CHECK-LABEL: @xvmini_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmini_b(v32i8 _1) { return __lasx_xvmini_b(_1, 1); } + // CHECK-LABEL: @xvmini_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmini_h(v16i16 _1) { return __lasx_xvmini_h(_1, 1); } + // CHECK-LABEL: @xvmini_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmini_w(v8i32 _1) { return __lasx_xvmini_w(_1, 1); } + // CHECK-LABEL: @xvmini_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmini_d(v4i64 _1) { return __lasx_xvmini_d(_1, 1); } + // CHECK-LABEL: @xvmin_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvmin_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmin_bu(_1, _2); } + // CHECK-LABEL: @xvmin_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvmin_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmin_hu(_1, _2); } + // CHECK-LABEL: @xvmin_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvmin_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmin_wu(_1, _2); } + // CHECK-LABEL: @xvmin_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmin_du(v4u64 _1, v4u64 _2) { return __lasx_xvmin_du(_1, _2); } + // CHECK-LABEL: @xvmini_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvmini_bu(v32u8 _1) { return __lasx_xvmini_bu(_1, 1); } + // CHECK-LABEL: @xvmini_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvmini_hu(v16u16 _1) { return __lasx_xvmini_hu(_1, 1); } + // CHECK-LABEL: @xvmini_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvmini_wu(v8u32 _1) { return __lasx_xvmini_wu(_1, 1); } + // CHECK-LABEL: @xvmini_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmini_du(v4u64 _1) { return __lasx_xvmini_du(_1, 1); } + // CHECK-LABEL: @xvseq_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvseq_b(v32i8 _1, v32i8 _2) { return __lasx_xvseq_b(_1, _2); } + // CHECK-LABEL: @xvseq_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvseq_h(v16i16 _1, v16i16 _2) { return __lasx_xvseq_h(_1, _2); } + // CHECK-LABEL: @xvseq_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvseq_w(v8i32 _1, v8i32 _2) { return __lasx_xvseq_w(_1, _2); } + // CHECK-LABEL: @xvseq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvseq_d(v4i64 _1, v4i64 _2) { return __lasx_xvseq_d(_1, _2); } + // CHECK-LABEL: @xvseqi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvseqi_b(v32i8 _1) { return __lasx_xvseqi_b(_1, 1); } + // CHECK-LABEL: @xvseqi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvseqi_h(v16i16 _1) { return __lasx_xvseqi_h(_1, 1); } + // CHECK-LABEL: @xvseqi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvseqi_w(v8i32 _1) { return __lasx_xvseqi_w(_1, 1); } + // CHECK-LABEL: @xvseqi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvseqi_d(v4i64 _1) { return __lasx_xvseqi_d(_1, 1); } + // CHECK-LABEL: @xvslt_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvslt_b(v32i8 _1, v32i8 _2) { return __lasx_xvslt_b(_1, _2); } + // CHECK-LABEL: @xvslt_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvslt_h(v16i16 _1, v16i16 _2) { return __lasx_xvslt_h(_1, _2); } + // CHECK-LABEL: @xvslt_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvslt_w(v8i32 _1, v8i32 _2) { return __lasx_xvslt_w(_1, _2); } + // CHECK-LABEL: @xvslt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvslt_d(v4i64 _1, v4i64 _2) { return __lasx_xvslt_d(_1, _2); } + // CHECK-LABEL: @xvslti_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvslti_b(v32i8 _1) { return __lasx_xvslti_b(_1, 1); } + // CHECK-LABEL: @xvslti_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvslti_h(v16i16 _1) { return __lasx_xvslti_h(_1, 1); } + // CHECK-LABEL: @xvslti_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvslti_w(v8i32 _1) { return __lasx_xvslti_w(_1, 1); } + // CHECK-LABEL: @xvslti_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvslti_d(v4i64 _1) { return __lasx_xvslti_d(_1, 1); } + // CHECK-LABEL: @xvslt_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvslt_bu(v32u8 _1, v32u8 _2) { return __lasx_xvslt_bu(_1, _2); } + // CHECK-LABEL: @xvslt_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvslt_hu(v16u16 _1, v16u16 _2) { return __lasx_xvslt_hu(_1, _2); } + // CHECK-LABEL: @xvslt_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvslt_wu(v8u32 _1, v8u32 _2) { return __lasx_xvslt_wu(_1, _2); } + // CHECK-LABEL: @xvslt_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvslt_du(v4u64 _1, v4u64 _2) { return __lasx_xvslt_du(_1, _2); } + // CHECK-LABEL: @xvslti_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvslti_bu(v32u8 _1) { return __lasx_xvslti_bu(_1, 1); } + // CHECK-LABEL: @xvslti_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvslti_hu(v16u16 _1) { return __lasx_xvslti_hu(_1, 1); } + // CHECK-LABEL: @xvslti_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvslti_wu(v8u32 _1) { return __lasx_xvslti_wu(_1, 1); } + // CHECK-LABEL: @xvslti_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvslti_du(v4u64 _1) { return __lasx_xvslti_du(_1, 1); } + // CHECK-LABEL: @xvsle_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsle_b(v32i8 _1, v32i8 _2) { return __lasx_xvsle_b(_1, _2); } + // CHECK-LABEL: @xvsle_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsle_h(v16i16 _1, v16i16 _2) { return __lasx_xvsle_h(_1, _2); } + // CHECK-LABEL: @xvsle_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsle_w(v8i32 _1, v8i32 _2) { return __lasx_xvsle_w(_1, _2); } + // CHECK-LABEL: @xvsle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsle_d(v4i64 _1, v4i64 _2) { return __lasx_xvsle_d(_1, _2); } + // CHECK-LABEL: @xvslei_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvslei_b(v32i8 _1) { return __lasx_xvslei_b(_1, 1); } + // CHECK-LABEL: @xvslei_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvslei_h(v16i16 _1) { return __lasx_xvslei_h(_1, 1); } + // CHECK-LABEL: @xvslei_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvslei_w(v8i32 _1) { return __lasx_xvslei_w(_1, 1); } + // CHECK-LABEL: @xvslei_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvslei_d(v4i64 _1) { return __lasx_xvslei_d(_1, 1); } + // CHECK-LABEL: @xvsle_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsle_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsle_bu(_1, _2); } + // CHECK-LABEL: @xvsle_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsle_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsle_hu(_1, _2); } + // CHECK-LABEL: @xvsle_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsle_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsle_wu(_1, _2); } + // CHECK-LABEL: @xvsle_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsle_du(v4u64 _1, v4u64 _2) { return __lasx_xvsle_du(_1, _2); } + // CHECK-LABEL: @xvslei_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvslei_bu(v32u8 _1) { return __lasx_xvslei_bu(_1, 1); } + // CHECK-LABEL: @xvslei_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvslei_hu(v16u16 _1) { return __lasx_xvslei_hu(_1, 1); } + // CHECK-LABEL: @xvslei_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvslei_wu(v8u32 _1) { return __lasx_xvslei_wu(_1, 1); } + // CHECK-LABEL: @xvslei_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvslei_du(v4u64 _1) { return __lasx_xvslei_du(_1, 1); } + // CHECK-LABEL: @xvsat_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsat_b(v32i8 _1) { return __lasx_xvsat_b(_1, 1); } + // CHECK-LABEL: @xvsat_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsat_h(v16i16 _1) { return __lasx_xvsat_h(_1, 1); } + // CHECK-LABEL: @xvsat_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsat_w(v8i32 _1) { return __lasx_xvsat_w(_1, 1); } + // CHECK-LABEL: @xvsat_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsat_d(v4i64 _1) { return __lasx_xvsat_d(_1, 1); } + // CHECK-LABEL: @xvsat_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvsat_bu(v32u8 _1) { return __lasx_xvsat_bu(_1, 1); } + // CHECK-LABEL: @xvsat_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvsat_hu(v16u16 _1) { return __lasx_xvsat_hu(_1, 1); } + // CHECK-LABEL: @xvsat_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvsat_wu(v8u32 _1) { return __lasx_xvsat_wu(_1, 1); } + // CHECK-LABEL: @xvsat_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvsat_du(v4u64 _1) { return __lasx_xvsat_du(_1, 1); } + // CHECK-LABEL: @xvadda_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvadda_b(v32i8 _1, v32i8 _2) { return __lasx_xvadda_b(_1, _2); } + // CHECK-LABEL: @xvadda_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvadda_h(v16i16 _1, v16i16 _2) { return __lasx_xvadda_h(_1, _2); } + // CHECK-LABEL: @xvadda_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvadda_w(v8i32 _1, v8i32 _2) { return __lasx_xvadda_w(_1, _2); } + // CHECK-LABEL: @xvadda_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvadda_d(v4i64 _1, v4i64 _2) { return __lasx_xvadda_d(_1, _2); } + // CHECK-LABEL: @xvsadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvsadd_b(_1, _2); } + // CHECK-LABEL: @xvsadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvsadd_h(_1, _2); } + // CHECK-LABEL: @xvsadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsadd_w(v8i32 _1, v8i32 _2) { return __lasx_xvsadd_w(_1, _2); } + // CHECK-LABEL: @xvsadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvsadd_d(_1, _2); } + // CHECK-LABEL: @xvsadd_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvsadd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsadd_bu(_1, _2); } + // CHECK-LABEL: @xvsadd_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvsadd_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsadd_hu(_1, _2); } + // CHECK-LABEL: @xvsadd_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvsadd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsadd_wu(_1, _2); } + // CHECK-LABEL: @xvsadd_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvsadd_du(v4u64 _1, v4u64 _2) { return __lasx_xvsadd_du(_1, _2); } + // CHECK-LABEL: @xvavg_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvavg_b(v32i8 _1, v32i8 _2) { return __lasx_xvavg_b(_1, _2); } + // CHECK-LABEL: @xvavg_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvavg_h(v16i16 _1, v16i16 _2) { return __lasx_xvavg_h(_1, _2); } + // CHECK-LABEL: @xvavg_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvavg_w(v8i32 _1, v8i32 _2) { return __lasx_xvavg_w(_1, _2); } + // CHECK-LABEL: @xvavg_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvavg_d(v4i64 _1, v4i64 _2) { return __lasx_xvavg_d(_1, _2); } + // CHECK-LABEL: @xvavg_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvavg_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavg_bu(_1, _2); } + // CHECK-LABEL: @xvavg_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvavg_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavg_hu(_1, _2); } + // CHECK-LABEL: @xvavg_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvavg_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavg_wu(_1, _2); } + // CHECK-LABEL: @xvavg_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvavg_du(v4u64 _1, v4u64 _2) { return __lasx_xvavg_du(_1, _2); } + // CHECK-LABEL: @xvavgr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvavgr_b(v32i8 _1, v32i8 _2) { return __lasx_xvavgr_b(_1, _2); } + // CHECK-LABEL: @xvavgr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvavgr_h(v16i16 _1, v16i16 _2) { return __lasx_xvavgr_h(_1, _2); } + // CHECK-LABEL: @xvavgr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvavgr_w(v8i32 _1, v8i32 _2) { return __lasx_xvavgr_w(_1, _2); } + // CHECK-LABEL: @xvavgr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvavgr_d(v4i64 _1, v4i64 _2) { return __lasx_xvavgr_d(_1, _2); } + // CHECK-LABEL: @xvavgr_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvavgr_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavgr_bu(_1, _2); } + // CHECK-LABEL: @xvavgr_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvavgr_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavgr_hu(_1, _2); } + // CHECK-LABEL: @xvavgr_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvavgr_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavgr_wu(_1, _2); } + // CHECK-LABEL: @xvavgr_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvavgr_du(v4u64 _1, v4u64 _2) { return __lasx_xvavgr_du(_1, _2); } + // CHECK-LABEL: @xvssub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssub_b(v32i8 _1, v32i8 _2) { return __lasx_xvssub_b(_1, _2); } + // CHECK-LABEL: @xvssub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssub_h(v16i16 _1, v16i16 _2) { return __lasx_xvssub_h(_1, _2); } + // CHECK-LABEL: @xvssub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssub_w(v8i32 _1, v8i32 _2) { return __lasx_xvssub_w(_1, _2); } + // CHECK-LABEL: @xvssub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvssub_d(v4i64 _1, v4i64 _2) { return __lasx_xvssub_d(_1, _2); } + // CHECK-LABEL: @xvssub_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssub_bu(v32u8 _1, v32u8 _2) { return __lasx_xvssub_bu(_1, _2); } + // CHECK-LABEL: @xvssub_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssub_hu(v16u16 _1, v16u16 _2) { return __lasx_xvssub_hu(_1, _2); } + // CHECK-LABEL: @xvssub_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssub_wu(v8u32 _1, v8u32 _2) { return __lasx_xvssub_wu(_1, _2); } + // CHECK-LABEL: @xvssub_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvssub_du(v4u64 _1, v4u64 _2) { return __lasx_xvssub_du(_1, _2); } + // CHECK-LABEL: @xvabsd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvabsd_b(v32i8 _1, v32i8 _2) { return __lasx_xvabsd_b(_1, _2); } + // CHECK-LABEL: @xvabsd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvabsd_h(v16i16 _1, v16i16 _2) { return __lasx_xvabsd_h(_1, _2); } + // CHECK-LABEL: @xvabsd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvabsd_w(v8i32 _1, v8i32 _2) { return __lasx_xvabsd_w(_1, _2); } + // CHECK-LABEL: @xvabsd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvabsd_d(v4i64 _1, v4i64 _2) { return __lasx_xvabsd_d(_1, _2); } + // CHECK-LABEL: @xvabsd_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvabsd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvabsd_bu(_1, _2); } + // CHECK-LABEL: @xvabsd_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvabsd_hu(v16u16 _1, v16u16 _2) { return __lasx_xvabsd_hu(_1, _2); } + // CHECK-LABEL: @xvabsd_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvabsd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvabsd_wu(_1, _2); } + // CHECK-LABEL: @xvabsd_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvabsd_du(v4u64 _1, v4u64 _2) { return __lasx_xvabsd_du(_1, _2); } + // CHECK-LABEL: @xvmul_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmul_b(v32i8 _1, v32i8 _2) { return __lasx_xvmul_b(_1, _2); } + // CHECK-LABEL: @xvmul_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmul_h(v16i16 _1, v16i16 _2) { return __lasx_xvmul_h(_1, _2); } + // CHECK-LABEL: @xvmul_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmul_w(v8i32 _1, v8i32 _2) { return __lasx_xvmul_w(_1, _2); } + // CHECK-LABEL: @xvmul_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmul_d(v4i64 _1, v4i64 _2) { return __lasx_xvmul_d(_1, _2); } + // CHECK-LABEL: @xvmadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) ++// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmadd_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmadd_b(_1, _2, _3); } + // CHECK-LABEL: @xvmadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmadd_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmadd_h(_1, _2, _3); } + // CHECK-LABEL: @xvmadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_136]], <8 x i32> [[_247]], <8 x i32> [[_358]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmadd_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmadd_w(_1, _2, _3); } + // CHECK-LABEL: @xvmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmadd_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmadd_d(_1, _2, _3); } + // CHECK-LABEL: @xvmsub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) ++// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmsub_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmsub_b(_1, _2, _3); } + // CHECK-LABEL: @xvmsub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmsub_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmsub_h(_1, _2, _3); } + // CHECK-LABEL: @xvmsub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_136]], <8 x i32> [[_247]], <8 x i32> [[_358]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmsub_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmsub_w(_1, _2, _3); } + // CHECK-LABEL: @xvmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmsub_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmsub_d(_1, _2, _3); } + // CHECK-LABEL: @xvdiv_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvdiv_b(v32i8 _1, v32i8 _2) { return __lasx_xvdiv_b(_1, _2); } + // CHECK-LABEL: @xvdiv_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvdiv_h(v16i16 _1, v16i16 _2) { return __lasx_xvdiv_h(_1, _2); } + // CHECK-LABEL: @xvdiv_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvdiv_w(v8i32 _1, v8i32 _2) { return __lasx_xvdiv_w(_1, _2); } + // CHECK-LABEL: @xvdiv_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvdiv_d(v4i64 _1, v4i64 _2) { return __lasx_xvdiv_d(_1, _2); } + // CHECK-LABEL: @xvdiv_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvdiv_bu(v32u8 _1, v32u8 _2) { return __lasx_xvdiv_bu(_1, _2); } + // CHECK-LABEL: @xvdiv_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvdiv_hu(v16u16 _1, v16u16 _2) { return __lasx_xvdiv_hu(_1, _2); } + // CHECK-LABEL: @xvdiv_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvdiv_wu(v8u32 _1, v8u32 _2) { return __lasx_xvdiv_wu(_1, _2); } + // CHECK-LABEL: @xvdiv_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvdiv_du(v4u64 _1, v4u64 _2) { return __lasx_xvdiv_du(_1, _2); } + // CHECK-LABEL: @xvhaddw_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvhaddw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhaddw_h_b(_1, _2); } + // CHECK-LABEL: @xvhaddw_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvhaddw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhaddw_w_h(_1, _2); } + // CHECK-LABEL: @xvhaddw_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvhaddw_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvhaddw_d_w(_1, _2); } + // CHECK-LABEL: @xvhaddw_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvhaddw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhaddw_hu_bu(_1, _2); } + // CHECK-LABEL: @xvhaddw_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvhaddw_wu_hu(v16u16 _1, v16u16 _2) { return __lasx_xvhaddw_wu_hu(_1, _2); } + // CHECK-LABEL: @xvhaddw_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvhaddw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhaddw_du_wu(_1, _2); } + // CHECK-LABEL: @xvhsubw_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvhsubw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhsubw_h_b(_1, _2); } + // CHECK-LABEL: @xvhsubw_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvhsubw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhsubw_w_h(_1, _2); } + // CHECK-LABEL: @xvhsubw_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvhsubw_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvhsubw_d_w(_1, _2); } + // CHECK-LABEL: @xvhsubw_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvhsubw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhsubw_hu_bu(_1, _2); } + // CHECK-LABEL: @xvhsubw_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvhsubw_wu_hu(v16u16 _1, v16u16 _2) { return __lasx_xvhsubw_wu_hu(_1, _2); } + // CHECK-LABEL: @xvhsubw_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvhsubw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhsubw_du_wu(_1, _2); } + // CHECK-LABEL: @xvmod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmod_b(v32i8 _1, v32i8 _2) { return __lasx_xvmod_b(_1, _2); } + // CHECK-LABEL: @xvmod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmod_h(v16i16 _1, v16i16 _2) { return __lasx_xvmod_h(_1, _2); } + // CHECK-LABEL: @xvmod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmod_w(v8i32 _1, v8i32 _2) { return __lasx_xvmod_w(_1, _2); } + // CHECK-LABEL: @xvmod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmod_d(v4i64 _1, v4i64 _2) { return __lasx_xvmod_d(_1, _2); } + // CHECK-LABEL: @xvmod_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvmod_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmod_bu(_1, _2); } + // CHECK-LABEL: @xvmod_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvmod_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmod_hu(_1, _2); } + // CHECK-LABEL: @xvmod_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvmod_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmod_wu(_1, _2); } + // CHECK-LABEL: @xvmod_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmod_du(v4u64 _1, v4u64 _2) { return __lasx_xvmod_du(_1, _2); } + // CHECK-LABEL: @xvrepl128vei_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvrepl128vei_b(v32i8 _1) { return __lasx_xvrepl128vei_b(_1, 1); } + // CHECK-LABEL: @xvrepl128vei_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvrepl128vei_h(v16i16 _1) { return __lasx_xvrepl128vei_h(_1, 1); } + // CHECK-LABEL: @xvrepl128vei_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvrepl128vei_w(v8i32 _1) { return __lasx_xvrepl128vei_w(_1, 1); } + // CHECK-LABEL: @xvrepl128vei_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvrepl128vei_d(v4i64 _1) { return __lasx_xvrepl128vei_d(_1, 1); } + // CHECK-LABEL: @xvpickev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvpickev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickev_b(_1, _2); } + // CHECK-LABEL: @xvpickev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvpickev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickev_h(_1, _2); } + // CHECK-LABEL: @xvpickev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvpickev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickev_w(_1, _2); } + // CHECK-LABEL: @xvpickev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvpickev_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickev_d(_1, _2); } + // CHECK-LABEL: @xvpickod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvpickod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickod_b(_1, _2); } + // CHECK-LABEL: @xvpickod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvpickod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickod_h(_1, _2); } + // CHECK-LABEL: @xvpickod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvpickod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickod_w(_1, _2); } + // CHECK-LABEL: @xvpickod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvpickod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickod_d(_1, _2); } + // CHECK-LABEL: @xvilvh_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvilvh_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvh_b(_1, _2); } + // CHECK-LABEL: @xvilvh_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvilvh_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvh_h(_1, _2); } + // CHECK-LABEL: @xvilvh_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvilvh_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvh_w(_1, _2); } + // CHECK-LABEL: @xvilvh_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvilvh_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvh_d(_1, _2); } + // CHECK-LABEL: @xvilvl_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvilvl_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvl_b(_1, _2); } + // CHECK-LABEL: @xvilvl_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvilvl_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvl_h(_1, _2); } + // CHECK-LABEL: @xvilvl_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvilvl_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvl_w(_1, _2); } + // CHECK-LABEL: @xvilvl_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvilvl_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvl_d(_1, _2); } + // CHECK-LABEL: @xvpackev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvpackev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackev_b(_1, _2); } + // CHECK-LABEL: @xvpackev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvpackev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackev_h(_1, _2); } + // CHECK-LABEL: @xvpackev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvpackev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackev_w(_1, _2); } + // CHECK-LABEL: @xvpackev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvpackev_d(v4i64 _1, v4i64 _2) { return __lasx_xvpackev_d(_1, _2); } + // CHECK-LABEL: @xvpackod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvpackod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackod_b(_1, _2); } + // CHECK-LABEL: @xvpackod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvpackod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackod_h(_1, _2); } + // CHECK-LABEL: @xvpackod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvpackod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackod_w(_1, _2); } + // CHECK-LABEL: @xvpackod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvpackod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpackod_d(_1, _2); } + // CHECK-LABEL: @xvshuf_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) ++// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvshuf_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvshuf_b(_1, _2, _3); } + // CHECK-LABEL: @xvshuf_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvshuf_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvshuf_h(_1, _2, _3); } + // CHECK-LABEL: @xvshuf_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_136]], <8 x i32> [[_247]], <8 x i32> [[_358]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvshuf_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvshuf_w(_1, _2, _3); } + // CHECK-LABEL: @xvshuf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvshuf_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvshuf_d(_1, _2, _3); } + // CHECK-LABEL: @xvand_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvand_v(v32u8 _1, v32u8 _2) { return __lasx_xvand_v(_1, _2); } + // CHECK-LABEL: @xvandi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvandi_b(v32u8 _1) { return __lasx_xvandi_b(_1, 1); } + // CHECK-LABEL: @xvor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvor_v(v32u8 _1, v32u8 _2) { return __lasx_xvor_v(_1, _2); } + // CHECK-LABEL: @xvori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvori_b(v32u8 _1) { return __lasx_xvori_b(_1, 1); } + // CHECK-LABEL: @xvnor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvnor_v(v32u8 _1, v32u8 _2) { return __lasx_xvnor_v(_1, _2); } + // CHECK-LABEL: @xvnori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvnori_b(v32u8 _1) { return __lasx_xvnori_b(_1, 1); } + // CHECK-LABEL: @xvxor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvxor_v(v32u8 _1, v32u8 _2) { return __lasx_xvxor_v(_1, _2); } + // CHECK-LABEL: @xvxori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvxori_b(v32u8 _1) { return __lasx_xvxori_b(_1, 1); } + // CHECK-LABEL: @xvbitsel_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) ++// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvbitsel_v(v32u8 _1, v32u8 _2, v32u8 _3) { return __lasx_xvbitsel_v(_1, _2, _3); } + // CHECK-LABEL: @xvbitseli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvbitseli_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitseli_b(_1, _2, 1); } + // CHECK-LABEL: @xvshuf4i_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvshuf4i_b(v32i8 _1) { return __lasx_xvshuf4i_b(_1, 1); } + // CHECK-LABEL: @xvshuf4i_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvshuf4i_h(v16i16 _1) { return __lasx_xvshuf4i_h(_1, 1); } + // CHECK-LABEL: @xvshuf4i_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvshuf4i_w(v8i32 _1) { return __lasx_xvshuf4i_w(_1, 1); } + // CHECK-LABEL: @xvreplgr2vr_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvreplgr2vr_b(int _1) { return __lasx_xvreplgr2vr_b(_1); } + // CHECK-LABEL: @xvreplgr2vr_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvreplgr2vr_h(int _1) { return __lasx_xvreplgr2vr_h(_1); } + // CHECK-LABEL: @xvreplgr2vr_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvreplgr2vr_w(int _1) { return __lasx_xvreplgr2vr_w(_1); } + // CHECK-LABEL: @xvreplgr2vr_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[CONV:%.*]] = sext i32 [[_1:%.*]] to i64 + // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 [[CONV]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvreplgr2vr_d(int _1) { return __lasx_xvreplgr2vr_d(_1); } + // CHECK-LABEL: @xvpcnt_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvpcnt_b(v32i8 _1) { return __lasx_xvpcnt_b(_1); } + // CHECK-LABEL: @xvpcnt_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_112]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvpcnt_h(v16i16 _1) { return __lasx_xvpcnt_h(_1); } + // CHECK-LABEL: @xvpcnt_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_112]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvpcnt_w(v8i32 _1) { return __lasx_xvpcnt_w(_1); } + // CHECK-LABEL: @xvpcnt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvpcnt_d(v4i64 _1) { return __lasx_xvpcnt_d(_1); } + // CHECK-LABEL: @xvclo_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvclo_b(v32i8 _1) { return __lasx_xvclo_b(_1); } + // CHECK-LABEL: @xvclo_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_112]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvclo_h(v16i16 _1) { return __lasx_xvclo_h(_1); } + // CHECK-LABEL: @xvclo_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_112]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvclo_w(v8i32 _1) { return __lasx_xvclo_w(_1); } + // CHECK-LABEL: @xvclo_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvclo_d(v4i64 _1) { return __lasx_xvclo_d(_1); } + // CHECK-LABEL: @xvclz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvclz_b(v32i8 _1) { return __lasx_xvclz_b(_1); } + // CHECK-LABEL: @xvclz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_112]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvclz_h(v16i16 _1) { return __lasx_xvclz_h(_1); } + // CHECK-LABEL: @xvclz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_112]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvclz_w(v8i32 _1) { return __lasx_xvclz_w(_1); } + // CHECK-LABEL: @xvclz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvclz_d(v4i64 _1) { return __lasx_xvclz_d(_1); } + // CHECK-LABEL: @xvfadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfadd_s(v8f32 _1, v8f32 _2) { return __lasx_xvfadd_s(_1, _2); } + // CHECK-LABEL: @xvfadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfadd_d(v4f64 _1, v4f64 _2) { return __lasx_xvfadd_d(_1, _2); } + // CHECK-LABEL: @xvfsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfsub_s(v8f32 _1, v8f32 _2) { return __lasx_xvfsub_s(_1, _2); } + // CHECK-LABEL: @xvfsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfsub_d(v4f64 _1, v4f64 _2) { return __lasx_xvfsub_d(_1, _2); } + // CHECK-LABEL: @xvfmul_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfmul_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmul_s(_1, _2); } + // CHECK-LABEL: @xvfmul_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfmul_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmul_d(_1, _2); } + // CHECK-LABEL: @xvfdiv_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfdiv_s(v8f32 _1, v8f32 _2) { return __lasx_xvfdiv_s(_1, _2); } + // CHECK-LABEL: @xvfdiv_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfdiv_d(v4f64 _1, v4f64 _2) { return __lasx_xvfdiv_d(_1, _2); } + // CHECK-LABEL: @xvfcvt_h_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvfcvt_h_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcvt_h_s(_1, _2); } + // CHECK-LABEL: @xvfcvt_s_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfcvt_s_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcvt_s_d(_1, _2); } + // CHECK-LABEL: @xvfmin_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfmin_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmin_s(_1, _2); } + // CHECK-LABEL: @xvfmin_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfmin_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmin_d(_1, _2); } + // CHECK-LABEL: @xvfmina_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfmina_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmina_s(_1, _2); } + // CHECK-LABEL: @xvfmina_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfmina_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmina_d(_1, _2); } + // CHECK-LABEL: @xvfmax_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfmax_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmax_s(_1, _2); } + // CHECK-LABEL: @xvfmax_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfmax_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmax_d(_1, _2); } + // CHECK-LABEL: @xvfmaxa_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfmaxa_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmaxa_s(_1, _2); } + // CHECK-LABEL: @xvfmaxa_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfmaxa_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmaxa_d(_1, _2); } + // CHECK-LABEL: @xvfclass_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfclass_s(v8f32 _1) { return __lasx_xvfclass_s(_1); } + // CHECK-LABEL: @xvfclass_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfclass_d(v4f64 _1) { return __lasx_xvfclass_d(_1); } + // CHECK-LABEL: @xvfsqrt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfsqrt_s(v8f32 _1) { return __lasx_xvfsqrt_s(_1); } + // CHECK-LABEL: @xvfsqrt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfsqrt_d(v4f64 _1) { return __lasx_xvfsqrt_d(_1); } + // CHECK-LABEL: @xvfrecip_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfrecip_s(v8f32 _1) { return __lasx_xvfrecip_s(_1); } + // CHECK-LABEL: @xvfrecip_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfrecip_d(v4f64 _1) { return __lasx_xvfrecip_d(_1); } + // CHECK-LABEL: @xvfrint_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfrint_s(v8f32 _1) { return __lasx_xvfrint_s(_1); } + // CHECK-LABEL: @xvfrint_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfrint_d(v4f64 _1) { return __lasx_xvfrint_d(_1); } + // CHECK-LABEL: @xvfrsqrt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfrsqrt_s(v8f32 _1) { return __lasx_xvfrsqrt_s(_1); } + // CHECK-LABEL: @xvfrsqrt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfrsqrt_d(v4f64 _1) { return __lasx_xvfrsqrt_d(_1); } + // CHECK-LABEL: @xvflogb_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvflogb_s(v8f32 _1) { return __lasx_xvflogb_s(_1); } + // CHECK-LABEL: @xvflogb_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvflogb_d(v4f64 _1) { return __lasx_xvflogb_d(_1); } + // CHECK-LABEL: @xvfcvth_s_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_112]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfcvth_s_h(v16i16 _1) { return __lasx_xvfcvth_s_h(_1); } + // CHECK-LABEL: @xvfcvth_d_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfcvth_d_s(v8f32 _1) { return __lasx_xvfcvth_d_s(_1); } + // CHECK-LABEL: @xvfcvtl_s_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_112]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfcvtl_s_h(v16i16 _1) { return __lasx_xvfcvtl_s_h(_1); } + // CHECK-LABEL: @xvfcvtl_d_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfcvtl_d_s(v8f32 _1) { return __lasx_xvfcvtl_d_s(_1); } + // CHECK-LABEL: @xvftint_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftint_w_s(v8f32 _1) { return __lasx_xvftint_w_s(_1); } + // CHECK-LABEL: @xvftint_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftint_l_d(v4f64 _1) { return __lasx_xvftint_l_d(_1); } + // CHECK-LABEL: @xvftint_wu_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvftint_wu_s(v8f32 _1) { return __lasx_xvftint_wu_s(_1); } + // CHECK-LABEL: @xvftint_lu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvftint_lu_d(v4f64 _1) { return __lasx_xvftint_lu_d(_1); } + // CHECK-LABEL: @xvftintrz_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftintrz_w_s(v8f32 _1) { return __lasx_xvftintrz_w_s(_1); } + // CHECK-LABEL: @xvftintrz_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrz_l_d(v4f64 _1) { return __lasx_xvftintrz_l_d(_1); } + // CHECK-LABEL: @xvftintrz_wu_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvftintrz_wu_s(v8f32 _1) { return __lasx_xvftintrz_wu_s(_1); } + // CHECK-LABEL: @xvftintrz_lu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvftintrz_lu_d(v4f64 _1) { return __lasx_xvftintrz_lu_d(_1); } + // CHECK-LABEL: @xvffint_s_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> [[_112]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvffint_s_w(v8i32 _1) { return __lasx_xvffint_s_w(_1); } + // CHECK-LABEL: @xvffint_d_l( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvffint_d_l(v4i64 _1) { return __lasx_xvffint_d_l(_1); } + // CHECK-LABEL: @xvffint_s_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_112]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvffint_s_wu(v8u32 _1) { return __lasx_xvffint_s_wu(_1); } + // CHECK-LABEL: @xvffint_d_lu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvffint_d_lu(v4u64 _1) { return __lasx_xvffint_d_lu(_1); } + // CHECK-LABEL: @xvreplve_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_1:%.*]], i32 [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_112]], i32 [[_2:%.*]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvreplve_b(v32i8 _1, int _2) { return __lasx_xvreplve_b(_1, _2); } + // CHECK-LABEL: @xvreplve_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_1:%.*]], i32 [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_112]], i32 [[_2:%.*]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvreplve_h(v16i16 _1, int _2) { return __lasx_xvreplve_h(_1, _2); } + // CHECK-LABEL: @xvreplve_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_1:%.*]], i32 [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_112]], i32 [[_2:%.*]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvreplve_w(v8i32 _1, int _2) { return __lasx_xvreplve_w(_1, _2); } + // CHECK-LABEL: @xvreplve_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1:%.*]], i32 [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1]], i32 [[_2:%.*]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvreplve_d(v4i64 _1, int _2) { return __lasx_xvreplve_d(_1, _2); } + // CHECK-LABEL: @xvpermi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvpermi_w(v8i32 _1, v8i32 _2) { return __lasx_xvpermi_w(_1, _2, 1); } + // CHECK-LABEL: @xvandn_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvandn_v(v32u8 _1, v32u8 _2) { return __lasx_xvandn_v(_1, _2); } + // CHECK-LABEL: @xvneg_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvneg_b(v32i8 _1) { return __lasx_xvneg_b(_1); } + // CHECK-LABEL: @xvneg_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_112]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvneg_h(v16i16 _1) { return __lasx_xvneg_h(_1); } + // CHECK-LABEL: @xvneg_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_112]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvneg_w(v8i32 _1) { return __lasx_xvneg_w(_1); } + // CHECK-LABEL: @xvneg_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvneg_d(v4i64 _1) { return __lasx_xvneg_d(_1); } + // CHECK-LABEL: @xvmuh_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmuh_b(v32i8 _1, v32i8 _2) { return __lasx_xvmuh_b(_1, _2); } + // CHECK-LABEL: @xvmuh_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmuh_h(v16i16 _1, v16i16 _2) { return __lasx_xvmuh_h(_1, _2); } + // CHECK-LABEL: @xvmuh_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmuh_w(v8i32 _1, v8i32 _2) { return __lasx_xvmuh_w(_1, _2); } + // CHECK-LABEL: @xvmuh_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmuh_d(v4i64 _1, v4i64 _2) { return __lasx_xvmuh_d(_1, _2); } + // CHECK-LABEL: @xvmuh_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvmuh_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmuh_bu(_1, _2); } + // CHECK-LABEL: @xvmuh_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvmuh_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmuh_hu(_1, _2); } + // CHECK-LABEL: @xvmuh_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvmuh_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmuh_wu(_1, _2); } + // CHECK-LABEL: @xvmuh_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmuh_du(v4u64 _1, v4u64 _2) { return __lasx_xvmuh_du(_1, _2); } + // CHECK-LABEL: @xvsllwil_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsllwil_h_b(v32i8 _1) { return __lasx_xvsllwil_h_b(_1, 1); } + // CHECK-LABEL: @xvsllwil_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsllwil_w_h(v16i16 _1) { return __lasx_xvsllwil_w_h(_1, 1); } + // CHECK-LABEL: @xvsllwil_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsllwil_d_w(v8i32 _1) { return __lasx_xvsllwil_d_w(_1, 1); } + // CHECK-LABEL: @xvsllwil_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvsllwil_hu_bu(v32u8 _1) { return __lasx_xvsllwil_hu_bu(_1, 1); } + // CHECK-LABEL: @xvsllwil_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvsllwil_wu_hu(v16u16 _1) { return __lasx_xvsllwil_wu_hu(_1, 1); } + // CHECK-LABEL: @xvsllwil_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvsllwil_du_wu(v8u32 _1) { return __lasx_xvsllwil_du_wu(_1, 1); } + // CHECK-LABEL: @xvsran_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsran_b_h(_1, _2); } + // CHECK-LABEL: @xvsran_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsran_h_w(_1, _2); } + // CHECK-LABEL: @xvsran_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsran_w_d(_1, _2); } + // CHECK-LABEL: @xvssran_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssran_b_h(_1, _2); } + // CHECK-LABEL: @xvssran_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssran_h_w(_1, _2); } + // CHECK-LABEL: @xvssran_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssran_w_d(_1, _2); } + // CHECK-LABEL: @xvssran_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssran_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssran_bu_h(_1, _2); } + // CHECK-LABEL: @xvssran_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssran_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssran_hu_w(_1, _2); } + // CHECK-LABEL: @xvssran_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssran_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssran_wu_d(_1, _2); } + // CHECK-LABEL: @xvsrarn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrarn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrarn_b_h(_1, _2); } + // CHECK-LABEL: @xvsrarn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrarn_h_w(_1, _2); } + // CHECK-LABEL: @xvsrarn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrarn_w_d(_1, _2); } + // CHECK-LABEL: @xvssrarn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssrarn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrarn_b_h(_1, _2); } + // CHECK-LABEL: @xvssrarn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrarn_h_w(_1, _2); } + // CHECK-LABEL: @xvssrarn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrarn_w_d(_1, _2); } + // CHECK-LABEL: @xvssrarn_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssrarn_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrarn_bu_h(_1, _2); } + // CHECK-LABEL: @xvssrarn_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssrarn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrarn_hu_w(_1, _2); } + // CHECK-LABEL: @xvssrarn_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssrarn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrarn_wu_d(_1, _2); } + // CHECK-LABEL: @xvsrln_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrln_b_h(_1, _2); } + // CHECK-LABEL: @xvsrln_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrln_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrln_h_w(_1, _2); } + // CHECK-LABEL: @xvsrln_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrln_w_d(_1, _2); } + // CHECK-LABEL: @xvssrln_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssrln_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrln_bu_h(_1, _2); } + // CHECK-LABEL: @xvssrln_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssrln_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrln_hu_w(_1, _2); } + // CHECK-LABEL: @xvssrln_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssrln_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrln_wu_d(_1, _2); } + // CHECK-LABEL: @xvsrlrn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrn_b_h(_1, _2); } + // CHECK-LABEL: @xvsrlrn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrn_h_w(_1, _2); } + // CHECK-LABEL: @xvsrlrn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrn_w_d(_1, _2); } + // CHECK-LABEL: @xvssrlrn_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssrlrn_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrlrn_bu_h(_1, _2); } + // CHECK-LABEL: @xvssrlrn_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssrlrn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrlrn_hu_w(_1, _2); } + // CHECK-LABEL: @xvssrlrn_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssrlrn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrlrn_wu_d(_1, _2); } + // CHECK-LABEL: @xvfrstpi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2) { return __lasx_xvfrstpi_b(_1, _2, 1); } + // CHECK-LABEL: @xvfrstpi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2) { return __lasx_xvfrstpi_h(_1, _2, 1); } + // CHECK-LABEL: @xvfrstp_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) ++// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvfrstp_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvfrstp_b(_1, _2, _3); } + // CHECK-LABEL: @xvfrstp_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvfrstp_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvfrstp_h(_1, _2, _3); } + // CHECK-LABEL: @xvshuf4i_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2) { return __lasx_xvshuf4i_d(_1, _2, 1); } + // CHECK-LABEL: @xvbsrl_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvbsrl_v(v32i8 _1) { return __lasx_xvbsrl_v(_1, 1); } + // CHECK-LABEL: @xvbsll_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvbsll_v(v32i8 _1) { return __lasx_xvbsll_v(_1, 1); } + // CHECK-LABEL: @xvextrins_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvextrins_b(v32i8 _1, v32i8 _2) { return __lasx_xvextrins_b(_1, _2, 1); } + // CHECK-LABEL: @xvextrins_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvextrins_h(v16i16 _1, v16i16 _2) { return __lasx_xvextrins_h(_1, _2, 1); } + // CHECK-LABEL: @xvextrins_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvextrins_w(v8i32 _1, v8i32 _2) { return __lasx_xvextrins_w(_1, _2, 1); } + // CHECK-LABEL: @xvextrins_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvextrins_d(v4i64 _1, v4i64 _2) { return __lasx_xvextrins_d(_1, _2, 1); } + // CHECK-LABEL: @xvmskltz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmskltz_b(v32i8 _1) { return __lasx_xvmskltz_b(_1); } + // CHECK-LABEL: @xvmskltz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_112]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmskltz_h(v16i16 _1) { return __lasx_xvmskltz_h(_1); } + // CHECK-LABEL: @xvmskltz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_112]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmskltz_w(v8i32 _1) { return __lasx_xvmskltz_w(_1); } + // CHECK-LABEL: @xvmskltz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmskltz_d(v4i64 _1) { return __lasx_xvmskltz_d(_1); } + // CHECK-LABEL: @xvsigncov_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsigncov_b(v32i8 _1, v32i8 _2) { return __lasx_xvsigncov_b(_1, _2); } + // CHECK-LABEL: @xvsigncov_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsigncov_h(v16i16 _1, v16i16 _2) { return __lasx_xvsigncov_h(_1, _2); } + // CHECK-LABEL: @xvsigncov_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsigncov_w(v8i32 _1, v8i32 _2) { return __lasx_xvsigncov_w(_1, _2); } + // CHECK-LABEL: @xvsigncov_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsigncov_d(v4i64 _1, v4i64 _2) { return __lasx_xvsigncov_d(_1, _2); } + // CHECK-LABEL: @xvfmadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) ++// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmadd_s(_1, _2, _3); } + // CHECK-LABEL: @xvfmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) ++// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfmadd_d(_1, _2, _3); } + // CHECK-LABEL: @xvfmsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) ++// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmsub_s(_1, _2, _3); } + // CHECK-LABEL: @xvfmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) ++// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfmsub_d(_1, _2, _3); } + // CHECK-LABEL: @xvfnmadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) ++// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfnmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmadd_s(_1, _2, _3); } + // CHECK-LABEL: @xvfnmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) ++// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfnmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmadd_d(_1, _2, _3); } + // CHECK-LABEL: @xvfnmsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) ++// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfnmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmsub_s(_1, _2, _3); } + // CHECK-LABEL: @xvfnmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) ++// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfnmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmsub_d(_1, _2, _3); } + // CHECK-LABEL: @xvftintrne_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftintrne_w_s(v8f32 _1) { return __lasx_xvftintrne_w_s(_1); } + // CHECK-LABEL: @xvftintrne_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrne_l_d(v4f64 _1) { return __lasx_xvftintrne_l_d(_1); } + // CHECK-LABEL: @xvftintrp_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftintrp_w_s(v8f32 _1) { return __lasx_xvftintrp_w_s(_1); } + // CHECK-LABEL: @xvftintrp_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrp_l_d(v4f64 _1) { return __lasx_xvftintrp_l_d(_1); } + // CHECK-LABEL: @xvftintrm_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftintrm_w_s(v8f32 _1) { return __lasx_xvftintrm_w_s(_1); } + // CHECK-LABEL: @xvftintrm_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrm_l_d(v4f64 _1) { return __lasx_xvftintrm_l_d(_1); } + // CHECK-LABEL: @xvftint_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftint_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftint_w_d(_1, _2); } + // CHECK-LABEL: @xvffint_s_l( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvffint_s_l(v4i64 _1, v4i64 _2) { return __lasx_xvffint_s_l(_1, _2); } + // CHECK-LABEL: @xvftintrz_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftintrz_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrz_w_d(_1, _2); } + // CHECK-LABEL: @xvftintrp_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftintrp_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrp_w_d(_1, _2); } + // CHECK-LABEL: @xvftintrm_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftintrm_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrm_w_d(_1, _2); } + // CHECK-LABEL: @xvftintrne_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftintrne_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrne_w_d(_1, _2); } + // CHECK-LABEL: @xvftinth_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftinth_l_s(v8f32 _1) { return __lasx_xvftinth_l_s(_1); } + // CHECK-LABEL: @xvftintl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintl_l_s(v8f32 _1) { return __lasx_xvftintl_l_s(_1); } + // CHECK-LABEL: @xvffinth_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_112]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvffinth_d_w(v8i32 _1) { return __lasx_xvffinth_d_w(_1); } + // CHECK-LABEL: @xvffintl_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_112]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvffintl_d_w(v8i32 _1) { return __lasx_xvffintl_d_w(_1); } + // CHECK-LABEL: @xvftintrzh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrzh_l_s(v8f32 _1) { return __lasx_xvftintrzh_l_s(_1); } + // CHECK-LABEL: @xvftintrzl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrzl_l_s(v8f32 _1) { return __lasx_xvftintrzl_l_s(_1); } + // CHECK-LABEL: @xvftintrph_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrph_l_s(v8f32 _1) { return __lasx_xvftintrph_l_s(_1); } + // CHECK-LABEL: @xvftintrpl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrpl_l_s(v8f32 _1) { return __lasx_xvftintrpl_l_s(_1); } + // CHECK-LABEL: @xvftintrmh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrmh_l_s(v8f32 _1) { return __lasx_xvftintrmh_l_s(_1); } + // CHECK-LABEL: @xvftintrml_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrml_l_s(v8f32 _1) { return __lasx_xvftintrml_l_s(_1); } + // CHECK-LABEL: @xvftintrneh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrneh_l_s(v8f32 _1) { return __lasx_xvftintrneh_l_s(_1); } + // CHECK-LABEL: @xvftintrnel_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrnel_l_s(v8f32 _1) { return __lasx_xvftintrnel_l_s(_1); } + // CHECK-LABEL: @xvfrintrne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> +-// CHECK-NEXT: ret <8 x i32> [[TMP1]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfrintrne_s(v8f32 _1) { return __lasx_xvfrintrne_s(_1); } + // CHECK-LABEL: @xvfrintrne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> +-// CHECK-NEXT: ret <4 x i64> [[TMP1]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfrintrne_d(v4f64 _1) { return __lasx_xvfrintrne_d(_1); } + // CHECK-LABEL: @xvfrintrz_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> +-// CHECK-NEXT: ret <8 x i32> [[TMP1]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfrintrz_s(v8f32 _1) { return __lasx_xvfrintrz_s(_1); } + // CHECK-LABEL: @xvfrintrz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> +-// CHECK-NEXT: ret <4 x i64> [[TMP1]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfrintrz_d(v4f64 _1) { return __lasx_xvfrintrz_d(_1); } + // CHECK-LABEL: @xvfrintrp_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> +-// CHECK-NEXT: ret <8 x i32> [[TMP1]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfrintrp_s(v8f32 _1) { return __lasx_xvfrintrp_s(_1); } + // CHECK-LABEL: @xvfrintrp_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> +-// CHECK-NEXT: ret <4 x i64> [[TMP1]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfrintrp_d(v4f64 _1) { return __lasx_xvfrintrp_d(_1); } + // CHECK-LABEL: @xvfrintrm_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> +-// CHECK-NEXT: ret <8 x i32> [[TMP1]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfrintrm_s(v8f32 _1) { return __lasx_xvfrintrm_s(_1); } + // CHECK-LABEL: @xvfrintrm_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> +-// CHECK-NEXT: ret <4 x i64> [[TMP1]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfrintrm_d(v4f64 _1) { return __lasx_xvfrintrm_d(_1); } + // CHECK-LABEL: @xvld( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvld(void * _1) { return __lasx_xvld(_1, 1); } + // CHECK-LABEL: @xvst( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1) ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1) + // CHECK-NEXT: ret void + // + void xvst(v32i8 _1, void * _2) { return __lasx_xvst(_1, _2, 1); } + // CHECK-LABEL: @xvstelm_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1, i32 1) ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1, i32 1) + // CHECK-NEXT: ret void + // + void xvstelm_b(v32i8 _1, void * _2) { return __lasx_xvstelm_b(_1, _2, 1, 1); } + // CHECK-LABEL: @xvstelm_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1:%.*]], ptr [[_2:%.*]], i32 2, i32 1) ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1]], ptr [[_2:%.*]], i32 2, i32 1) + // CHECK-NEXT: ret void + // + void xvstelm_h(v16i16 _1, void * _2) { return __lasx_xvstelm_h(_1, _2, 2, 1); } + // CHECK-LABEL: @xvstelm_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1:%.*]], ptr [[_2:%.*]], i32 4, i32 1) ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1]], ptr [[_2:%.*]], i32 4, i32 1) + // CHECK-NEXT: ret void + // + void xvstelm_w(v8i32 _1, void * _2) { return __lasx_xvstelm_w(_1, _2, 4, 1); } + // CHECK-LABEL: @xvstelm_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1:%.*]], ptr [[_2:%.*]], i32 8, i32 1) ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1]], ptr [[_2:%.*]], i32 8, i32 1) + // CHECK-NEXT: ret void + // + void xvstelm_d(v4i64 _1, void * _2) { return __lasx_xvstelm_d(_1, _2, 8, 1); } + // CHECK-LABEL: @xvinsve0_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvinsve0_w(v8i32 _1, v8i32 _2) { return __lasx_xvinsve0_w(_1, _2, 1); } + // CHECK-LABEL: @xvinsve0_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvinsve0_d(v4i64 _1, v4i64 _2) { return __lasx_xvinsve0_d(_1, _2, 1); } + // CHECK-LABEL: @xvpickve_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvpickve_w(v8i32 _1) { return __lasx_xvpickve_w(_1, 1); } + // CHECK-LABEL: @xvpickve_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvpickve_d(v4i64 _1) { return __lasx_xvpickve_d(_1, 1); } + // CHECK-LABEL: @xvssrlrn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrlrn_b_h(_1, _2); } + // CHECK-LABEL: @xvssrlrn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrn_h_w(_1, _2); } + // CHECK-LABEL: @xvssrlrn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrn_w_d(_1, _2); } + // CHECK-LABEL: @xvssrln_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrln_b_h(_1, _2); } + // CHECK-LABEL: @xvssrln_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssrln_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrln_h_w(_1, _2); } + // CHECK-LABEL: @xvssrln_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrln_w_d(_1, _2); } + // CHECK-LABEL: @xvorn_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvorn_v(v32i8 _1, v32i8 _2) { return __lasx_xvorn_v(_1, _2); } + // CHECK-LABEL: @xvldi( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvldi() { return __lasx_xvldi(1); } + // CHECK-LABEL: @xvldx( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1:%.*]], i64 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1:%.*]], i64 1), !noalias [[META5:![0-9]+]] ++// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvldx(void * _1) { return __lasx_xvldx(_1, 1); } + // CHECK-LABEL: @xvstx( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i64 1) ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_112]], ptr [[_2:%.*]], i64 1) + // CHECK-NEXT: ret void + // + void xvstx(v32i8 _1, void * _2) { return __lasx_xvstx(_1, _2, 1); } + // CHECK-LABEL: @xvextl_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvextl_qu_du(v4u64 _1) { return __lasx_xvextl_qu_du(_1); } + // CHECK-LABEL: @xvinsgr2vr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1:%.*]], i32 1, i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1]], i32 1, i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvinsgr2vr_w(v8i32 _1) { return __lasx_xvinsgr2vr_w(_1, 1, 1); } + // CHECK-LABEL: @xvinsgr2vr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1:%.*]], i64 1, i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1]], i64 1, i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvinsgr2vr_d(v4i64 _1) { return __lasx_xvinsgr2vr_d(_1, 1, 1); } + // CHECK-LABEL: @xvreplve0_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvreplve0_b(v32i8 _1) { return __lasx_xvreplve0_b(_1); } + // CHECK-LABEL: @xvreplve0_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_112]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvreplve0_h(v16i16 _1) { return __lasx_xvreplve0_h(_1); } + // CHECK-LABEL: @xvreplve0_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_112]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvreplve0_w(v8i32 _1) { return __lasx_xvreplve0_w(_1); } + // CHECK-LABEL: @xvreplve0_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvreplve0_d(v4i64 _1) { return __lasx_xvreplve0_d(_1); } + // CHECK-LABEL: @xvreplve0_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvreplve0_q(v32i8 _1) { return __lasx_xvreplve0_q(_1); } + // CHECK-LABEL: @vext2xv_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 vext2xv_h_b(v32i8 _1) { return __lasx_vext2xv_h_b(_1); } + // CHECK-LABEL: @vext2xv_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_112]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 vext2xv_w_h(v16i16 _1) { return __lasx_vext2xv_w_h(_1); } + // CHECK-LABEL: @vext2xv_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_112]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_d_w(v8i32 _1) { return __lasx_vext2xv_d_w(_1); } + // CHECK-LABEL: @vext2xv_w_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 vext2xv_w_b(v32i8 _1) { return __lasx_vext2xv_w_b(_1); } + // CHECK-LABEL: @vext2xv_d_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_112]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_d_h(v16i16 _1) { return __lasx_vext2xv_d_h(_1); } + // CHECK-LABEL: @vext2xv_d_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_d_b(v32i8 _1) { return __lasx_vext2xv_d_b(_1); } + // CHECK-LABEL: @vext2xv_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 vext2xv_hu_bu(v32i8 _1) { return __lasx_vext2xv_hu_bu(_1); } + // CHECK-LABEL: @vext2xv_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_112]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 vext2xv_wu_hu(v16i16 _1) { return __lasx_vext2xv_wu_hu(_1); } + // CHECK-LABEL: @vext2xv_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> [[_112]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_du_wu(v8i32 _1) { return __lasx_vext2xv_du_wu(_1); } + // CHECK-LABEL: @vext2xv_wu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 vext2xv_wu_bu(v32i8 _1) { return __lasx_vext2xv_wu_bu(_1); } + // CHECK-LABEL: @vext2xv_du_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_112]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_du_hu(v16i16 _1) { return __lasx_vext2xv_du_hu(_1); } + // CHECK-LABEL: @vext2xv_du_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_du_bu(v32i8 _1) { return __lasx_vext2xv_du_bu(_1); } + // CHECK-LABEL: @xvpermi_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvpermi_q(v32i8 _1, v32i8 _2) { return __lasx_xvpermi_q(_1, _2, 1); } + // CHECK-LABEL: @xvpermi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvpermi_d(v4i64 _1) { return __lasx_xvpermi_d(_1, 1); } + // CHECK-LABEL: @xvperm_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __lasx_xvperm_w(_1, _2); } + // CHECK-LABEL: @xvldrepl_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvldrepl_b(void * _1) { return __lasx_xvldrepl_b(_1, 1); } + // CHECK-LABEL: @xvldrepl_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr [[_1:%.*]], i32 2) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvldrepl_h(void * _1) { return __lasx_xvldrepl_h(_1, 2); } + // CHECK-LABEL: @xvldrepl_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr [[_1:%.*]], i32 4) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvldrepl_w(void * _1) { return __lasx_xvldrepl_w(_1, 4); } + // CHECK-LABEL: @xvldrepl_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr [[_1:%.*]], i32 8) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvldrepl_d(void * _1) { return __lasx_xvldrepl_d(_1, 8); } + // CHECK-LABEL: @xvpickve2gr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xvpickve2gr_w(v8i32 _1) { return __lasx_xvpickve2gr_w(_1, 1); } + // CHECK-LABEL: @xvpickve2gr_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + unsigned int xvpickve2gr_wu(v8i32 _1) { return __lasx_xvpickve2gr_wu(_1, 1); } + // CHECK-LABEL: @xvpickve2gr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i64 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: ret i64 [[TMP1]] + // + long xvpickve2gr_d(v4i64 _1) { return __lasx_xvpickve2gr_d(_1, 1); } + // CHECK-LABEL: @xvpickve2gr_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i64 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: ret i64 [[TMP1]] + // + unsigned long int xvpickve2gr_du(v4i64 _1) { return __lasx_xvpickve2gr_du(_1, 1); } + // CHECK-LABEL: @xvaddwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvaddwev_q_d(_1, _2); } + // CHECK-LABEL: @xvaddwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvaddwev_d_w(_1, _2); } + // CHECK-LABEL: @xvaddwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvaddwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvaddwev_w_h(_1, _2); } + // CHECK-LABEL: @xvaddwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvaddwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwev_h_b(_1, _2); } + // CHECK-LABEL: @xvaddwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwev_q_du(_1, _2); } + // CHECK-LABEL: @xvaddwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwev_d_wu(_1, _2); } + // CHECK-LABEL: @xvaddwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvaddwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwev_w_hu(_1, _2); } + // CHECK-LABEL: @xvaddwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvaddwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvaddwev_h_bu(_1, _2); } + // CHECK-LABEL: @xvsubwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwev_q_d(_1, _2); } + // CHECK-LABEL: @xvsubwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwev_d_w(_1, _2); } + // CHECK-LABEL: @xvsubwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsubwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwev_w_h(_1, _2); } + // CHECK-LABEL: @xvsubwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsubwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwev_h_b(_1, _2); } + // CHECK-LABEL: @xvsubwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwev_q_du(_1, _2); } + // CHECK-LABEL: @xvsubwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsubwev_d_wu(_1, _2); } + // CHECK-LABEL: @xvsubwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsubwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwev_w_hu(_1, _2); } + // CHECK-LABEL: @xvsubwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsubwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwev_h_bu(_1, _2); } + // CHECK-LABEL: @xvmulwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwev_q_d(_1, _2); } + // CHECK-LABEL: @xvmulwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvmulwev_d_w(_1, _2); } + // CHECK-LABEL: @xvmulwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmulwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwev_w_h(_1, _2); } + // CHECK-LABEL: @xvmulwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmulwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwev_h_b(_1, _2); } + // CHECK-LABEL: @xvmulwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwev_q_du(_1, _2); } + // CHECK-LABEL: @xvmulwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwev_d_wu(_1, _2); } + // CHECK-LABEL: @xvmulwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmulwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwev_w_hu(_1, _2); } + // CHECK-LABEL: @xvmulwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmulwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwev_h_bu(_1, _2); } + // CHECK-LABEL: @xvaddwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvaddwod_q_d(_1, _2); } + // CHECK-LABEL: @xvaddwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvaddwod_d_w(_1, _2); } + // CHECK-LABEL: @xvaddwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvaddwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvaddwod_w_h(_1, _2); } + // CHECK-LABEL: @xvaddwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvaddwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwod_h_b(_1, _2); } + // CHECK-LABEL: @xvaddwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwod_q_du(_1, _2); } + // CHECK-LABEL: @xvaddwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwod_d_wu(_1, _2); } + // CHECK-LABEL: @xvaddwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvaddwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwod_w_hu(_1, _2); } + // CHECK-LABEL: @xvaddwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvaddwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvaddwod_h_bu(_1, _2); } + // CHECK-LABEL: @xvsubwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwod_q_d(_1, _2); } + // CHECK-LABEL: @xvsubwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwod_d_w(_1, _2); } + // CHECK-LABEL: @xvsubwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsubwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwod_w_h(_1, _2); } + // CHECK-LABEL: @xvsubwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsubwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwod_h_b(_1, _2); } + // CHECK-LABEL: @xvsubwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwod_q_du(_1, _2); } + // CHECK-LABEL: @xvsubwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsubwod_d_wu(_1, _2); } + // CHECK-LABEL: @xvsubwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsubwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwod_w_hu(_1, _2); } + // CHECK-LABEL: @xvsubwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsubwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwod_h_bu(_1, _2); } + // CHECK-LABEL: @xvmulwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwod_q_d(_1, _2); } + // CHECK-LABEL: @xvmulwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvmulwod_d_w(_1, _2); } + // CHECK-LABEL: @xvmulwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmulwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwod_w_h(_1, _2); } + // CHECK-LABEL: @xvmulwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmulwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwod_h_b(_1, _2); } + // CHECK-LABEL: @xvmulwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwod_q_du(_1, _2); } + // CHECK-LABEL: @xvmulwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwod_d_wu(_1, _2); } + // CHECK-LABEL: @xvmulwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmulwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwod_w_hu(_1, _2); } + // CHECK-LABEL: @xvmulwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmulwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwod_h_bu(_1, _2); } + // CHECK-LABEL: @xvaddwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwev_d_wu_w(_1, _2); } + // CHECK-LABEL: @xvaddwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvaddwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwev_w_hu_h(_1, _2); } + // CHECK-LABEL: @xvaddwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvaddwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwev_h_bu_b(_1, _2); } + // CHECK-LABEL: @xvmulwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwev_d_wu_w(_1, _2); } + // CHECK-LABEL: @xvmulwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmulwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvmulwev_w_hu_h(_1, _2); } + // CHECK-LABEL: @xvmulwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmulwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwev_h_bu_b(_1, _2); } + // CHECK-LABEL: @xvaddwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwod_d_wu_w(_1, _2); } + // CHECK-LABEL: @xvaddwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvaddwod_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwod_w_hu_h(_1, _2); } + // CHECK-LABEL: @xvaddwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvaddwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwod_h_bu_b(_1, _2); } + // CHECK-LABEL: @xvmulwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwod_d_wu_w(_1, _2); } + // CHECK-LABEL: @xvmulwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmulwod_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvmulwod_w_hu_h(_1, _2); } + // CHECK-LABEL: @xvmulwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmulwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwod_h_bu_b(_1, _2); } + // CHECK-LABEL: @xvhaddw_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvhaddw_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvhaddw_q_d(_1, _2); } + // CHECK-LABEL: @xvhaddw_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvhaddw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhaddw_qu_du(_1, _2); } + // CHECK-LABEL: @xvhsubw_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvhsubw_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvhsubw_q_d(_1, _2); } + // CHECK-LABEL: @xvhsubw_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvhsubw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhsubw_qu_du(_1, _2); } + // CHECK-LABEL: @xvmaddwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwev_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_d(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwev_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_w(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmaddwev_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_h(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmaddwev_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_b(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmaddwev_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwev_q_du(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmaddwev_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __lasx_xvmaddwev_d_wu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvmaddwev_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __lasx_xvmaddwev_w_hu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvmaddwev_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __lasx_xvmaddwev_h_bu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwod_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_d(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwod_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_w(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmaddwod_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_h(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmaddwod_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_b(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmaddwod_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwod_q_du(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmaddwod_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __lasx_xvmaddwod_d_wu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvmaddwod_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __lasx_xvmaddwod_w_hu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvmaddwod_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __lasx_xvmaddwod_h_bu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwev_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_du_d(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwev_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_wu_w(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmaddwev_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_hu_h(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmaddwev_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_bu_b(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwod_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_du_d(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwod_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_wu_w(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmaddwod_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_hu_h(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmaddwod_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_bu_b(_1, _2, _3); } + // CHECK-LABEL: @xvrotr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvrotr_b(v32i8 _1, v32i8 _2) { return __lasx_xvrotr_b(_1, _2); } + // CHECK-LABEL: @xvrotr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvrotr_h(v16i16 _1, v16i16 _2) { return __lasx_xvrotr_h(_1, _2); } + // CHECK-LABEL: @xvrotr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvrotr_w(v8i32 _1, v8i32 _2) { return __lasx_xvrotr_w(_1, _2); } + // CHECK-LABEL: @xvrotr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvrotr_d(v4i64 _1, v4i64 _2) { return __lasx_xvrotr_d(_1, _2); } + // CHECK-LABEL: @xvadd_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvadd_q(v4i64 _1, v4i64 _2) { return __lasx_xvadd_q(_1, _2); } + // CHECK-LABEL: @xvsub_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsub_q(v4i64 _1, v4i64 _2) { return __lasx_xvsub_q(_1, _2); } + // CHECK-LABEL: @xvaddwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvaddwev_q_du_d(_1, _2); } + // CHECK-LABEL: @xvaddwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvaddwod_q_du_d(_1, _2); } + // CHECK-LABEL: @xvmulwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwev_q_du_d(_1, _2); } + // CHECK-LABEL: @xvmulwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwod_q_du_d(_1, _2); } + // CHECK-LABEL: @xvmskgez_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmskgez_b(v32i8 _1) { return __lasx_xvmskgez_b(_1); } + // CHECK-LABEL: @xvmsknz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmsknz_b(v32i8 _1) { return __lasx_xvmsknz_b(_1); } + // CHECK-LABEL: @xvexth_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvexth_h_b(v32i8 _1) { return __lasx_xvexth_h_b(_1); } + // CHECK-LABEL: @xvexth_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_112]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvexth_w_h(v16i16 _1) { return __lasx_xvexth_w_h(_1); } + // CHECK-LABEL: @xvexth_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_112]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvexth_d_w(v8i32 _1) { return __lasx_xvexth_d_w(_1); } + // CHECK-LABEL: @xvexth_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvexth_q_d(v4i64 _1) { return __lasx_xvexth_q_d(_1); } + // CHECK-LABEL: @xvexth_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_112]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvexth_hu_bu(v32u8 _1) { return __lasx_xvexth_hu_bu(_1); } + // CHECK-LABEL: @xvexth_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_112]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvexth_wu_hu(v16u16 _1) { return __lasx_xvexth_wu_hu(_1); } + // CHECK-LABEL: @xvexth_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_112]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvexth_du_wu(v8u32 _1) { return __lasx_xvexth_du_wu(_1); } + // CHECK-LABEL: @xvexth_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvexth_qu_du(v4u64 _1) { return __lasx_xvexth_qu_du(_1); } + // CHECK-LABEL: @xvrotri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvrotri_b(v32i8 _1) { return __lasx_xvrotri_b(_1, 1); } + // CHECK-LABEL: @xvrotri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvrotri_h(v16i16 _1) { return __lasx_xvrotri_h(_1, 1); } + // CHECK-LABEL: @xvrotri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvrotri_w(v8i32 _1) { return __lasx_xvrotri_w(_1, 1); } + // CHECK-LABEL: @xvrotri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvrotri_d(v4i64 _1) { return __lasx_xvrotri_d(_1, 1); } + // CHECK-LABEL: @xvextl_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvextl_q_d(v4i64 _1) { return __lasx_xvextl_q_d(_1); } + // CHECK-LABEL: @xvsrlni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvsrlni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvsrlni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvsrlni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvsrlrni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlrni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvsrlrni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvsrlrni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvsrlrni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrlni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrlni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrlni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrlni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlni_bu_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlni_hu_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrlni_wu_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlni_du_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrlrni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrlrni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlrni_bu_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlrni_hu_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrlrni_wu_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlrni_du_q(_1, _2, 1); } + // CHECK-LABEL: @xvsrani_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrani_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvsrani_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrani_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvsrani_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrani_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvsrani_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrani_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvsrarni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrarni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvsrarni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrarni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvsrarni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrarni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvsrarni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrarni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrani_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrani_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrani_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrani_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrani_bu_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrani_hu_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrani_wu_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrani_du_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrarni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrarni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrarni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrarni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrarni_bu_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrarni_hu_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrarni_wu_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrarni_du_q(_1, _2, 1); } + // CHECK-LABEL: @xbnz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbnz_b(v32u8 _1) { return __lasx_xbnz_b(_1); } + // CHECK-LABEL: @xbnz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbnz_d(v4u64 _1) { return __lasx_xbnz_d(_1); } + // CHECK-LABEL: @xbnz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbnz_h(v16u16 _1) { return __lasx_xbnz_h(_1); } + // CHECK-LABEL: @xbnz_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbnz_v(v32u8 _1) { return __lasx_xbnz_v(_1); } + // CHECK-LABEL: @xbnz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbnz_w(v8u32 _1) { return __lasx_xbnz_w(_1); } + // CHECK-LABEL: @xbz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbz_b(v32u8 _1) { return __lasx_xbz_b(_1); } + // CHECK-LABEL: @xbz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbz_d(v4u64 _1) { return __lasx_xbz_d(_1); } + // CHECK-LABEL: @xbz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbz_h(v16u16 _1) { return __lasx_xbz_h(_1); } + // CHECK-LABEL: @xbz_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbz_v(v32u8 _1) { return __lasx_xbz_v(_1); } + // CHECK-LABEL: @xbz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbz_w(v8u32 _1) { return __lasx_xbz_w(_1); } + // CHECK-LABEL: @xvfcmp_caf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_caf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_caf_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_caf_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_caf_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_caf_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_ceq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_ceq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_ceq_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_ceq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_ceq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_ceq_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cle_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cle_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cle_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cle_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_clt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_clt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_clt_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_clt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_clt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_clt_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cne_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cne_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cne_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cor_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cor_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cor_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cor_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cueq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cueq_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cueq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cueq_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cule_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cule_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cule_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cule_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cult_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cult_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cult_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cult_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cult_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cun_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cun_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cune_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cune_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cune_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cune_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cun_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cun_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_saf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_saf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_saf_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_saf_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_saf_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_saf_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_seq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_seq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_seq_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_seq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_seq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_seq_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sle_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sle_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sle_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sle_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_slt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_slt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_slt_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_slt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_slt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_slt_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sne_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sne_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sne_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sor_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sor_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sor_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sor_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sueq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sueq_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sueq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sueq_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sule_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sule_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sule_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sule_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sult_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sult_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sult_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sult_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sult_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sun_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sun_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sune_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sune_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sune_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sune_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sun_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sun_s(_1, _2); } + // CHECK-LABEL: @xvpickve_d_f( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvpickve_d_f(v4f64 _1) { return __lasx_xvpickve_d_f(_1, 1); } + // CHECK-LABEL: @xvpickve_w_f( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvpickve_w_f(v8f32 _1) { return __lasx_xvpickve_w_f(_1, 1); } + // CHECK-LABEL: @xvrepli_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvrepli_b() { return __lasx_xvrepli_b(1); } + // CHECK-LABEL: @xvrepli_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvrepli_d() { return __lasx_xvrepli_d(1); } + // CHECK-LABEL: @xvrepli_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvrepli_h() { return __lasx_xvrepli_h(1); } + // CHECK-LABEL: @xvrepli_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvrepli_w() { return __lasx_xvrepli_w(1); } +diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin.c b/clang/test/CodeGen/LoongArch/lasx/builtin.c +index 0185f2004d52..f52a23a5faea 100644 +--- a/clang/test/CodeGen/LoongArch/lasx/builtin.c ++++ b/clang/test/CodeGen/LoongArch/lasx/builtin.c +@@ -27,4426 +27,6382 @@ typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); + + // CHECK-LABEL: @xvsll_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsll_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsll_b(_1, _2); } + // CHECK-LABEL: @xvsll_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsll_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsll_h(_1, _2); } + // CHECK-LABEL: @xvsll_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsll_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsll_w(_1, _2); } + // CHECK-LABEL: @xvsll_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsll_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsll_d(_1, _2); } + // CHECK-LABEL: @xvslli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvslli_b(v32i8 _1) { return __builtin_lasx_xvslli_b(_1, 1); } + // CHECK-LABEL: @xvslli_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvslli_h(v16i16 _1) { return __builtin_lasx_xvslli_h(_1, 1); } + // CHECK-LABEL: @xvslli_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvslli_w(v8i32 _1) { return __builtin_lasx_xvslli_w(_1, 1); } + // CHECK-LABEL: @xvslli_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvslli_d(v4i64 _1) { return __builtin_lasx_xvslli_d(_1, 1); } + // CHECK-LABEL: @xvsra_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsra_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsra_b(_1, _2); } + // CHECK-LABEL: @xvsra_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsra_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsra_h(_1, _2); } + // CHECK-LABEL: @xvsra_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsra_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsra_w(_1, _2); } + // CHECK-LABEL: @xvsra_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsra_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsra_d(_1, _2); } + // CHECK-LABEL: @xvsrai_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrai_b(v32i8 _1) { return __builtin_lasx_xvsrai_b(_1, 1); } + // CHECK-LABEL: @xvsrai_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrai_h(v16i16 _1) { return __builtin_lasx_xvsrai_h(_1, 1); } + // CHECK-LABEL: @xvsrai_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrai_w(v8i32 _1) { return __builtin_lasx_xvsrai_w(_1, 1); } + // CHECK-LABEL: @xvsrai_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrai_d(v4i64 _1) { return __builtin_lasx_xvsrai_d(_1, 1); } + // CHECK-LABEL: @xvsrar_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrar_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrar_b(_1, _2); } + // CHECK-LABEL: @xvsrar_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrar_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrar_h(_1, _2); } + // CHECK-LABEL: @xvsrar_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrar_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrar_w(_1, _2); } + // CHECK-LABEL: @xvsrar_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrar_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrar_d(_1, _2); } + // CHECK-LABEL: @xvsrari_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrari_b(v32i8 _1) { return __builtin_lasx_xvsrari_b(_1, 1); } + // CHECK-LABEL: @xvsrari_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrari_h(v16i16 _1) { return __builtin_lasx_xvsrari_h(_1, 1); } + // CHECK-LABEL: @xvsrari_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrari_w(v8i32 _1) { return __builtin_lasx_xvsrari_w(_1, 1); } + // CHECK-LABEL: @xvsrari_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrari_d(v4i64 _1) { return __builtin_lasx_xvsrari_d(_1, 1); } + // CHECK-LABEL: @xvsrl_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrl_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrl_b(_1, _2); } + // CHECK-LABEL: @xvsrl_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrl_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrl_h(_1, _2); } + // CHECK-LABEL: @xvsrl_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrl_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrl_w(_1, _2); } + // CHECK-LABEL: @xvsrl_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrl_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrl_d(_1, _2); } + // CHECK-LABEL: @xvsrli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrli_b(v32i8 _1) { return __builtin_lasx_xvsrli_b(_1, 1); } + // CHECK-LABEL: @xvsrli_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrli_h(v16i16 _1) { return __builtin_lasx_xvsrli_h(_1, 1); } + // CHECK-LABEL: @xvsrli_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrli_w(v8i32 _1) { return __builtin_lasx_xvsrli_w(_1, 1); } + // CHECK-LABEL: @xvsrli_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrli_d(v4i64 _1) { return __builtin_lasx_xvsrli_d(_1, 1); } + // CHECK-LABEL: @xvsrlr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrlr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlr_b(_1, _2); } + // CHECK-LABEL: @xvsrlr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrlr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlr_h(_1, _2); } + // CHECK-LABEL: @xvsrlr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrlr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlr_w(_1, _2); } + // CHECK-LABEL: @xvsrlr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrlr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlr_d(_1, _2); } + // CHECK-LABEL: @xvsrlri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrlri_b(v32i8 _1) { return __builtin_lasx_xvsrlri_b(_1, 1); } + // CHECK-LABEL: @xvsrlri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrlri_h(v16i16 _1) { return __builtin_lasx_xvsrlri_h(_1, 1); } + // CHECK-LABEL: @xvsrlri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrlri_w(v8i32 _1) { return __builtin_lasx_xvsrlri_w(_1, 1); } + // CHECK-LABEL: @xvsrlri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrlri_d(v4i64 _1) { return __builtin_lasx_xvsrlri_d(_1, 1); } + // CHECK-LABEL: @xvbitclr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvbitclr_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitclr_b(_1, _2); } + // CHECK-LABEL: @xvbitclr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvbitclr_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitclr_h(_1, _2); } + // CHECK-LABEL: @xvbitclr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvbitclr_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitclr_w(_1, _2); } + // CHECK-LABEL: @xvbitclr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvbitclr_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitclr_d(_1, _2); } + // CHECK-LABEL: @xvbitclri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvbitclri_b(v32u8 _1) { return __builtin_lasx_xvbitclri_b(_1, 1); } + // CHECK-LABEL: @xvbitclri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvbitclri_h(v16u16 _1) { return __builtin_lasx_xvbitclri_h(_1, 1); } + // CHECK-LABEL: @xvbitclri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvbitclri_w(v8u32 _1) { return __builtin_lasx_xvbitclri_w(_1, 1); } + // CHECK-LABEL: @xvbitclri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvbitclri_d(v4u64 _1) { return __builtin_lasx_xvbitclri_d(_1, 1); } + // CHECK-LABEL: @xvbitset_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvbitset_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitset_b(_1, _2); } + // CHECK-LABEL: @xvbitset_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvbitset_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitset_h(_1, _2); } + // CHECK-LABEL: @xvbitset_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvbitset_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitset_w(_1, _2); } + // CHECK-LABEL: @xvbitset_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvbitset_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitset_d(_1, _2); } + // CHECK-LABEL: @xvbitseti_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvbitseti_b(v32u8 _1) { return __builtin_lasx_xvbitseti_b(_1, 1); } + // CHECK-LABEL: @xvbitseti_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvbitseti_h(v16u16 _1) { return __builtin_lasx_xvbitseti_h(_1, 1); } + // CHECK-LABEL: @xvbitseti_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvbitseti_w(v8u32 _1) { return __builtin_lasx_xvbitseti_w(_1, 1); } + // CHECK-LABEL: @xvbitseti_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvbitseti_d(v4u64 _1) { return __builtin_lasx_xvbitseti_d(_1, 1); } + // CHECK-LABEL: @xvbitrev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvbitrev_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitrev_b(_1, _2); } + // CHECK-LABEL: @xvbitrev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvbitrev_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitrev_h(_1, _2); } + // CHECK-LABEL: @xvbitrev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvbitrev_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitrev_w(_1, _2); } + // CHECK-LABEL: @xvbitrev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvbitrev_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitrev_d(_1, _2); } + // CHECK-LABEL: @xvbitrevi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvbitrevi_b(v32u8 _1) { return __builtin_lasx_xvbitrevi_b(_1, 1); } + // CHECK-LABEL: @xvbitrevi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvbitrevi_h(v16u16 _1) { return __builtin_lasx_xvbitrevi_h(_1, 1); } + // CHECK-LABEL: @xvbitrevi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvbitrevi_w(v8u32 _1) { return __builtin_lasx_xvbitrevi_w(_1, 1); } + // CHECK-LABEL: @xvbitrevi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvbitrevi_d(v4u64 _1) { return __builtin_lasx_xvbitrevi_d(_1, 1); } + // CHECK-LABEL: @xvadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvadd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvadd_b(_1, _2); } + // CHECK-LABEL: @xvadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvadd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvadd_h(_1, _2); } + // CHECK-LABEL: @xvadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvadd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvadd_w(_1, _2); } + // CHECK-LABEL: @xvadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvadd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadd_d(_1, _2); } + // CHECK-LABEL: @xvaddi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvaddi_bu(v32i8 _1) { return __builtin_lasx_xvaddi_bu(_1, 1); } + // CHECK-LABEL: @xvaddi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvaddi_hu(v16i16 _1) { return __builtin_lasx_xvaddi_hu(_1, 1); } + // CHECK-LABEL: @xvaddi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvaddi_wu(v8i32 _1) { return __builtin_lasx_xvaddi_wu(_1, 1); } + // CHECK-LABEL: @xvaddi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddi_du(v4i64 _1) { return __builtin_lasx_xvaddi_du(_1, 1); } + // CHECK-LABEL: @xvsub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsub_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsub_b(_1, _2); } + // CHECK-LABEL: @xvsub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsub_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsub_h(_1, _2); } + // CHECK-LABEL: @xvsub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsub_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsub_w(_1, _2); } + // CHECK-LABEL: @xvsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsub_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsub_d(_1, _2); } + // CHECK-LABEL: @xvsubi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsubi_bu(v32i8 _1) { return __builtin_lasx_xvsubi_bu(_1, 1); } + // CHECK-LABEL: @xvsubi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsubi_hu(v16i16 _1) { return __builtin_lasx_xvsubi_hu(_1, 1); } + // CHECK-LABEL: @xvsubi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsubi_wu(v8i32 _1) { return __builtin_lasx_xvsubi_wu(_1, 1); } + // CHECK-LABEL: @xvsubi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubi_du(v4i64 _1) { return __builtin_lasx_xvsubi_du(_1, 1); } + // CHECK-LABEL: @xvmax_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmax_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmax_b(_1, _2); } + // CHECK-LABEL: @xvmax_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmax_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmax_h(_1, _2); } + // CHECK-LABEL: @xvmax_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmax_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmax_w(_1, _2); } + // CHECK-LABEL: @xvmax_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmax_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmax_d(_1, _2); } + // CHECK-LABEL: @xvmaxi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmaxi_b(v32i8 _1) { return __builtin_lasx_xvmaxi_b(_1, 1); } + // CHECK-LABEL: @xvmaxi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmaxi_h(v16i16 _1) { return __builtin_lasx_xvmaxi_h(_1, 1); } + // CHECK-LABEL: @xvmaxi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmaxi_w(v8i32 _1) { return __builtin_lasx_xvmaxi_w(_1, 1); } + // CHECK-LABEL: @xvmaxi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaxi_d(v4i64 _1) { return __builtin_lasx_xvmaxi_d(_1, 1); } + // CHECK-LABEL: @xvmax_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvmax_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmax_bu(_1, _2); } + // CHECK-LABEL: @xvmax_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvmax_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmax_hu(_1, _2); } + // CHECK-LABEL: @xvmax_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvmax_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmax_wu(_1, _2); } + // CHECK-LABEL: @xvmax_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmax_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmax_du(_1, _2); } + // CHECK-LABEL: @xvmaxi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvmaxi_bu(v32u8 _1) { return __builtin_lasx_xvmaxi_bu(_1, 1); } + // CHECK-LABEL: @xvmaxi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvmaxi_hu(v16u16 _1) { return __builtin_lasx_xvmaxi_hu(_1, 1); } + // CHECK-LABEL: @xvmaxi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvmaxi_wu(v8u32 _1) { return __builtin_lasx_xvmaxi_wu(_1, 1); } + // CHECK-LABEL: @xvmaxi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmaxi_du(v4u64 _1) { return __builtin_lasx_xvmaxi_du(_1, 1); } + // CHECK-LABEL: @xvmin_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmin_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmin_b(_1, _2); } + // CHECK-LABEL: @xvmin_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmin_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmin_h(_1, _2); } + // CHECK-LABEL: @xvmin_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmin_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmin_w(_1, _2); } + // CHECK-LABEL: @xvmin_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmin_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmin_d(_1, _2); } + // CHECK-LABEL: @xvmini_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmini_b(v32i8 _1) { return __builtin_lasx_xvmini_b(_1, 1); } + // CHECK-LABEL: @xvmini_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmini_h(v16i16 _1) { return __builtin_lasx_xvmini_h(_1, 1); } + // CHECK-LABEL: @xvmini_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmini_w(v8i32 _1) { return __builtin_lasx_xvmini_w(_1, 1); } + // CHECK-LABEL: @xvmini_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmini_d(v4i64 _1) { return __builtin_lasx_xvmini_d(_1, 1); } + // CHECK-LABEL: @xvmin_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvmin_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmin_bu(_1, _2); } + // CHECK-LABEL: @xvmin_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvmin_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmin_hu(_1, _2); } + // CHECK-LABEL: @xvmin_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvmin_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmin_wu(_1, _2); } + // CHECK-LABEL: @xvmin_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmin_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmin_du(_1, _2); } + // CHECK-LABEL: @xvmini_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvmini_bu(v32u8 _1) { return __builtin_lasx_xvmini_bu(_1, 1); } + // CHECK-LABEL: @xvmini_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvmini_hu(v16u16 _1) { return __builtin_lasx_xvmini_hu(_1, 1); } + // CHECK-LABEL: @xvmini_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvmini_wu(v8u32 _1) { return __builtin_lasx_xvmini_wu(_1, 1); } + // CHECK-LABEL: @xvmini_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmini_du(v4u64 _1) { return __builtin_lasx_xvmini_du(_1, 1); } + // CHECK-LABEL: @xvseq_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvseq_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvseq_b(_1, _2); } + // CHECK-LABEL: @xvseq_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvseq_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvseq_h(_1, _2); } + // CHECK-LABEL: @xvseq_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvseq_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvseq_w(_1, _2); } + // CHECK-LABEL: @xvseq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvseq_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvseq_d(_1, _2); } + // CHECK-LABEL: @xvseqi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvseqi_b(v32i8 _1) { return __builtin_lasx_xvseqi_b(_1, 1); } + // CHECK-LABEL: @xvseqi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvseqi_h(v16i16 _1) { return __builtin_lasx_xvseqi_h(_1, 1); } + // CHECK-LABEL: @xvseqi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvseqi_w(v8i32 _1) { return __builtin_lasx_xvseqi_w(_1, 1); } + // CHECK-LABEL: @xvseqi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvseqi_d(v4i64 _1) { return __builtin_lasx_xvseqi_d(_1, 1); } + // CHECK-LABEL: @xvslt_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvslt_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvslt_b(_1, _2); } + // CHECK-LABEL: @xvslt_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvslt_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvslt_h(_1, _2); } + // CHECK-LABEL: @xvslt_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvslt_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvslt_w(_1, _2); } + // CHECK-LABEL: @xvslt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvslt_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvslt_d(_1, _2); } + // CHECK-LABEL: @xvslti_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvslti_b(v32i8 _1) { return __builtin_lasx_xvslti_b(_1, 1); } + // CHECK-LABEL: @xvslti_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvslti_h(v16i16 _1) { return __builtin_lasx_xvslti_h(_1, 1); } + // CHECK-LABEL: @xvslti_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvslti_w(v8i32 _1) { return __builtin_lasx_xvslti_w(_1, 1); } + // CHECK-LABEL: @xvslti_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvslti_d(v4i64 _1) { return __builtin_lasx_xvslti_d(_1, 1); } + // CHECK-LABEL: @xvslt_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvslt_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvslt_bu(_1, _2); } + // CHECK-LABEL: @xvslt_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvslt_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvslt_hu(_1, _2); } + // CHECK-LABEL: @xvslt_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvslt_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvslt_wu(_1, _2); } + // CHECK-LABEL: @xvslt_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvslt_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvslt_du(_1, _2); } + // CHECK-LABEL: @xvslti_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvslti_bu(v32u8 _1) { return __builtin_lasx_xvslti_bu(_1, 1); } + // CHECK-LABEL: @xvslti_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvslti_hu(v16u16 _1) { return __builtin_lasx_xvslti_hu(_1, 1); } + // CHECK-LABEL: @xvslti_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvslti_wu(v8u32 _1) { return __builtin_lasx_xvslti_wu(_1, 1); } + // CHECK-LABEL: @xvslti_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvslti_du(v4u64 _1) { return __builtin_lasx_xvslti_du(_1, 1); } + // CHECK-LABEL: @xvsle_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsle_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsle_b(_1, _2); } + // CHECK-LABEL: @xvsle_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsle_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsle_h(_1, _2); } + // CHECK-LABEL: @xvsle_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsle_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsle_w(_1, _2); } + // CHECK-LABEL: @xvsle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsle_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsle_d(_1, _2); } + // CHECK-LABEL: @xvslei_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvslei_b(v32i8 _1) { return __builtin_lasx_xvslei_b(_1, 1); } + // CHECK-LABEL: @xvslei_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvslei_h(v16i16 _1) { return __builtin_lasx_xvslei_h(_1, 1); } + // CHECK-LABEL: @xvslei_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvslei_w(v8i32 _1) { return __builtin_lasx_xvslei_w(_1, 1); } + // CHECK-LABEL: @xvslei_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvslei_d(v4i64 _1) { return __builtin_lasx_xvslei_d(_1, 1); } + // CHECK-LABEL: @xvsle_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsle_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsle_bu(_1, _2); } + // CHECK-LABEL: @xvsle_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsle_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsle_hu(_1, _2); } + // CHECK-LABEL: @xvsle_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsle_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsle_wu(_1, _2); } + // CHECK-LABEL: @xvsle_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsle_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsle_du(_1, _2); } + // CHECK-LABEL: @xvslei_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvslei_bu(v32u8 _1) { return __builtin_lasx_xvslei_bu(_1, 1); } + // CHECK-LABEL: @xvslei_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvslei_hu(v16u16 _1) { return __builtin_lasx_xvslei_hu(_1, 1); } + // CHECK-LABEL: @xvslei_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvslei_wu(v8u32 _1) { return __builtin_lasx_xvslei_wu(_1, 1); } + // CHECK-LABEL: @xvslei_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvslei_du(v4u64 _1) { return __builtin_lasx_xvslei_du(_1, 1); } + // CHECK-LABEL: @xvsat_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsat_b(v32i8 _1) { return __builtin_lasx_xvsat_b(_1, 1); } + // CHECK-LABEL: @xvsat_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsat_h(v16i16 _1) { return __builtin_lasx_xvsat_h(_1, 1); } + // CHECK-LABEL: @xvsat_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsat_w(v8i32 _1) { return __builtin_lasx_xvsat_w(_1, 1); } + // CHECK-LABEL: @xvsat_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsat_d(v4i64 _1) { return __builtin_lasx_xvsat_d(_1, 1); } + // CHECK-LABEL: @xvsat_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvsat_bu(v32u8 _1) { return __builtin_lasx_xvsat_bu(_1, 1); } + // CHECK-LABEL: @xvsat_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvsat_hu(v16u16 _1) { return __builtin_lasx_xvsat_hu(_1, 1); } + // CHECK-LABEL: @xvsat_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvsat_wu(v8u32 _1) { return __builtin_lasx_xvsat_wu(_1, 1); } + // CHECK-LABEL: @xvsat_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvsat_du(v4u64 _1) { return __builtin_lasx_xvsat_du(_1, 1); } + // CHECK-LABEL: @xvadda_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvadda_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvadda_b(_1, _2); } + // CHECK-LABEL: @xvadda_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvadda_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvadda_h(_1, _2); } + // CHECK-LABEL: @xvadda_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvadda_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvadda_w(_1, _2); } + // CHECK-LABEL: @xvadda_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvadda_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadda_d(_1, _2); } + // CHECK-LABEL: @xvsadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsadd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsadd_b(_1, _2); } + // CHECK-LABEL: @xvsadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsadd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsadd_h(_1, _2); } + // CHECK-LABEL: @xvsadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsadd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsadd_w(_1, _2); } + // CHECK-LABEL: @xvsadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsadd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsadd_d(_1, _2); } + // CHECK-LABEL: @xvsadd_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvsadd_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsadd_bu(_1, _2); } + // CHECK-LABEL: @xvsadd_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvsadd_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsadd_hu(_1, _2); } + // CHECK-LABEL: @xvsadd_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvsadd_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsadd_wu(_1, _2); } + // CHECK-LABEL: @xvsadd_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvsadd_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsadd_du(_1, _2); } + // CHECK-LABEL: @xvavg_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvavg_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvavg_b(_1, _2); } + // CHECK-LABEL: @xvavg_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvavg_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvavg_h(_1, _2); } + // CHECK-LABEL: @xvavg_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvavg_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvavg_w(_1, _2); } + // CHECK-LABEL: @xvavg_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvavg_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvavg_d(_1, _2); } + // CHECK-LABEL: @xvavg_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvavg_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvavg_bu(_1, _2); } + // CHECK-LABEL: @xvavg_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvavg_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvavg_hu(_1, _2); } + // CHECK-LABEL: @xvavg_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvavg_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvavg_wu(_1, _2); } + // CHECK-LABEL: @xvavg_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvavg_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvavg_du(_1, _2); } + // CHECK-LABEL: @xvavgr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvavgr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvavgr_b(_1, _2); } + // CHECK-LABEL: @xvavgr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvavgr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvavgr_h(_1, _2); } + // CHECK-LABEL: @xvavgr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvavgr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvavgr_w(_1, _2); } + // CHECK-LABEL: @xvavgr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvavgr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvavgr_d(_1, _2); } + // CHECK-LABEL: @xvavgr_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvavgr_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvavgr_bu(_1, _2); } + // CHECK-LABEL: @xvavgr_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvavgr_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvavgr_hu(_1, _2); } + // CHECK-LABEL: @xvavgr_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvavgr_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvavgr_wu(_1, _2); } + // CHECK-LABEL: @xvavgr_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvavgr_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvavgr_du(_1, _2); } + // CHECK-LABEL: @xvssub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssub_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssub_b(_1, _2); } + // CHECK-LABEL: @xvssub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssub_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssub_h(_1, _2); } + // CHECK-LABEL: @xvssub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssub_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssub_w(_1, _2); } + // CHECK-LABEL: @xvssub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvssub_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssub_d(_1, _2); } + // CHECK-LABEL: @xvssub_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssub_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvssub_bu(_1, _2); } + // CHECK-LABEL: @xvssub_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssub_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssub_hu(_1, _2); } + // CHECK-LABEL: @xvssub_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssub_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssub_wu(_1, _2); } + // CHECK-LABEL: @xvssub_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvssub_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssub_du(_1, _2); } + // CHECK-LABEL: @xvabsd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvabsd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvabsd_b(_1, _2); } + // CHECK-LABEL: @xvabsd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvabsd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvabsd_h(_1, _2); } + // CHECK-LABEL: @xvabsd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvabsd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvabsd_w(_1, _2); } + // CHECK-LABEL: @xvabsd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvabsd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvabsd_d(_1, _2); } + // CHECK-LABEL: @xvabsd_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvabsd_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvabsd_bu(_1, _2); } + // CHECK-LABEL: @xvabsd_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvabsd_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvabsd_hu(_1, _2); } + // CHECK-LABEL: @xvabsd_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvabsd_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvabsd_wu(_1, _2); } + // CHECK-LABEL: @xvabsd_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvabsd_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvabsd_du(_1, _2); } + // CHECK-LABEL: @xvmul_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmul_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmul_b(_1, _2); } + // CHECK-LABEL: @xvmul_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmul_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmul_h(_1, _2); } + // CHECK-LABEL: @xvmul_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmul_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmul_w(_1, _2); } + // CHECK-LABEL: @xvmul_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmul_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmul_d(_1, _2); } + // CHECK-LABEL: @xvmadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) ++// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmadd_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmadd_b(_1, _2, _3); } + // CHECK-LABEL: @xvmadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmadd_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmadd_h(_1, _2, _3); } + // CHECK-LABEL: @xvmadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmadd_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmadd_w(_1, _2, _3); } + // CHECK-LABEL: @xvmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmadd_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmadd_d(_1, _2, _3); } + // CHECK-LABEL: @xvmsub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) ++// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmsub_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmsub_b(_1, _2, _3); } + // CHECK-LABEL: @xvmsub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmsub_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmsub_h(_1, _2, _3); } + // CHECK-LABEL: @xvmsub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmsub_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmsub_w(_1, _2, _3); } + // CHECK-LABEL: @xvmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmsub_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmsub_d(_1, _2, _3); } + // CHECK-LABEL: @xvdiv_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvdiv_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvdiv_b(_1, _2); } + // CHECK-LABEL: @xvdiv_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvdiv_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvdiv_h(_1, _2); } + // CHECK-LABEL: @xvdiv_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvdiv_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvdiv_w(_1, _2); } + // CHECK-LABEL: @xvdiv_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvdiv_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvdiv_d(_1, _2); } + // CHECK-LABEL: @xvdiv_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvdiv_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvdiv_bu(_1, _2); } + // CHECK-LABEL: @xvdiv_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvdiv_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvdiv_hu(_1, _2); } + // CHECK-LABEL: @xvdiv_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvdiv_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvdiv_wu(_1, _2); } + // CHECK-LABEL: @xvdiv_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvdiv_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvdiv_du(_1, _2); } + // CHECK-LABEL: @xvhaddw_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvhaddw_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvhaddw_h_b(_1, _2); } + // CHECK-LABEL: @xvhaddw_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvhaddw_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvhaddw_w_h(_1, _2); } + // CHECK-LABEL: @xvhaddw_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvhaddw_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvhaddw_d_w(_1, _2); } + // CHECK-LABEL: @xvhaddw_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvhaddw_hu_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvhaddw_hu_bu(_1, _2); } + // CHECK-LABEL: @xvhaddw_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvhaddw_wu_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvhaddw_wu_hu(_1, _2); } + // CHECK-LABEL: @xvhaddw_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvhaddw_du_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvhaddw_du_wu(_1, _2); } + // CHECK-LABEL: @xvhsubw_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvhsubw_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvhsubw_h_b(_1, _2); } + // CHECK-LABEL: @xvhsubw_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvhsubw_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvhsubw_w_h(_1, _2); } + // CHECK-LABEL: @xvhsubw_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvhsubw_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvhsubw_d_w(_1, _2); } + // CHECK-LABEL: @xvhsubw_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvhsubw_hu_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvhsubw_hu_bu(_1, _2); } + // CHECK-LABEL: @xvhsubw_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvhsubw_wu_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvhsubw_wu_hu(_1, _2); } + // CHECK-LABEL: @xvhsubw_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvhsubw_du_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvhsubw_du_wu(_1, _2); } + // CHECK-LABEL: @xvmod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmod_b(_1, _2); } + // CHECK-LABEL: @xvmod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmod_h(_1, _2); } + // CHECK-LABEL: @xvmod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmod_w(_1, _2); } + // CHECK-LABEL: @xvmod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmod_d(_1, _2); } + // CHECK-LABEL: @xvmod_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvmod_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmod_bu(_1, _2); } + // CHECK-LABEL: @xvmod_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvmod_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmod_hu(_1, _2); } + // CHECK-LABEL: @xvmod_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvmod_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmod_wu(_1, _2); } + // CHECK-LABEL: @xvmod_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmod_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmod_du(_1, _2); } + // CHECK-LABEL: @xvrepl128vei_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvrepl128vei_b(v32i8 _1) { return __builtin_lasx_xvrepl128vei_b(_1, 1); } + // CHECK-LABEL: @xvrepl128vei_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvrepl128vei_h(v16i16 _1) { return __builtin_lasx_xvrepl128vei_h(_1, 1); } + // CHECK-LABEL: @xvrepl128vei_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvrepl128vei_w(v8i32 _1) { return __builtin_lasx_xvrepl128vei_w(_1, 1); } + // CHECK-LABEL: @xvrepl128vei_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvrepl128vei_d(v4i64 _1) { return __builtin_lasx_xvrepl128vei_d(_1, 1); } + // CHECK-LABEL: @xvpickev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvpickev_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpickev_b(_1, _2); } + // CHECK-LABEL: @xvpickev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvpickev_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpickev_h(_1, _2); } + // CHECK-LABEL: @xvpickev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvpickev_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpickev_w(_1, _2); } + // CHECK-LABEL: @xvpickev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvpickev_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpickev_d(_1, _2); } + // CHECK-LABEL: @xvpickod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvpickod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpickod_b(_1, _2); } + // CHECK-LABEL: @xvpickod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvpickod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpickod_h(_1, _2); } + // CHECK-LABEL: @xvpickod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvpickod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpickod_w(_1, _2); } + // CHECK-LABEL: @xvpickod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvpickod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpickod_d(_1, _2); } + // CHECK-LABEL: @xvilvh_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvilvh_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvilvh_b(_1, _2); } + // CHECK-LABEL: @xvilvh_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvilvh_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvilvh_h(_1, _2); } + // CHECK-LABEL: @xvilvh_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvilvh_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvilvh_w(_1, _2); } + // CHECK-LABEL: @xvilvh_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvilvh_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvilvh_d(_1, _2); } + // CHECK-LABEL: @xvilvl_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvilvl_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvilvl_b(_1, _2); } + // CHECK-LABEL: @xvilvl_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvilvl_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvilvl_h(_1, _2); } + // CHECK-LABEL: @xvilvl_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvilvl_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvilvl_w(_1, _2); } + // CHECK-LABEL: @xvilvl_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvilvl_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvilvl_d(_1, _2); } + // CHECK-LABEL: @xvpackev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvpackev_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpackev_b(_1, _2); } + // CHECK-LABEL: @xvpackev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvpackev_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpackev_h(_1, _2); } + // CHECK-LABEL: @xvpackev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvpackev_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpackev_w(_1, _2); } + // CHECK-LABEL: @xvpackev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvpackev_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpackev_d(_1, _2); } + // CHECK-LABEL: @xvpackod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvpackod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpackod_b(_1, _2); } + // CHECK-LABEL: @xvpackod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvpackod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpackod_h(_1, _2); } + // CHECK-LABEL: @xvpackod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvpackod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpackod_w(_1, _2); } + // CHECK-LABEL: @xvpackod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvpackod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpackod_d(_1, _2); } + // CHECK-LABEL: @xvshuf_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) ++// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvshuf_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvshuf_b(_1, _2, _3); } + // CHECK-LABEL: @xvshuf_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvshuf_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvshuf_h(_1, _2, _3); } + // CHECK-LABEL: @xvshuf_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvshuf_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvshuf_w(_1, _2, _3); } + // CHECK-LABEL: @xvshuf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvshuf_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvshuf_d(_1, _2, _3); } + // CHECK-LABEL: @xvand_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvand_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvand_v(_1, _2); } + // CHECK-LABEL: @xvandi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvandi_b(v32u8 _1) { return __builtin_lasx_xvandi_b(_1, 1); } + // CHECK-LABEL: @xvor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvor_v(_1, _2); } + // CHECK-LABEL: @xvori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvori_b(v32u8 _1) { return __builtin_lasx_xvori_b(_1, 1); } + // CHECK-LABEL: @xvnor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvnor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvnor_v(_1, _2); } + // CHECK-LABEL: @xvnori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvnori_b(v32u8 _1) { return __builtin_lasx_xvnori_b(_1, 1); } + // CHECK-LABEL: @xvxor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvxor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvxor_v(_1, _2); } + // CHECK-LABEL: @xvxori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvxori_b(v32u8 _1) { return __builtin_lasx_xvxori_b(_1, 1); } + // CHECK-LABEL: @xvbitsel_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) ++// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvbitsel_v(v32u8 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvbitsel_v(_1, _2, _3); } + // CHECK-LABEL: @xvbitseli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvbitseli_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitseli_b(_1, _2, 1); } + // CHECK-LABEL: @xvshuf4i_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvshuf4i_b(v32i8 _1) { return __builtin_lasx_xvshuf4i_b(_1, 1); } + // CHECK-LABEL: @xvshuf4i_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvshuf4i_h(v16i16 _1) { return __builtin_lasx_xvshuf4i_h(_1, 1); } + // CHECK-LABEL: @xvshuf4i_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvshuf4i_w(v8i32 _1) { return __builtin_lasx_xvshuf4i_w(_1, 1); } + // CHECK-LABEL: @xvreplgr2vr_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvreplgr2vr_b(int _1) { return __builtin_lasx_xvreplgr2vr_b(_1); } + // CHECK-LABEL: @xvreplgr2vr_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvreplgr2vr_h(int _1) { return __builtin_lasx_xvreplgr2vr_h(_1); } + // CHECK-LABEL: @xvreplgr2vr_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvreplgr2vr_w(int _1) { return __builtin_lasx_xvreplgr2vr_w(_1); } + // CHECK-LABEL: @xvreplgr2vr_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[CONV:%.*]] = sext i32 [[_1:%.*]] to i64 + // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 [[CONV]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvreplgr2vr_d(int _1) { return __builtin_lasx_xvreplgr2vr_d(_1); } + // CHECK-LABEL: @xvpcnt_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvpcnt_b(v32i8 _1) { return __builtin_lasx_xvpcnt_b(_1); } + // CHECK-LABEL: @xvpcnt_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_1]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvpcnt_h(v16i16 _1) { return __builtin_lasx_xvpcnt_h(_1); } + // CHECK-LABEL: @xvpcnt_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvpcnt_w(v8i32 _1) { return __builtin_lasx_xvpcnt_w(_1); } + // CHECK-LABEL: @xvpcnt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvpcnt_d(v4i64 _1) { return __builtin_lasx_xvpcnt_d(_1); } + // CHECK-LABEL: @xvclo_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvclo_b(v32i8 _1) { return __builtin_lasx_xvclo_b(_1); } + // CHECK-LABEL: @xvclo_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_1]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvclo_h(v16i16 _1) { return __builtin_lasx_xvclo_h(_1); } + // CHECK-LABEL: @xvclo_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvclo_w(v8i32 _1) { return __builtin_lasx_xvclo_w(_1); } + // CHECK-LABEL: @xvclo_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvclo_d(v4i64 _1) { return __builtin_lasx_xvclo_d(_1); } + // CHECK-LABEL: @xvclz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvclz_b(v32i8 _1) { return __builtin_lasx_xvclz_b(_1); } + // CHECK-LABEL: @xvclz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_1]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvclz_h(v16i16 _1) { return __builtin_lasx_xvclz_h(_1); } + // CHECK-LABEL: @xvclz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvclz_w(v8i32 _1) { return __builtin_lasx_xvclz_w(_1); } + // CHECK-LABEL: @xvclz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvclz_d(v4i64 _1) { return __builtin_lasx_xvclz_d(_1); } + // CHECK-LABEL: @xvfadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfadd_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfadd_s(_1, _2); } + // CHECK-LABEL: @xvfadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfadd_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfadd_d(_1, _2); } + // CHECK-LABEL: @xvfsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfsub_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfsub_s(_1, _2); } + // CHECK-LABEL: @xvfsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfsub_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfsub_d(_1, _2); } + // CHECK-LABEL: @xvfmul_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfmul_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmul_s(_1, _2); } + // CHECK-LABEL: @xvfmul_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfmul_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmul_d(_1, _2); } + // CHECK-LABEL: @xvfdiv_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfdiv_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfdiv_s(_1, _2); } + // CHECK-LABEL: @xvfdiv_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfdiv_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfdiv_d(_1, _2); } + // CHECK-LABEL: @xvfcvt_h_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvfcvt_h_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcvt_h_s(_1, _2); } + // CHECK-LABEL: @xvfcvt_s_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfcvt_s_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcvt_s_d(_1, _2); } + // CHECK-LABEL: @xvfmin_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfmin_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmin_s(_1, _2); } + // CHECK-LABEL: @xvfmin_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfmin_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmin_d(_1, _2); } + // CHECK-LABEL: @xvfmina_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfmina_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmina_s(_1, _2); } + // CHECK-LABEL: @xvfmina_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfmina_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmina_d(_1, _2); } + // CHECK-LABEL: @xvfmax_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfmax_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmax_s(_1, _2); } + // CHECK-LABEL: @xvfmax_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfmax_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmax_d(_1, _2); } + // CHECK-LABEL: @xvfmaxa_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfmaxa_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmaxa_s(_1, _2); } + // CHECK-LABEL: @xvfmaxa_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfmaxa_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmaxa_d(_1, _2); } + // CHECK-LABEL: @xvfclass_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfclass_s(v8f32 _1) { return __builtin_lasx_xvfclass_s(_1); } + // CHECK-LABEL: @xvfclass_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfclass_d(v4f64 _1) { return __builtin_lasx_xvfclass_d(_1); } + // CHECK-LABEL: @xvfsqrt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfsqrt_s(v8f32 _1) { return __builtin_lasx_xvfsqrt_s(_1); } + // CHECK-LABEL: @xvfsqrt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfsqrt_d(v4f64 _1) { return __builtin_lasx_xvfsqrt_d(_1); } + // CHECK-LABEL: @xvfrecip_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfrecip_s(v8f32 _1) { return __builtin_lasx_xvfrecip_s(_1); } + // CHECK-LABEL: @xvfrecip_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfrecip_d(v4f64 _1) { return __builtin_lasx_xvfrecip_d(_1); } + // CHECK-LABEL: @xvfrint_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfrint_s(v8f32 _1) { return __builtin_lasx_xvfrint_s(_1); } + // CHECK-LABEL: @xvfrint_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfrint_d(v4f64 _1) { return __builtin_lasx_xvfrint_d(_1); } + // CHECK-LABEL: @xvfrsqrt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfrsqrt_s(v8f32 _1) { return __builtin_lasx_xvfrsqrt_s(_1); } + // CHECK-LABEL: @xvfrsqrt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfrsqrt_d(v4f64 _1) { return __builtin_lasx_xvfrsqrt_d(_1); } + // CHECK-LABEL: @xvflogb_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvflogb_s(v8f32 _1) { return __builtin_lasx_xvflogb_s(_1); } + // CHECK-LABEL: @xvflogb_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvflogb_d(v4f64 _1) { return __builtin_lasx_xvflogb_d(_1); } + // CHECK-LABEL: @xvfcvth_s_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfcvth_s_h(v16i16 _1) { return __builtin_lasx_xvfcvth_s_h(_1); } + // CHECK-LABEL: @xvfcvth_d_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfcvth_d_s(v8f32 _1) { return __builtin_lasx_xvfcvth_d_s(_1); } + // CHECK-LABEL: @xvfcvtl_s_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfcvtl_s_h(v16i16 _1) { return __builtin_lasx_xvfcvtl_s_h(_1); } + // CHECK-LABEL: @xvfcvtl_d_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfcvtl_d_s(v8f32 _1) { return __builtin_lasx_xvfcvtl_d_s(_1); } + // CHECK-LABEL: @xvftint_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftint_w_s(v8f32 _1) { return __builtin_lasx_xvftint_w_s(_1); } + // CHECK-LABEL: @xvftint_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftint_l_d(v4f64 _1) { return __builtin_lasx_xvftint_l_d(_1); } + // CHECK-LABEL: @xvftint_wu_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvftint_wu_s(v8f32 _1) { return __builtin_lasx_xvftint_wu_s(_1); } + // CHECK-LABEL: @xvftint_lu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvftint_lu_d(v4f64 _1) { return __builtin_lasx_xvftint_lu_d(_1); } + // CHECK-LABEL: @xvftintrz_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftintrz_w_s(v8f32 _1) { return __builtin_lasx_xvftintrz_w_s(_1); } + // CHECK-LABEL: @xvftintrz_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrz_l_d(v4f64 _1) { return __builtin_lasx_xvftintrz_l_d(_1); } + // CHECK-LABEL: @xvftintrz_wu_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvftintrz_wu_s(v8f32 _1) { return __builtin_lasx_xvftintrz_wu_s(_1); } + // CHECK-LABEL: @xvftintrz_lu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvftintrz_lu_d(v4f64 _1) { return __builtin_lasx_xvftintrz_lu_d(_1); } + // CHECK-LABEL: @xvffint_s_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvffint_s_w(v8i32 _1) { return __builtin_lasx_xvffint_s_w(_1); } + // CHECK-LABEL: @xvffint_d_l( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvffint_d_l(v4i64 _1) { return __builtin_lasx_xvffint_d_l(_1); } + // CHECK-LABEL: @xvffint_s_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvffint_s_wu(v8u32 _1) { return __builtin_lasx_xvffint_s_wu(_1); } + // CHECK-LABEL: @xvffint_d_lu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvffint_d_lu(v4u64 _1) { return __builtin_lasx_xvffint_d_lu(_1); } + // CHECK-LABEL: @xvreplve_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_1:%.*]], i32 [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_1]], i32 [[_2:%.*]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvreplve_b(v32i8 _1, int _2) { return __builtin_lasx_xvreplve_b(_1, _2); } + // CHECK-LABEL: @xvreplve_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_1:%.*]], i32 [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_1]], i32 [[_2:%.*]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvreplve_h(v16i16 _1, int _2) { return __builtin_lasx_xvreplve_h(_1, _2); } + // CHECK-LABEL: @xvreplve_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_1:%.*]], i32 [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_1]], i32 [[_2:%.*]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvreplve_w(v8i32 _1, int _2) { return __builtin_lasx_xvreplve_w(_1, _2); } + // CHECK-LABEL: @xvreplve_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1:%.*]], i32 [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1]], i32 [[_2:%.*]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvreplve_d(v4i64 _1, int _2) { return __builtin_lasx_xvreplve_d(_1, _2); } + // CHECK-LABEL: @xvpermi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvpermi_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpermi_w(_1, _2, 1); } + // CHECK-LABEL: @xvandn_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvandn_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvandn_v(_1, _2); } + // CHECK-LABEL: @xvneg_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvneg_b(v32i8 _1) { return __builtin_lasx_xvneg_b(_1); } + // CHECK-LABEL: @xvneg_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_1]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvneg_h(v16i16 _1) { return __builtin_lasx_xvneg_h(_1); } + // CHECK-LABEL: @xvneg_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvneg_w(v8i32 _1) { return __builtin_lasx_xvneg_w(_1); } + // CHECK-LABEL: @xvneg_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvneg_d(v4i64 _1) { return __builtin_lasx_xvneg_d(_1); } + // CHECK-LABEL: @xvmuh_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmuh_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmuh_b(_1, _2); } + // CHECK-LABEL: @xvmuh_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmuh_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmuh_h(_1, _2); } + // CHECK-LABEL: @xvmuh_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmuh_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmuh_w(_1, _2); } + // CHECK-LABEL: @xvmuh_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmuh_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmuh_d(_1, _2); } + // CHECK-LABEL: @xvmuh_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvmuh_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmuh_bu(_1, _2); } + // CHECK-LABEL: @xvmuh_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvmuh_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmuh_hu(_1, _2); } + // CHECK-LABEL: @xvmuh_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvmuh_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmuh_wu(_1, _2); } + // CHECK-LABEL: @xvmuh_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmuh_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmuh_du(_1, _2); } + // CHECK-LABEL: @xvsllwil_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsllwil_h_b(v32i8 _1) { return __builtin_lasx_xvsllwil_h_b(_1, 1); } + // CHECK-LABEL: @xvsllwil_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsllwil_w_h(v16i16 _1) { return __builtin_lasx_xvsllwil_w_h(_1, 1); } + // CHECK-LABEL: @xvsllwil_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsllwil_d_w(v8i32 _1) { return __builtin_lasx_xvsllwil_d_w(_1, 1); } + // CHECK-LABEL: @xvsllwil_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvsllwil_hu_bu(v32u8 _1) { return __builtin_lasx_xvsllwil_hu_bu(_1, 1); } + // CHECK-LABEL: @xvsllwil_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvsllwil_wu_hu(v16u16 _1) { return __builtin_lasx_xvsllwil_wu_hu(_1, 1); } + // CHECK-LABEL: @xvsllwil_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvsllwil_du_wu(v8u32 _1) { return __builtin_lasx_xvsllwil_du_wu(_1, 1); } + // CHECK-LABEL: @xvsran_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsran_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsran_b_h(_1, _2); } + // CHECK-LABEL: @xvsran_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsran_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsran_h_w(_1, _2); } + // CHECK-LABEL: @xvsran_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsran_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsran_w_d(_1, _2); } + // CHECK-LABEL: @xvssran_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssran_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssran_b_h(_1, _2); } + // CHECK-LABEL: @xvssran_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssran_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssran_h_w(_1, _2); } + // CHECK-LABEL: @xvssran_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssran_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssran_w_d(_1, _2); } + // CHECK-LABEL: @xvssran_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssran_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssran_bu_h(_1, _2); } + // CHECK-LABEL: @xvssran_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssran_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssran_hu_w(_1, _2); } + // CHECK-LABEL: @xvssran_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssran_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssran_wu_d(_1, _2); } + // CHECK-LABEL: @xvsrarn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrarn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrarn_b_h(_1, _2); } + // CHECK-LABEL: @xvsrarn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrarn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrarn_h_w(_1, _2); } + // CHECK-LABEL: @xvsrarn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrarn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrarn_w_d(_1, _2); } + // CHECK-LABEL: @xvssrarn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssrarn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrarn_b_h(_1, _2); } + // CHECK-LABEL: @xvssrarn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssrarn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrarn_h_w(_1, _2); } + // CHECK-LABEL: @xvssrarn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssrarn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrarn_w_d(_1, _2); } + // CHECK-LABEL: @xvssrarn_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssrarn_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrarn_bu_h(_1, _2); } + // CHECK-LABEL: @xvssrarn_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssrarn_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrarn_hu_w(_1, _2); } + // CHECK-LABEL: @xvssrarn_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssrarn_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrarn_wu_d(_1, _2); } + // CHECK-LABEL: @xvsrln_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrln_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrln_b_h(_1, _2); } + // CHECK-LABEL: @xvsrln_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrln_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrln_h_w(_1, _2); } + // CHECK-LABEL: @xvsrln_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrln_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrln_w_d(_1, _2); } + // CHECK-LABEL: @xvssrln_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssrln_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrln_bu_h(_1, _2); } + // CHECK-LABEL: @xvssrln_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssrln_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrln_hu_w(_1, _2); } + // CHECK-LABEL: @xvssrln_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssrln_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrln_wu_d(_1, _2); } + // CHECK-LABEL: @xvsrlrn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrlrn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlrn_b_h(_1, _2); } + // CHECK-LABEL: @xvsrlrn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrlrn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlrn_h_w(_1, _2); } + // CHECK-LABEL: @xvsrlrn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrlrn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlrn_w_d(_1, _2); } + // CHECK-LABEL: @xvssrlrn_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssrlrn_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrlrn_bu_h(_1, _2); } + // CHECK-LABEL: @xvssrlrn_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssrlrn_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrlrn_hu_w(_1, _2); } + // CHECK-LABEL: @xvssrlrn_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssrlrn_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrlrn_wu_d(_1, _2); } + // CHECK-LABEL: @xvfrstpi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvfrstpi_b(_1, _2, 1); } + // CHECK-LABEL: @xvfrstpi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvfrstpi_h(_1, _2, 1); } + // CHECK-LABEL: @xvfrstp_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) ++// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvfrstp_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvfrstp_b(_1, _2, _3); } + // CHECK-LABEL: @xvfrstp_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvfrstp_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvfrstp_h(_1, _2, _3); } + // CHECK-LABEL: @xvshuf4i_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvshuf4i_d(_1, _2, 1); } + // CHECK-LABEL: @xvbsrl_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvbsrl_v(v32i8 _1) { return __builtin_lasx_xvbsrl_v(_1, 1); } + // CHECK-LABEL: @xvbsll_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvbsll_v(v32i8 _1) { return __builtin_lasx_xvbsll_v(_1, 1); } + // CHECK-LABEL: @xvextrins_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvextrins_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvextrins_b(_1, _2, 1); } + // CHECK-LABEL: @xvextrins_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvextrins_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvextrins_h(_1, _2, 1); } + // CHECK-LABEL: @xvextrins_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvextrins_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvextrins_w(_1, _2, 1); } + // CHECK-LABEL: @xvextrins_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvextrins_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvextrins_d(_1, _2, 1); } + // CHECK-LABEL: @xvmskltz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmskltz_b(v32i8 _1) { return __builtin_lasx_xvmskltz_b(_1); } + // CHECK-LABEL: @xvmskltz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_1]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmskltz_h(v16i16 _1) { return __builtin_lasx_xvmskltz_h(_1); } + // CHECK-LABEL: @xvmskltz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmskltz_w(v8i32 _1) { return __builtin_lasx_xvmskltz_w(_1); } + // CHECK-LABEL: @xvmskltz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmskltz_d(v4i64 _1) { return __builtin_lasx_xvmskltz_d(_1); } + // CHECK-LABEL: @xvsigncov_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsigncov_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsigncov_b(_1, _2); } + // CHECK-LABEL: @xvsigncov_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsigncov_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsigncov_h(_1, _2); } + // CHECK-LABEL: @xvsigncov_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsigncov_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsigncov_w(_1, _2); } + // CHECK-LABEL: @xvsigncov_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsigncov_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsigncov_d(_1, _2); } + // CHECK-LABEL: @xvfmadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) ++// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfmadd_s(_1, _2, _3); } + // CHECK-LABEL: @xvfmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) ++// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfmadd_d(_1, _2, _3); } + // CHECK-LABEL: @xvfmsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) ++// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfmsub_s(_1, _2, _3); } + // CHECK-LABEL: @xvfmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) ++// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfmsub_d(_1, _2, _3); } + // CHECK-LABEL: @xvfnmadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) ++// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfnmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfnmadd_s(_1, _2, _3); } + // CHECK-LABEL: @xvfnmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) ++// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfnmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfnmadd_d(_1, _2, _3); } + // CHECK-LABEL: @xvfnmsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) ++// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvfnmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfnmsub_s(_1, _2, _3); } + // CHECK-LABEL: @xvfnmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) ++// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvfnmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfnmsub_d(_1, _2, _3); } + // CHECK-LABEL: @xvftintrne_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftintrne_w_s(v8f32 _1) { return __builtin_lasx_xvftintrne_w_s(_1); } + // CHECK-LABEL: @xvftintrne_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrne_l_d(v4f64 _1) { return __builtin_lasx_xvftintrne_l_d(_1); } + // CHECK-LABEL: @xvftintrp_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftintrp_w_s(v8f32 _1) { return __builtin_lasx_xvftintrp_w_s(_1); } + // CHECK-LABEL: @xvftintrp_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrp_l_d(v4f64 _1) { return __builtin_lasx_xvftintrp_l_d(_1); } + // CHECK-LABEL: @xvftintrm_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftintrm_w_s(v8f32 _1) { return __builtin_lasx_xvftintrm_w_s(_1); } + // CHECK-LABEL: @xvftintrm_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrm_l_d(v4f64 _1) { return __builtin_lasx_xvftintrm_l_d(_1); } + // CHECK-LABEL: @xvftint_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftint_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftint_w_d(_1, _2); } + // CHECK-LABEL: @xvffint_s_l( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvffint_s_l(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvffint_s_l(_1, _2); } + // CHECK-LABEL: @xvftintrz_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftintrz_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrz_w_d(_1, _2); } + // CHECK-LABEL: @xvftintrp_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftintrp_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrp_w_d(_1, _2); } + // CHECK-LABEL: @xvftintrm_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftintrm_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrm_w_d(_1, _2); } + // CHECK-LABEL: @xvftintrne_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvftintrne_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrne_w_d(_1, _2); } + // CHECK-LABEL: @xvftinth_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftinth_l_s(v8f32 _1) { return __builtin_lasx_xvftinth_l_s(_1); } + // CHECK-LABEL: @xvftintl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintl_l_s(v8f32 _1) { return __builtin_lasx_xvftintl_l_s(_1); } + // CHECK-LABEL: @xvffinth_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvffinth_d_w(v8i32 _1) { return __builtin_lasx_xvffinth_d_w(_1); } + // CHECK-LABEL: @xvffintl_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvffintl_d_w(v8i32 _1) { return __builtin_lasx_xvffintl_d_w(_1); } + // CHECK-LABEL: @xvftintrzh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrzh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrzh_l_s(_1); } + // CHECK-LABEL: @xvftintrzl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrzl_l_s(v8f32 _1) { return __builtin_lasx_xvftintrzl_l_s(_1); } + // CHECK-LABEL: @xvftintrph_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrph_l_s(v8f32 _1) { return __builtin_lasx_xvftintrph_l_s(_1); } + // CHECK-LABEL: @xvftintrpl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrpl_l_s(v8f32 _1) { return __builtin_lasx_xvftintrpl_l_s(_1); } + // CHECK-LABEL: @xvftintrmh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrmh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrmh_l_s(_1); } + // CHECK-LABEL: @xvftintrml_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrml_l_s(v8f32 _1) { return __builtin_lasx_xvftintrml_l_s(_1); } + // CHECK-LABEL: @xvftintrneh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrneh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrneh_l_s(_1); } + // CHECK-LABEL: @xvftintrnel_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvftintrnel_l_s(v8f32 _1) { return __builtin_lasx_xvftintrnel_l_s(_1); } + // CHECK-LABEL: @xvfrintrne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> +-// CHECK-NEXT: ret <8 x i32> [[TMP1]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfrintrne_s(v8f32 _1) { return __builtin_lasx_xvfrintrne_s(_1); } + // CHECK-LABEL: @xvfrintrne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> +-// CHECK-NEXT: ret <4 x i64> [[TMP1]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfrintrne_d(v4f64 _1) { return __builtin_lasx_xvfrintrne_d(_1); } + // CHECK-LABEL: @xvfrintrz_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> +-// CHECK-NEXT: ret <8 x i32> [[TMP1]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfrintrz_s(v8f32 _1) { return __builtin_lasx_xvfrintrz_s(_1); } + // CHECK-LABEL: @xvfrintrz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> +-// CHECK-NEXT: ret <4 x i64> [[TMP1]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfrintrz_d(v4f64 _1) { return __builtin_lasx_xvfrintrz_d(_1); } + // CHECK-LABEL: @xvfrintrp_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> +-// CHECK-NEXT: ret <8 x i32> [[TMP1]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfrintrp_s(v8f32 _1) { return __builtin_lasx_xvfrintrp_s(_1); } + // CHECK-LABEL: @xvfrintrp_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> +-// CHECK-NEXT: ret <4 x i64> [[TMP1]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfrintrp_d(v4f64 _1) { return __builtin_lasx_xvfrintrp_d(_1); } + // CHECK-LABEL: @xvfrintrm_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> +-// CHECK-NEXT: ret <8 x i32> [[TMP1]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> [[_1]]) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfrintrm_s(v8f32 _1) { return __builtin_lasx_xvfrintrm_s(_1); } + // CHECK-LABEL: @xvfrintrm_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> +-// CHECK-NEXT: ret <4 x i64> [[TMP1]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1]]) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfrintrm_d(v4f64 _1) { return __builtin_lasx_xvfrintrm_d(_1); } + // CHECK-LABEL: @xvld( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvld(void *_1) { return __builtin_lasx_xvld(_1, 1); } + // CHECK-LABEL: @xvst( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1) ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1) + // CHECK-NEXT: ret void + // + void xvst(v32i8 _1, void *_2) { return __builtin_lasx_xvst(_1, _2, 1); } + // CHECK-LABEL: @xvstelm_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1, i32 1) ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1, i32 1) + // CHECK-NEXT: ret void + // + void xvstelm_b(v32i8 _1, void * _2) { return __builtin_lasx_xvstelm_b(_1, _2, 1, 1); } + // CHECK-LABEL: @xvstelm_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1:%.*]], ptr [[_2:%.*]], i32 2, i32 1) ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1]], ptr [[_2:%.*]], i32 2, i32 1) + // CHECK-NEXT: ret void + // + void xvstelm_h(v16i16 _1, void * _2) { return __builtin_lasx_xvstelm_h(_1, _2, 2, 1); } + // CHECK-LABEL: @xvstelm_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1:%.*]], ptr [[_2:%.*]], i32 4, i32 1) ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1]], ptr [[_2:%.*]], i32 4, i32 1) + // CHECK-NEXT: ret void + // + void xvstelm_w(v8i32 _1, void * _2) { return __builtin_lasx_xvstelm_w(_1, _2, 4, 1); } + // CHECK-LABEL: @xvstelm_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1:%.*]], ptr [[_2:%.*]], i32 8, i32 1) ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1]], ptr [[_2:%.*]], i32 8, i32 1) + // CHECK-NEXT: ret void + // + void xvstelm_d(v4i64 _1, void * _2) { return __builtin_lasx_xvstelm_d(_1, _2, 8, 1); } + // CHECK-LABEL: @xvinsve0_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvinsve0_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvinsve0_w(_1, _2, 1); } + // CHECK-LABEL: @xvinsve0_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvinsve0_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvinsve0_d(_1, _2, 1); } + // CHECK-LABEL: @xvpickve_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvpickve_w(v8i32 _1) { return __builtin_lasx_xvpickve_w(_1, 1); } + // CHECK-LABEL: @xvpickve_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvpickve_d(v4i64 _1) { return __builtin_lasx_xvpickve_d(_1, 1); } + // CHECK-LABEL: @xvssrlrn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssrlrn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrn_b_h(_1, _2); } + // CHECK-LABEL: @xvssrlrn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssrlrn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrn_h_w(_1, _2); } + // CHECK-LABEL: @xvssrlrn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssrlrn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrn_w_d(_1, _2); } + // CHECK-LABEL: @xvssrln_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssrln_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrln_b_h(_1, _2); } + // CHECK-LABEL: @xvssrln_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssrln_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrln_h_w(_1, _2); } + // CHECK-LABEL: @xvssrln_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssrln_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrln_w_d(_1, _2); } + // CHECK-LABEL: @xvorn_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvorn_v(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvorn_v(_1, _2); } + // CHECK-LABEL: @xvldi( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvldi() { return __builtin_lasx_xvldi(1); } + // CHECK-LABEL: @xvldx( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1:%.*]], i64 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvldx(void *_1) { return __builtin_lasx_xvldx(_1, 1); } + // CHECK-LABEL: @xvstx( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i64 1) ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_1]], ptr [[_2:%.*]], i64 1) + // CHECK-NEXT: ret void + // + void xvstx(v32i8 _1, void *_2) { return __builtin_lasx_xvstx(_1, _2, 1); } + // CHECK-LABEL: @xvextl_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvextl_qu_du(v4u64 _1) { return __builtin_lasx_xvextl_qu_du(_1); } + // CHECK-LABEL: @xvinsgr2vr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1:%.*]], i32 1, i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1]], i32 1, i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvinsgr2vr_w(v8i32 _1) { return __builtin_lasx_xvinsgr2vr_w(_1, 1, 1); } + // CHECK-LABEL: @xvinsgr2vr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1:%.*]], i64 1, i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1]], i64 1, i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvinsgr2vr_d(v4i64 _1) { return __builtin_lasx_xvinsgr2vr_d(_1, 1, 1); } + // CHECK-LABEL: @xvreplve0_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvreplve0_b(v32i8 _1) { return __builtin_lasx_xvreplve0_b(_1); } + // CHECK-LABEL: @xvreplve0_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_1]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvreplve0_h(v16i16 _1) { return __builtin_lasx_xvreplve0_h(_1); } + // CHECK-LABEL: @xvreplve0_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvreplve0_w(v8i32 _1) { return __builtin_lasx_xvreplve0_w(_1); } + // CHECK-LABEL: @xvreplve0_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvreplve0_d(v4i64 _1) { return __builtin_lasx_xvreplve0_d(_1); } + // CHECK-LABEL: @xvreplve0_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvreplve0_q(v32i8 _1) { return __builtin_lasx_xvreplve0_q(_1); } + // CHECK-LABEL: @vext2xv_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 vext2xv_h_b(v32i8 _1) { return __builtin_lasx_vext2xv_h_b(_1); } + // CHECK-LABEL: @vext2xv_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 vext2xv_w_h(v16i16 _1) { return __builtin_lasx_vext2xv_w_h(_1); } + // CHECK-LABEL: @vext2xv_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_d_w(v8i32 _1) { return __builtin_lasx_vext2xv_d_w(_1); } + // CHECK-LABEL: @vext2xv_w_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 vext2xv_w_b(v32i8 _1) { return __builtin_lasx_vext2xv_w_b(_1); } + // CHECK-LABEL: @vext2xv_d_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_d_h(v16i16 _1) { return __builtin_lasx_vext2xv_d_h(_1); } + // CHECK-LABEL: @vext2xv_d_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_d_b(v32i8 _1) { return __builtin_lasx_vext2xv_d_b(_1); } + // CHECK-LABEL: @vext2xv_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 vext2xv_hu_bu(v32i8 _1) { return __builtin_lasx_vext2xv_hu_bu(_1); } + // CHECK-LABEL: @vext2xv_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 vext2xv_wu_hu(v16i16 _1) { return __builtin_lasx_vext2xv_wu_hu(_1); } + // CHECK-LABEL: @vext2xv_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_du_wu(v8i32 _1) { return __builtin_lasx_vext2xv_du_wu(_1); } + // CHECK-LABEL: @vext2xv_wu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 vext2xv_wu_bu(v32i8 _1) { return __builtin_lasx_vext2xv_wu_bu(_1); } + // CHECK-LABEL: @vext2xv_du_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_du_hu(v16i16 _1) { return __builtin_lasx_vext2xv_du_hu(_1); } + // CHECK-LABEL: @vext2xv_du_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_du_bu(v32i8 _1) { return __builtin_lasx_vext2xv_du_bu(_1); } + // CHECK-LABEL: @xvpermi_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvpermi_q(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpermi_q(_1, _2, 1); } + // CHECK-LABEL: @xvpermi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvpermi_d(v4i64 _1) { return __builtin_lasx_xvpermi_d(_1, 1); } + // CHECK-LABEL: @xvperm_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvperm_w(_1, _2); } + // CHECK-LABEL: @xvldrepl_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvldrepl_b(void *_1) { return __builtin_lasx_xvldrepl_b(_1, 1); } + // CHECK-LABEL: @xvldrepl_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr [[_1:%.*]], i32 2) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvldrepl_h(void *_1) { return __builtin_lasx_xvldrepl_h(_1, 2); } + // CHECK-LABEL: @xvldrepl_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr [[_1:%.*]], i32 4) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvldrepl_w(void *_1) { return __builtin_lasx_xvldrepl_w(_1, 4); } + // CHECK-LABEL: @xvldrepl_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr [[_1:%.*]], i32 8) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvldrepl_d(void *_1) { return __builtin_lasx_xvldrepl_d(_1, 8); } + // CHECK-LABEL: @xvpickve2gr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xvpickve2gr_w(v8i32 _1) { return __builtin_lasx_xvpickve2gr_w(_1, 1); } + // CHECK-LABEL: @xvpickve2gr_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + unsigned int xvpickve2gr_wu(v8i32 _1) { return __builtin_lasx_xvpickve2gr_wu(_1, 1); } + // CHECK-LABEL: @xvpickve2gr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i64 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: ret i64 [[TMP1]] + // + long xvpickve2gr_d(v4i64 _1) { return __builtin_lasx_xvpickve2gr_d(_1, 1); } + // CHECK-LABEL: @xvpickve2gr_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i64 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: ret i64 [[TMP1]] + // + unsigned long int xvpickve2gr_du(v4i64 _1) { return __builtin_lasx_xvpickve2gr_du(_1, 1); } + // CHECK-LABEL: @xvaddwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvaddwev_q_d(_1, _2); } + // CHECK-LABEL: @xvaddwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvaddwev_d_w(_1, _2); } + // CHECK-LABEL: @xvaddwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvaddwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvaddwev_w_h(_1, _2); } + // CHECK-LABEL: @xvaddwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvaddwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvaddwev_h_b(_1, _2); } + // CHECK-LABEL: @xvaddwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvaddwev_q_du(_1, _2); } + // CHECK-LABEL: @xvaddwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvaddwev_d_wu(_1, _2); } + // CHECK-LABEL: @xvaddwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvaddwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvaddwev_w_hu(_1, _2); } + // CHECK-LABEL: @xvaddwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvaddwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvaddwev_h_bu(_1, _2); } + // CHECK-LABEL: @xvsubwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsubwev_q_d(_1, _2); } + // CHECK-LABEL: @xvsubwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsubwev_d_w(_1, _2); } + // CHECK-LABEL: @xvsubwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsubwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsubwev_w_h(_1, _2); } + // CHECK-LABEL: @xvsubwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsubwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsubwev_h_b(_1, _2); } + // CHECK-LABEL: @xvsubwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsubwev_q_du(_1, _2); } + // CHECK-LABEL: @xvsubwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsubwev_d_wu(_1, _2); } + // CHECK-LABEL: @xvsubwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsubwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsubwev_w_hu(_1, _2); } + // CHECK-LABEL: @xvsubwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsubwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsubwev_h_bu(_1, _2); } + // CHECK-LABEL: @xvmulwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmulwev_q_d(_1, _2); } + // CHECK-LABEL: @xvmulwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmulwev_d_w(_1, _2); } + // CHECK-LABEL: @xvmulwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmulwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmulwev_w_h(_1, _2); } + // CHECK-LABEL: @xvmulwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmulwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmulwev_h_b(_1, _2); } + // CHECK-LABEL: @xvmulwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmulwev_q_du(_1, _2); } + // CHECK-LABEL: @xvmulwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmulwev_d_wu(_1, _2); } + // CHECK-LABEL: @xvmulwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmulwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmulwev_w_hu(_1, _2); } + // CHECK-LABEL: @xvmulwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmulwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmulwev_h_bu(_1, _2); } + // CHECK-LABEL: @xvaddwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvaddwod_q_d(_1, _2); } + // CHECK-LABEL: @xvaddwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvaddwod_d_w(_1, _2); } + // CHECK-LABEL: @xvaddwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvaddwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvaddwod_w_h(_1, _2); } + // CHECK-LABEL: @xvaddwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvaddwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvaddwod_h_b(_1, _2); } + // CHECK-LABEL: @xvaddwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvaddwod_q_du(_1, _2); } + // CHECK-LABEL: @xvaddwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvaddwod_d_wu(_1, _2); } + // CHECK-LABEL: @xvaddwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvaddwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvaddwod_w_hu(_1, _2); } + // CHECK-LABEL: @xvaddwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvaddwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvaddwod_h_bu(_1, _2); } + // CHECK-LABEL: @xvsubwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsubwod_q_d(_1, _2); } + // CHECK-LABEL: @xvsubwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsubwod_d_w(_1, _2); } + // CHECK-LABEL: @xvsubwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsubwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsubwod_w_h(_1, _2); } + // CHECK-LABEL: @xvsubwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsubwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsubwod_h_b(_1, _2); } + // CHECK-LABEL: @xvsubwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsubwod_q_du(_1, _2); } + // CHECK-LABEL: @xvsubwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsubwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsubwod_d_wu(_1, _2); } + // CHECK-LABEL: @xvsubwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsubwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsubwod_w_hu(_1, _2); } + // CHECK-LABEL: @xvsubwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsubwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsubwod_h_bu(_1, _2); } + // CHECK-LABEL: @xvmulwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmulwod_q_d(_1, _2); } + // CHECK-LABEL: @xvmulwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmulwod_d_w(_1, _2); } + // CHECK-LABEL: @xvmulwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmulwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmulwod_w_h(_1, _2); } + // CHECK-LABEL: @xvmulwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmulwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmulwod_h_b(_1, _2); } + // CHECK-LABEL: @xvmulwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmulwod_q_du(_1, _2); } + // CHECK-LABEL: @xvmulwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmulwod_d_wu(_1, _2); } + // CHECK-LABEL: @xvmulwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmulwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmulwod_w_hu(_1, _2); } + // CHECK-LABEL: @xvmulwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmulwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmulwod_h_bu(_1, _2); } + // CHECK-LABEL: @xvaddwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvaddwev_d_wu_w(_1, _2); } + // CHECK-LABEL: @xvaddwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvaddwev_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvaddwev_w_hu_h(_1, _2); } + // CHECK-LABEL: @xvaddwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvaddwev_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvaddwev_h_bu_b(_1, _2); } + // CHECK-LABEL: @xvmulwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvmulwev_d_wu_w(_1, _2); } + // CHECK-LABEL: @xvmulwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmulwev_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvmulwev_w_hu_h(_1, _2); } + // CHECK-LABEL: @xvmulwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmulwev_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvmulwev_h_bu_b(_1, _2); } + // CHECK-LABEL: @xvaddwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvaddwod_d_wu_w(_1, _2); } + // CHECK-LABEL: @xvaddwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvaddwod_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvaddwod_w_hu_h(_1, _2); } + // CHECK-LABEL: @xvaddwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvaddwod_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvaddwod_h_bu_b(_1, _2); } + // CHECK-LABEL: @xvmulwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvmulwod_d_wu_w(_1, _2); } + // CHECK-LABEL: @xvmulwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmulwod_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvmulwod_w_hu_h(_1, _2); } + // CHECK-LABEL: @xvmulwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmulwod_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvmulwod_h_bu_b(_1, _2); } + // CHECK-LABEL: @xvhaddw_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvhaddw_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvhaddw_q_d(_1, _2); } + // CHECK-LABEL: @xvhaddw_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvhaddw_qu_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvhaddw_qu_du(_1, _2); } + // CHECK-LABEL: @xvhsubw_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvhsubw_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvhsubw_q_d(_1, _2); } + // CHECK-LABEL: @xvhsubw_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvhsubw_qu_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvhsubw_qu_du(_1, _2); } + // CHECK-LABEL: @xvmaddwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwev_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwev_q_d(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwev_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwev_d_w(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmaddwev_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwev_w_h(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmaddwev_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwev_h_b(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmaddwev_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __builtin_lasx_xvmaddwev_q_du(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmaddwev_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __builtin_lasx_xvmaddwev_d_wu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvmaddwev_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __builtin_lasx_xvmaddwev_w_hu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvmaddwev_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvmaddwev_h_bu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwod_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwod_q_d(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwod_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwod_d_w(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmaddwod_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwod_w_h(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmaddwod_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwod_h_b(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmaddwod_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __builtin_lasx_xvmaddwod_q_du(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvmaddwod_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __builtin_lasx_xvmaddwod_d_wu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvmaddwod_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __builtin_lasx_xvmaddwod_w_hu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvmaddwod_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvmaddwod_h_bu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwev_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwev_q_du_d(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwev_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwev_d_wu_w(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmaddwev_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwev_w_hu_h(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmaddwev_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwev_h_bu_b(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwod_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwod_q_du_d(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) ++// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwod_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwod_d_wu_w(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) ++// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvmaddwod_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwod_w_hu_h(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) ++// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvmaddwod_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwod_h_bu_b(_1, _2, _3); } + // CHECK-LABEL: @xvrotr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvrotr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvrotr_b(_1, _2); } + // CHECK-LABEL: @xvrotr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvrotr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvrotr_h(_1, _2); } + // CHECK-LABEL: @xvrotr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvrotr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvrotr_w(_1, _2); } + // CHECK-LABEL: @xvrotr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvrotr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvrotr_d(_1, _2); } + // CHECK-LABEL: @xvadd_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvadd_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadd_q(_1, _2); } + // CHECK-LABEL: @xvsub_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsub_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsub_q(_1, _2); } + // CHECK-LABEL: @xvaddwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvaddwev_q_du_d(_1, _2); } + // CHECK-LABEL: @xvaddwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvaddwod_q_du_d(_1, _2); } + // CHECK-LABEL: @xvmulwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvmulwev_q_du_d(_1, _2); } + // CHECK-LABEL: @xvmulwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvmulwod_q_du_d(_1, _2); } + // CHECK-LABEL: @xvmskgez_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmskgez_b(v32i8 _1) { return __builtin_lasx_xvmskgez_b(_1); } + // CHECK-LABEL: @xvmsknz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvmsknz_b(v32i8 _1) { return __builtin_lasx_xvmsknz_b(_1); } + // CHECK-LABEL: @xvexth_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvexth_h_b(v32i8 _1) { return __builtin_lasx_xvexth_h_b(_1); } + // CHECK-LABEL: @xvexth_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvexth_w_h(v16i16 _1) { return __builtin_lasx_xvexth_w_h(_1); } + // CHECK-LABEL: @xvexth_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvexth_d_w(v8i32 _1) { return __builtin_lasx_xvexth_d_w(_1); } + // CHECK-LABEL: @xvexth_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvexth_q_d(v4i64 _1) { return __builtin_lasx_xvexth_q_d(_1); } + // CHECK-LABEL: @xvexth_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_1]]) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvexth_hu_bu(v32u8 _1) { return __builtin_lasx_xvexth_hu_bu(_1); } + // CHECK-LABEL: @xvexth_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_1]]) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvexth_wu_hu(v16u16 _1) { return __builtin_lasx_xvexth_wu_hu(_1); } + // CHECK-LABEL: @xvexth_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvexth_du_wu(v8u32 _1) { return __builtin_lasx_xvexth_du_wu(_1); } + // CHECK-LABEL: @xvexth_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvexth_qu_du(v4u64 _1) { return __builtin_lasx_xvexth_qu_du(_1); } + // CHECK-LABEL: @xvrotri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvrotri_b(v32i8 _1) { return __builtin_lasx_xvrotri_b(_1, 1); } + // CHECK-LABEL: @xvrotri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvrotri_h(v16i16 _1) { return __builtin_lasx_xvrotri_h(_1, 1); } + // CHECK-LABEL: @xvrotri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvrotri_w(v8i32 _1) { return __builtin_lasx_xvrotri_w(_1, 1); } + // CHECK-LABEL: @xvrotri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvrotri_d(v4i64 _1) { return __builtin_lasx_xvrotri_d(_1, 1); } + // CHECK-LABEL: @xvextl_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvextl_q_d(v4i64 _1) { return __builtin_lasx_xvextl_q_d(_1); } + // CHECK-LABEL: @xvsrlni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvsrlni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvsrlni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvsrlni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvsrlrni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlrni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvsrlrni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlrni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvsrlrni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlrni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvsrlrni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlrni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrlni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrlni_bu_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrlni_hu_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrlni_wu_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrlni_du_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrlrni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrlrni_bu_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrni_hu_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrni_wu_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrni_du_q(_1, _2, 1); } + // CHECK-LABEL: @xvsrani_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrani_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvsrani_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrani_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvsrani_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrani_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvsrani_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrani_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvsrarni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrarni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvsrarni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrarni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvsrarni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrarni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvsrarni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrarni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrani_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrani_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrani_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrani_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrani_bu_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrani_hu_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrani_wu_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrani_du_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrarni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrarni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrarni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrarni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) ++// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrarni_bu_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) ++// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrarni_hu_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrarni_wu_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrarni_du_q(_1, _2, 1); } + // CHECK-LABEL: @xbnz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbnz_b(v32u8 _1) { return __builtin_lasx_xbnz_b(_1); } + // CHECK-LABEL: @xbnz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbnz_d(v4u64 _1) { return __builtin_lasx_xbnz_d(_1); } + // CHECK-LABEL: @xbnz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbnz_h(v16u16 _1) { return __builtin_lasx_xbnz_h(_1); } + // CHECK-LABEL: @xbnz_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbnz_v(v32u8 _1) { return __builtin_lasx_xbnz_v(_1); } + // CHECK-LABEL: @xbnz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbnz_w(v8u32 _1) { return __builtin_lasx_xbnz_w(_1); } + // CHECK-LABEL: @xbz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbz_b(v32u8 _1) { return __builtin_lasx_xbz_b(_1); } + // CHECK-LABEL: @xbz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbz_d(v4u64 _1) { return __builtin_lasx_xbz_d(_1); } + // CHECK-LABEL: @xbz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbz_h(v16u16 _1) { return __builtin_lasx_xbz_h(_1); } + // CHECK-LABEL: @xbz_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbz_v(v32u8 _1) { return __builtin_lasx_xbz_v(_1); } + // CHECK-LABEL: @xbz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int xbz_w(v8u32 _1) { return __builtin_lasx_xbz_w(_1); } + // CHECK-LABEL: @xvfcmp_caf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_caf_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_caf_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_caf_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_caf_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_caf_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_ceq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_ceq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_ceq_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_ceq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_ceq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_ceq_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cle_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cle_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cle_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cle_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cle_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_clt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_clt_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_clt_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_clt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_clt_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_clt_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cne_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cne_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cne_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cne_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cor_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cor_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cor_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cor_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cor_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cor_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cueq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cueq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cueq_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cueq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cueq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cueq_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cule_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cule_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cule_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cule_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cule_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cule_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cult_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cult_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cult_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cult_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cult_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cult_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cun_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cun_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cun_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cune_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cune_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cune_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cune_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cune_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cune_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cun_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cun_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cun_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_saf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_saf_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_saf_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_saf_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_saf_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_saf_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_seq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_seq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_seq_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_seq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_seq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_seq_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sle_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sle_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sle_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sle_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sle_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_slt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_slt_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_slt_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_slt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_slt_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_slt_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sne_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sne_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sne_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sne_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sor_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sor_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sor_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sor_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sor_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sor_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sueq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sueq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sueq_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sueq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sueq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sueq_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sule_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sule_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sule_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sule_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sule_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sule_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sult_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sult_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sult_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sult_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sult_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sult_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sun_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sun_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sun_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sune_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1]], <4 x double> [[_2]]) ++// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sune_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sune_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sune_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sune_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sune_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sun_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1]], <8 x float> [[_2]]) ++// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sun_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sun_s(_1, _2); } + // CHECK-LABEL: @xvpickve_d_f( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x double> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1]], i32 1) ++// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4f64 xvpickve_d_f(v4f64 _1) { return __builtin_lasx_xvpickve_d_f(_1, 1); } + // CHECK-LABEL: @xvpickve_w_f( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x float> [[TMP0]] ++// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1]], i32 1) ++// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8f32 xvpickve_w_f(v8f32 _1) { return __builtin_lasx_xvpickve_w_f(_1, 1); } + // CHECK-LABEL: @xvrepli_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1) +-// CHECK-NEXT: ret <32 x i8> [[TMP0]] ++// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v32i8 xvrepli_b() { return __builtin_lasx_xvrepli_b(1); } + // CHECK-LABEL: @xvrepli_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1) +-// CHECK-NEXT: ret <4 x i64> [[TMP0]] ++// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v4i64 xvrepli_d() { return __builtin_lasx_xvrepli_d(1); } + // CHECK-LABEL: @xvrepli_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1) +-// CHECK-NEXT: ret <16 x i16> [[TMP0]] ++// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v16i16 xvrepli_h() { return __builtin_lasx_xvrepli_h(1); } + // CHECK-LABEL: @xvrepli_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1) +-// CHECK-NEXT: ret <8 x i32> [[TMP0]] ++// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] ++// CHECK-NEXT: ret void + // + v8i32 xvrepli_w() { return __builtin_lasx_xvrepli_w(1); } +diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c b/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c +index 331e29fb7d17..7a84e0ae24f9 100644 +--- a/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c ++++ b/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c +@@ -5,4080 +5,5838 @@ + + // CHECK-LABEL: @vsll_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsll_b(v16i8 _1, v16i8 _2) { return __lsx_vsll_b(_1, _2); } + // CHECK-LABEL: @vsll_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsll_h(v8i16 _1, v8i16 _2) { return __lsx_vsll_h(_1, _2); } + // CHECK-LABEL: @vsll_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsll_w(v4i32 _1, v4i32 _2) { return __lsx_vsll_w(_1, _2); } + // CHECK-LABEL: @vsll_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsll_d(v2i64 _1, v2i64 _2) { return __lsx_vsll_d(_1, _2); } + // CHECK-LABEL: @vslli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vslli_b(v16i8 _1) { return __lsx_vslli_b(_1, 1); } + // CHECK-LABEL: @vslli_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vslli_h(v8i16 _1) { return __lsx_vslli_h(_1, 1); } + // CHECK-LABEL: @vslli_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vslli_w(v4i32 _1) { return __lsx_vslli_w(_1, 1); } + // CHECK-LABEL: @vslli_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vslli_d(v2i64 _1) { return __lsx_vslli_d(_1, 1); } + // CHECK-LABEL: @vsra_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsra_b(v16i8 _1, v16i8 _2) { return __lsx_vsra_b(_1, _2); } + // CHECK-LABEL: @vsra_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsra_h(v8i16 _1, v8i16 _2) { return __lsx_vsra_h(_1, _2); } + // CHECK-LABEL: @vsra_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsra_w(v4i32 _1, v4i32 _2) { return __lsx_vsra_w(_1, _2); } + // CHECK-LABEL: @vsra_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsra_d(v2i64 _1, v2i64 _2) { return __lsx_vsra_d(_1, _2); } + // CHECK-LABEL: @vsrai_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vsrai_b(v16i8 _1) { return __lsx_vsrai_b(_1, 1); } + // CHECK-LABEL: @vsrai_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vsrai_h(v8i16 _1) { return __lsx_vsrai_h(_1, 1); } + // CHECK-LABEL: @vsrai_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vsrai_w(v4i32 _1) { return __lsx_vsrai_w(_1, 1); } + // CHECK-LABEL: @vsrai_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vsrai_d(v2i64 _1) { return __lsx_vsrai_d(_1, 1); } + // CHECK-LABEL: @vsrar_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrar_b(v16i8 _1, v16i8 _2) { return __lsx_vsrar_b(_1, _2); } + // CHECK-LABEL: @vsrar_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrar_h(v8i16 _1, v8i16 _2) { return __lsx_vsrar_h(_1, _2); } + // CHECK-LABEL: @vsrar_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrar_w(v4i32 _1, v4i32 _2) { return __lsx_vsrar_w(_1, _2); } + // CHECK-LABEL: @vsrar_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsrar_d(v2i64 _1, v2i64 _2) { return __lsx_vsrar_d(_1, _2); } + // CHECK-LABEL: @vsrari_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vsrari_b(v16i8 _1) { return __lsx_vsrari_b(_1, 1); } + // CHECK-LABEL: @vsrari_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vsrari_h(v8i16 _1) { return __lsx_vsrari_h(_1, 1); } + // CHECK-LABEL: @vsrari_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vsrari_w(v4i32 _1) { return __lsx_vsrari_w(_1, 1); } + // CHECK-LABEL: @vsrari_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vsrari_d(v2i64 _1) { return __lsx_vsrari_d(_1, 1); } + // CHECK-LABEL: @vsrl_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrl_b(v16i8 _1, v16i8 _2) { return __lsx_vsrl_b(_1, _2); } + // CHECK-LABEL: @vsrl_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrl_h(v8i16 _1, v8i16 _2) { return __lsx_vsrl_h(_1, _2); } + // CHECK-LABEL: @vsrl_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrl_w(v4i32 _1, v4i32 _2) { return __lsx_vsrl_w(_1, _2); } + // CHECK-LABEL: @vsrl_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsrl_d(v2i64 _1, v2i64 _2) { return __lsx_vsrl_d(_1, _2); } + // CHECK-LABEL: @vsrli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vsrli_b(v16i8 _1) { return __lsx_vsrli_b(_1, 1); } + // CHECK-LABEL: @vsrli_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vsrli_h(v8i16 _1) { return __lsx_vsrli_h(_1, 1); } + // CHECK-LABEL: @vsrli_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vsrli_w(v4i32 _1) { return __lsx_vsrli_w(_1, 1); } + // CHECK-LABEL: @vsrli_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vsrli_d(v2i64 _1) { return __lsx_vsrli_d(_1, 1); } + // CHECK-LABEL: @vsrlr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrlr_b(v16i8 _1, v16i8 _2) { return __lsx_vsrlr_b(_1, _2); } + // CHECK-LABEL: @vsrlr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrlr_h(v8i16 _1, v8i16 _2) { return __lsx_vsrlr_h(_1, _2); } + // CHECK-LABEL: @vsrlr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrlr_w(v4i32 _1, v4i32 _2) { return __lsx_vsrlr_w(_1, _2); } + // CHECK-LABEL: @vsrlr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsrlr_d(v2i64 _1, v2i64 _2) { return __lsx_vsrlr_d(_1, _2); } + // CHECK-LABEL: @vsrlri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vsrlri_b(v16i8 _1) { return __lsx_vsrlri_b(_1, 1); } + // CHECK-LABEL: @vsrlri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vsrlri_h(v8i16 _1) { return __lsx_vsrlri_h(_1, 1); } + // CHECK-LABEL: @vsrlri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vsrlri_w(v4i32 _1) { return __lsx_vsrlri_w(_1, 1); } + // CHECK-LABEL: @vsrlri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vsrlri_d(v2i64 _1) { return __lsx_vsrlri_d(_1, 1); } + // CHECK-LABEL: @vbitclr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vbitclr_b(v16u8 _1, v16u8 _2) { return __lsx_vbitclr_b(_1, _2); } + // CHECK-LABEL: @vbitclr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vbitclr_h(v8u16 _1, v8u16 _2) { return __lsx_vbitclr_h(_1, _2); } + // CHECK-LABEL: @vbitclr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vbitclr_w(v4u32 _1, v4u32 _2) { return __lsx_vbitclr_w(_1, _2); } + // CHECK-LABEL: @vbitclr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vbitclr_d(v2u64 _1, v2u64 _2) { return __lsx_vbitclr_d(_1, _2); } + // CHECK-LABEL: @vbitclri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vbitclri_b(v16u8 _1) { return __lsx_vbitclri_b(_1, 1); } + // CHECK-LABEL: @vbitclri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8u16 vbitclri_h(v8u16 _1) { return __lsx_vbitclri_h(_1, 1); } + // CHECK-LABEL: @vbitclri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vbitclri_w(v4u32 _1) { return __lsx_vbitclri_w(_1, 1); } + // CHECK-LABEL: @vbitclri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vbitclri_d(v2u64 _1) { return __lsx_vbitclri_d(_1, 1); } + // CHECK-LABEL: @vbitset_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vbitset_b(v16u8 _1, v16u8 _2) { return __lsx_vbitset_b(_1, _2); } + // CHECK-LABEL: @vbitset_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vbitset_h(v8u16 _1, v8u16 _2) { return __lsx_vbitset_h(_1, _2); } + // CHECK-LABEL: @vbitset_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vbitset_w(v4u32 _1, v4u32 _2) { return __lsx_vbitset_w(_1, _2); } + // CHECK-LABEL: @vbitset_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vbitset_d(v2u64 _1, v2u64 _2) { return __lsx_vbitset_d(_1, _2); } + // CHECK-LABEL: @vbitseti_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vbitseti_b(v16u8 _1) { return __lsx_vbitseti_b(_1, 1); } + // CHECK-LABEL: @vbitseti_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8u16 vbitseti_h(v8u16 _1) { return __lsx_vbitseti_h(_1, 1); } + // CHECK-LABEL: @vbitseti_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vbitseti_w(v4u32 _1) { return __lsx_vbitseti_w(_1, 1); } + // CHECK-LABEL: @vbitseti_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vbitseti_d(v2u64 _1) { return __lsx_vbitseti_d(_1, 1); } + // CHECK-LABEL: @vbitrev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vbitrev_b(v16u8 _1, v16u8 _2) { return __lsx_vbitrev_b(_1, _2); } + // CHECK-LABEL: @vbitrev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vbitrev_h(v8u16 _1, v8u16 _2) { return __lsx_vbitrev_h(_1, _2); } + // CHECK-LABEL: @vbitrev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vbitrev_w(v4u32 _1, v4u32 _2) { return __lsx_vbitrev_w(_1, _2); } + // CHECK-LABEL: @vbitrev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vbitrev_d(v2u64 _1, v2u64 _2) { return __lsx_vbitrev_d(_1, _2); } + // CHECK-LABEL: @vbitrevi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vbitrevi_b(v16u8 _1) { return __lsx_vbitrevi_b(_1, 1); } + // CHECK-LABEL: @vbitrevi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8u16 vbitrevi_h(v8u16 _1) { return __lsx_vbitrevi_h(_1, 1); } + // CHECK-LABEL: @vbitrevi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vbitrevi_w(v4u32 _1) { return __lsx_vbitrevi_w(_1, 1); } + // CHECK-LABEL: @vbitrevi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vbitrevi_d(v2u64 _1) { return __lsx_vbitrevi_d(_1, 1); } + // CHECK-LABEL: @vadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vadd_b(v16i8 _1, v16i8 _2) { return __lsx_vadd_b(_1, _2); } + // CHECK-LABEL: @vadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vadd_h(v8i16 _1, v8i16 _2) { return __lsx_vadd_h(_1, _2); } + // CHECK-LABEL: @vadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vadd_w(v4i32 _1, v4i32 _2) { return __lsx_vadd_w(_1, _2); } + // CHECK-LABEL: @vadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vadd_d(v2i64 _1, v2i64 _2) { return __lsx_vadd_d(_1, _2); } + // CHECK-LABEL: @vaddi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vaddi_bu(v16i8 _1) { return __lsx_vaddi_bu(_1, 1); } + // CHECK-LABEL: @vaddi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vaddi_hu(v8i16 _1) { return __lsx_vaddi_hu(_1, 1); } + // CHECK-LABEL: @vaddi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vaddi_wu(v4i32 _1) { return __lsx_vaddi_wu(_1, 1); } + // CHECK-LABEL: @vaddi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vaddi_du(v2i64 _1) { return __lsx_vaddi_du(_1, 1); } + // CHECK-LABEL: @vsub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsub_b(v16i8 _1, v16i8 _2) { return __lsx_vsub_b(_1, _2); } + // CHECK-LABEL: @vsub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsub_h(v8i16 _1, v8i16 _2) { return __lsx_vsub_h(_1, _2); } + // CHECK-LABEL: @vsub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsub_w(v4i32 _1, v4i32 _2) { return __lsx_vsub_w(_1, _2); } + // CHECK-LABEL: @vsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsub_d(v2i64 _1, v2i64 _2) { return __lsx_vsub_d(_1, _2); } + // CHECK-LABEL: @vsubi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vsubi_bu(v16i8 _1) { return __lsx_vsubi_bu(_1, 1); } + // CHECK-LABEL: @vsubi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vsubi_hu(v8i16 _1) { return __lsx_vsubi_hu(_1, 1); } + // CHECK-LABEL: @vsubi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vsubi_wu(v4i32 _1) { return __lsx_vsubi_wu(_1, 1); } + // CHECK-LABEL: @vsubi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vsubi_du(v2i64 _1) { return __lsx_vsubi_du(_1, 1); } + // CHECK-LABEL: @vmax_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vmax_b(v16i8 _1, v16i8 _2) { return __lsx_vmax_b(_1, _2); } + // CHECK-LABEL: @vmax_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmax_h(v8i16 _1, v8i16 _2) { return __lsx_vmax_h(_1, _2); } + // CHECK-LABEL: @vmax_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmax_w(v4i32 _1, v4i32 _2) { return __lsx_vmax_w(_1, _2); } + // CHECK-LABEL: @vmax_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmax_d(v2i64 _1, v2i64 _2) { return __lsx_vmax_d(_1, _2); } + // CHECK-LABEL: @vmaxi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vmaxi_b(v16i8 _1) { return __lsx_vmaxi_b(_1, 1); } + // CHECK-LABEL: @vmaxi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vmaxi_h(v8i16 _1) { return __lsx_vmaxi_h(_1, 1); } + // CHECK-LABEL: @vmaxi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vmaxi_w(v4i32 _1) { return __lsx_vmaxi_w(_1, 1); } + // CHECK-LABEL: @vmaxi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vmaxi_d(v2i64 _1) { return __lsx_vmaxi_d(_1, 1); } + // CHECK-LABEL: @vmax_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vmax_bu(v16u8 _1, v16u8 _2) { return __lsx_vmax_bu(_1, _2); } + // CHECK-LABEL: @vmax_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vmax_hu(v8u16 _1, v8u16 _2) { return __lsx_vmax_hu(_1, _2); } + // CHECK-LABEL: @vmax_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vmax_wu(v4u32 _1, v4u32 _2) { return __lsx_vmax_wu(_1, _2); } + // CHECK-LABEL: @vmax_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vmax_du(v2u64 _1, v2u64 _2) { return __lsx_vmax_du(_1, _2); } + // CHECK-LABEL: @vmaxi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vmaxi_bu(v16u8 _1) { return __lsx_vmaxi_bu(_1, 1); } + // CHECK-LABEL: @vmaxi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8u16 vmaxi_hu(v8u16 _1) { return __lsx_vmaxi_hu(_1, 1); } + // CHECK-LABEL: @vmaxi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vmaxi_wu(v4u32 _1) { return __lsx_vmaxi_wu(_1, 1); } + // CHECK-LABEL: @vmaxi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vmaxi_du(v2u64 _1) { return __lsx_vmaxi_du(_1, 1); } + // CHECK-LABEL: @vmin_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vmin_b(v16i8 _1, v16i8 _2) { return __lsx_vmin_b(_1, _2); } + // CHECK-LABEL: @vmin_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmin_h(v8i16 _1, v8i16 _2) { return __lsx_vmin_h(_1, _2); } + // CHECK-LABEL: @vmin_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmin_w(v4i32 _1, v4i32 _2) { return __lsx_vmin_w(_1, _2); } + // CHECK-LABEL: @vmin_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmin_d(v2i64 _1, v2i64 _2) { return __lsx_vmin_d(_1, _2); } + // CHECK-LABEL: @vmini_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vmini_b(v16i8 _1) { return __lsx_vmini_b(_1, 1); } + // CHECK-LABEL: @vmini_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vmini_h(v8i16 _1) { return __lsx_vmini_h(_1, 1); } + // CHECK-LABEL: @vmini_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vmini_w(v4i32 _1) { return __lsx_vmini_w(_1, 1); } + // CHECK-LABEL: @vmini_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vmini_d(v2i64 _1) { return __lsx_vmini_d(_1, 1); } + // CHECK-LABEL: @vmin_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vmin_bu(v16u8 _1, v16u8 _2) { return __lsx_vmin_bu(_1, _2); } + // CHECK-LABEL: @vmin_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vmin_hu(v8u16 _1, v8u16 _2) { return __lsx_vmin_hu(_1, _2); } + // CHECK-LABEL: @vmin_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vmin_wu(v4u32 _1, v4u32 _2) { return __lsx_vmin_wu(_1, _2); } + // CHECK-LABEL: @vmin_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vmin_du(v2u64 _1, v2u64 _2) { return __lsx_vmin_du(_1, _2); } + // CHECK-LABEL: @vmini_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vmini_bu(v16u8 _1) { return __lsx_vmini_bu(_1, 1); } + // CHECK-LABEL: @vmini_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8u16 vmini_hu(v8u16 _1) { return __lsx_vmini_hu(_1, 1); } + // CHECK-LABEL: @vmini_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vmini_wu(v4u32 _1) { return __lsx_vmini_wu(_1, 1); } + // CHECK-LABEL: @vmini_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vmini_du(v2u64 _1) { return __lsx_vmini_du(_1, 1); } + // CHECK-LABEL: @vseq_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vseq_b(v16i8 _1, v16i8 _2) { return __lsx_vseq_b(_1, _2); } + // CHECK-LABEL: @vseq_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vseq_h(v8i16 _1, v8i16 _2) { return __lsx_vseq_h(_1, _2); } + // CHECK-LABEL: @vseq_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vseq_w(v4i32 _1, v4i32 _2) { return __lsx_vseq_w(_1, _2); } + // CHECK-LABEL: @vseq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vseq_d(v2i64 _1, v2i64 _2) { return __lsx_vseq_d(_1, _2); } + // CHECK-LABEL: @vseqi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vseqi_b(v16i8 _1) { return __lsx_vseqi_b(_1, 1); } + // CHECK-LABEL: @vseqi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vseqi_h(v8i16 _1) { return __lsx_vseqi_h(_1, 1); } + // CHECK-LABEL: @vseqi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vseqi_w(v4i32 _1) { return __lsx_vseqi_w(_1, 1); } + // CHECK-LABEL: @vseqi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vseqi_d(v2i64 _1) { return __lsx_vseqi_d(_1, 1); } + // CHECK-LABEL: @vslti_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vslti_b(v16i8 _1) { return __lsx_vslti_b(_1, 1); } + // CHECK-LABEL: @vslt_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vslt_b(v16i8 _1, v16i8 _2) { return __lsx_vslt_b(_1, _2); } + // CHECK-LABEL: @vslt_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vslt_h(v8i16 _1, v8i16 _2) { return __lsx_vslt_h(_1, _2); } + // CHECK-LABEL: @vslt_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vslt_w(v4i32 _1, v4i32 _2) { return __lsx_vslt_w(_1, _2); } + // CHECK-LABEL: @vslt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vslt_d(v2i64 _1, v2i64 _2) { return __lsx_vslt_d(_1, _2); } + // CHECK-LABEL: @vslti_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vslti_h(v8i16 _1) { return __lsx_vslti_h(_1, 1); } + // CHECK-LABEL: @vslti_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vslti_w(v4i32 _1) { return __lsx_vslti_w(_1, 1); } + // CHECK-LABEL: @vslti_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vslti_d(v2i64 _1) { return __lsx_vslti_d(_1, 1); } + // CHECK-LABEL: @vslt_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vslt_bu(v16u8 _1, v16u8 _2) { return __lsx_vslt_bu(_1, _2); } + // CHECK-LABEL: @vslt_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vslt_hu(v8u16 _1, v8u16 _2) { return __lsx_vslt_hu(_1, _2); } + // CHECK-LABEL: @vslt_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vslt_wu(v4u32 _1, v4u32 _2) { return __lsx_vslt_wu(_1, _2); } + // CHECK-LABEL: @vslt_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vslt_du(v2u64 _1, v2u64 _2) { return __lsx_vslt_du(_1, _2); } + // CHECK-LABEL: @vslti_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vslti_bu(v16u8 _1) { return __lsx_vslti_bu(_1, 1); } + // CHECK-LABEL: @vslti_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vslti_hu(v8u16 _1) { return __lsx_vslti_hu(_1, 1); } + // CHECK-LABEL: @vslti_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vslti_wu(v4u32 _1) { return __lsx_vslti_wu(_1, 1); } + // CHECK-LABEL: @vslti_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vslti_du(v2u64 _1) { return __lsx_vslti_du(_1, 1); } + // CHECK-LABEL: @vsle_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsle_b(v16i8 _1, v16i8 _2) { return __lsx_vsle_b(_1, _2); } + // CHECK-LABEL: @vsle_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsle_h(v8i16 _1, v8i16 _2) { return __lsx_vsle_h(_1, _2); } + // CHECK-LABEL: @vsle_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsle_w(v4i32 _1, v4i32 _2) { return __lsx_vsle_w(_1, _2); } + // CHECK-LABEL: @vsle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsle_d(v2i64 _1, v2i64 _2) { return __lsx_vsle_d(_1, _2); } + // CHECK-LABEL: @vslei_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vslei_b(v16i8 _1) { return __lsx_vslei_b(_1, 1); } + // CHECK-LABEL: @vslei_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vslei_h(v8i16 _1) { return __lsx_vslei_h(_1, 1); } + // CHECK-LABEL: @vslei_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vslei_w(v4i32 _1) { return __lsx_vslei_w(_1, 1); } + // CHECK-LABEL: @vslei_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vslei_d(v2i64 _1) { return __lsx_vslei_d(_1, 1); } + // CHECK-LABEL: @vsle_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsle_bu(v16u8 _1, v16u8 _2) { return __lsx_vsle_bu(_1, _2); } + // CHECK-LABEL: @vsle_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsle_hu(v8u16 _1, v8u16 _2) { return __lsx_vsle_hu(_1, _2); } + // CHECK-LABEL: @vsle_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsle_wu(v4u32 _1, v4u32 _2) { return __lsx_vsle_wu(_1, _2); } + // CHECK-LABEL: @vsle_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsle_du(v2u64 _1, v2u64 _2) { return __lsx_vsle_du(_1, _2); } + // CHECK-LABEL: @vslei_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vslei_bu(v16u8 _1) { return __lsx_vslei_bu(_1, 1); } + // CHECK-LABEL: @vslei_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vslei_hu(v8u16 _1) { return __lsx_vslei_hu(_1, 1); } + // CHECK-LABEL: @vslei_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vslei_wu(v4u32 _1) { return __lsx_vslei_wu(_1, 1); } + // CHECK-LABEL: @vslei_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vslei_du(v2u64 _1) { return __lsx_vslei_du(_1, 1); } + // CHECK-LABEL: @vsat_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vsat_b(v16i8 _1) { return __lsx_vsat_b(_1, 1); } + // CHECK-LABEL: @vsat_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vsat_h(v8i16 _1) { return __lsx_vsat_h(_1, 1); } + // CHECK-LABEL: @vsat_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vsat_w(v4i32 _1) { return __lsx_vsat_w(_1, 1); } + // CHECK-LABEL: @vsat_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vsat_d(v2i64 _1) { return __lsx_vsat_d(_1, 1); } + // CHECK-LABEL: @vsat_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vsat_bu(v16u8 _1) { return __lsx_vsat_bu(_1, 1); } + // CHECK-LABEL: @vsat_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8u16 vsat_hu(v8u16 _1) { return __lsx_vsat_hu(_1, 1); } + // CHECK-LABEL: @vsat_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vsat_wu(v4u32 _1) { return __lsx_vsat_wu(_1, 1); } + // CHECK-LABEL: @vsat_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vsat_du(v2u64 _1) { return __lsx_vsat_du(_1, 1); } + // CHECK-LABEL: @vadda_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vadda_b(v16i8 _1, v16i8 _2) { return __lsx_vadda_b(_1, _2); } + // CHECK-LABEL: @vadda_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vadda_h(v8i16 _1, v8i16 _2) { return __lsx_vadda_h(_1, _2); } + // CHECK-LABEL: @vadda_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vadda_w(v4i32 _1, v4i32 _2) { return __lsx_vadda_w(_1, _2); } + // CHECK-LABEL: @vadda_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vadda_d(v2i64 _1, v2i64 _2) { return __lsx_vadda_d(_1, _2); } + // CHECK-LABEL: @vsadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsadd_b(v16i8 _1, v16i8 _2) { return __lsx_vsadd_b(_1, _2); } + // CHECK-LABEL: @vsadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsadd_h(v8i16 _1, v8i16 _2) { return __lsx_vsadd_h(_1, _2); } + // CHECK-LABEL: @vsadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsadd_w(v4i32 _1, v4i32 _2) { return __lsx_vsadd_w(_1, _2); } + // CHECK-LABEL: @vsadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsadd_d(v2i64 _1, v2i64 _2) { return __lsx_vsadd_d(_1, _2); } + // CHECK-LABEL: @vsadd_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vsadd_bu(v16u8 _1, v16u8 _2) { return __lsx_vsadd_bu(_1, _2); } + // CHECK-LABEL: @vsadd_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vsadd_hu(v8u16 _1, v8u16 _2) { return __lsx_vsadd_hu(_1, _2); } + // CHECK-LABEL: @vsadd_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vsadd_wu(v4u32 _1, v4u32 _2) { return __lsx_vsadd_wu(_1, _2); } + // CHECK-LABEL: @vsadd_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vsadd_du(v2u64 _1, v2u64 _2) { return __lsx_vsadd_du(_1, _2); } + // CHECK-LABEL: @vavg_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vavg_b(v16i8 _1, v16i8 _2) { return __lsx_vavg_b(_1, _2); } + // CHECK-LABEL: @vavg_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vavg_h(v8i16 _1, v8i16 _2) { return __lsx_vavg_h(_1, _2); } + // CHECK-LABEL: @vavg_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vavg_w(v4i32 _1, v4i32 _2) { return __lsx_vavg_w(_1, _2); } + // CHECK-LABEL: @vavg_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vavg_d(v2i64 _1, v2i64 _2) { return __lsx_vavg_d(_1, _2); } + // CHECK-LABEL: @vavg_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vavg_bu(v16u8 _1, v16u8 _2) { return __lsx_vavg_bu(_1, _2); } + // CHECK-LABEL: @vavg_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vavg_hu(v8u16 _1, v8u16 _2) { return __lsx_vavg_hu(_1, _2); } + // CHECK-LABEL: @vavg_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vavg_wu(v4u32 _1, v4u32 _2) { return __lsx_vavg_wu(_1, _2); } + // CHECK-LABEL: @vavg_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vavg_du(v2u64 _1, v2u64 _2) { return __lsx_vavg_du(_1, _2); } + // CHECK-LABEL: @vavgr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vavgr_b(v16i8 _1, v16i8 _2) { return __lsx_vavgr_b(_1, _2); } + // CHECK-LABEL: @vavgr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vavgr_h(v8i16 _1, v8i16 _2) { return __lsx_vavgr_h(_1, _2); } + // CHECK-LABEL: @vavgr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vavgr_w(v4i32 _1, v4i32 _2) { return __lsx_vavgr_w(_1, _2); } + // CHECK-LABEL: @vavgr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vavgr_d(v2i64 _1, v2i64 _2) { return __lsx_vavgr_d(_1, _2); } + // CHECK-LABEL: @vavgr_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vavgr_bu(v16u8 _1, v16u8 _2) { return __lsx_vavgr_bu(_1, _2); } + // CHECK-LABEL: @vavgr_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vavgr_hu(v8u16 _1, v8u16 _2) { return __lsx_vavgr_hu(_1, _2); } + // CHECK-LABEL: @vavgr_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vavgr_wu(v4u32 _1, v4u32 _2) { return __lsx_vavgr_wu(_1, _2); } + // CHECK-LABEL: @vavgr_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vavgr_du(v2u64 _1, v2u64 _2) { return __lsx_vavgr_du(_1, _2); } + // CHECK-LABEL: @vssub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssub_b(v16i8 _1, v16i8 _2) { return __lsx_vssub_b(_1, _2); } + // CHECK-LABEL: @vssub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssub_h(v8i16 _1, v8i16 _2) { return __lsx_vssub_h(_1, _2); } + // CHECK-LABEL: @vssub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssub_w(v4i32 _1, v4i32 _2) { return __lsx_vssub_w(_1, _2); } + // CHECK-LABEL: @vssub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vssub_d(v2i64 _1, v2i64 _2) { return __lsx_vssub_d(_1, _2); } + // CHECK-LABEL: @vssub_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssub_bu(v16u8 _1, v16u8 _2) { return __lsx_vssub_bu(_1, _2); } + // CHECK-LABEL: @vssub_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssub_hu(v8u16 _1, v8u16 _2) { return __lsx_vssub_hu(_1, _2); } + // CHECK-LABEL: @vssub_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssub_wu(v4u32 _1, v4u32 _2) { return __lsx_vssub_wu(_1, _2); } + // CHECK-LABEL: @vssub_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vssub_du(v2u64 _1, v2u64 _2) { return __lsx_vssub_du(_1, _2); } + // CHECK-LABEL: @vabsd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vabsd_b(v16i8 _1, v16i8 _2) { return __lsx_vabsd_b(_1, _2); } + // CHECK-LABEL: @vabsd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vabsd_h(v8i16 _1, v8i16 _2) { return __lsx_vabsd_h(_1, _2); } + // CHECK-LABEL: @vabsd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vabsd_w(v4i32 _1, v4i32 _2) { return __lsx_vabsd_w(_1, _2); } + // CHECK-LABEL: @vabsd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vabsd_d(v2i64 _1, v2i64 _2) { return __lsx_vabsd_d(_1, _2); } + // CHECK-LABEL: @vabsd_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vabsd_bu(v16u8 _1, v16u8 _2) { return __lsx_vabsd_bu(_1, _2); } + // CHECK-LABEL: @vabsd_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vabsd_hu(v8u16 _1, v8u16 _2) { return __lsx_vabsd_hu(_1, _2); } + // CHECK-LABEL: @vabsd_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vabsd_wu(v4u32 _1, v4u32 _2) { return __lsx_vabsd_wu(_1, _2); } + // CHECK-LABEL: @vabsd_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vabsd_du(v2u64 _1, v2u64 _2) { return __lsx_vabsd_du(_1, _2); } + // CHECK-LABEL: @vmul_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vmul_b(v16i8 _1, v16i8 _2) { return __lsx_vmul_b(_1, _2); } + // CHECK-LABEL: @vmul_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmul_h(v8i16 _1, v8i16 _2) { return __lsx_vmul_h(_1, _2); } + // CHECK-LABEL: @vmul_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmul_w(v4i32 _1, v4i32 _2) { return __lsx_vmul_w(_1, _2); } + // CHECK-LABEL: @vmul_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmul_d(v2i64 _1, v2i64 _2) { return __lsx_vmul_d(_1, _2); } + // CHECK-LABEL: @vmadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v16i8 vmadd_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __lsx_vmadd_b(_1, _2, _3); + } + // CHECK-LABEL: @vmadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8i16 vmadd_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __lsx_vmadd_h(_1, _2, _3); + } + // CHECK-LABEL: @vmadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4i32 vmadd_w(v4i32 _1, v4i32 _2, v4i32 _3) { + return __lsx_vmadd_w(_1, _2, _3); + } + // CHECK-LABEL: @vmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmadd_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __lsx_vmadd_d(_1, _2, _3); + } + // CHECK-LABEL: @vmsub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v16i8 vmsub_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __lsx_vmsub_b(_1, _2, _3); + } + // CHECK-LABEL: @vmsub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8i16 vmsub_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __lsx_vmsub_h(_1, _2, _3); + } + // CHECK-LABEL: @vmsub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4i32 vmsub_w(v4i32 _1, v4i32 _2, v4i32 _3) { + return __lsx_vmsub_w(_1, _2, _3); + } + // CHECK-LABEL: @vmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmsub_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __lsx_vmsub_d(_1, _2, _3); + } + // CHECK-LABEL: @vdiv_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vdiv_b(v16i8 _1, v16i8 _2) { return __lsx_vdiv_b(_1, _2); } + // CHECK-LABEL: @vdiv_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vdiv_h(v8i16 _1, v8i16 _2) { return __lsx_vdiv_h(_1, _2); } + // CHECK-LABEL: @vdiv_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vdiv_w(v4i32 _1, v4i32 _2) { return __lsx_vdiv_w(_1, _2); } + // CHECK-LABEL: @vdiv_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vdiv_d(v2i64 _1, v2i64 _2) { return __lsx_vdiv_d(_1, _2); } + // CHECK-LABEL: @vdiv_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vdiv_bu(v16u8 _1, v16u8 _2) { return __lsx_vdiv_bu(_1, _2); } + // CHECK-LABEL: @vdiv_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vdiv_hu(v8u16 _1, v8u16 _2) { return __lsx_vdiv_hu(_1, _2); } + // CHECK-LABEL: @vdiv_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vdiv_wu(v4u32 _1, v4u32 _2) { return __lsx_vdiv_wu(_1, _2); } + // CHECK-LABEL: @vdiv_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vdiv_du(v2u64 _1, v2u64 _2) { return __lsx_vdiv_du(_1, _2); } + // CHECK-LABEL: @vhaddw_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vhaddw_h_b(v16i8 _1, v16i8 _2) { return __lsx_vhaddw_h_b(_1, _2); } + // CHECK-LABEL: @vhaddw_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vhaddw_w_h(v8i16 _1, v8i16 _2) { return __lsx_vhaddw_w_h(_1, _2); } + // CHECK-LABEL: @vhaddw_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vhaddw_d_w(v4i32 _1, v4i32 _2) { return __lsx_vhaddw_d_w(_1, _2); } + // CHECK-LABEL: @vhaddw_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vhaddw_hu_bu(v16u8 _1, v16u8 _2) { return __lsx_vhaddw_hu_bu(_1, _2); } + // CHECK-LABEL: @vhaddw_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vhaddw_wu_hu(v8u16 _1, v8u16 _2) { return __lsx_vhaddw_wu_hu(_1, _2); } + // CHECK-LABEL: @vhaddw_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vhaddw_du_wu(v4u32 _1, v4u32 _2) { return __lsx_vhaddw_du_wu(_1, _2); } + // CHECK-LABEL: @vhsubw_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vhsubw_h_b(v16i8 _1, v16i8 _2) { return __lsx_vhsubw_h_b(_1, _2); } + // CHECK-LABEL: @vhsubw_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vhsubw_w_h(v8i16 _1, v8i16 _2) { return __lsx_vhsubw_w_h(_1, _2); } + // CHECK-LABEL: @vhsubw_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vhsubw_d_w(v4i32 _1, v4i32 _2) { return __lsx_vhsubw_d_w(_1, _2); } + // CHECK-LABEL: @vhsubw_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vhsubw_hu_bu(v16u8 _1, v16u8 _2) { return __lsx_vhsubw_hu_bu(_1, _2); } + // CHECK-LABEL: @vhsubw_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vhsubw_wu_hu(v8u16 _1, v8u16 _2) { return __lsx_vhsubw_wu_hu(_1, _2); } + // CHECK-LABEL: @vhsubw_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vhsubw_du_wu(v4u32 _1, v4u32 _2) { return __lsx_vhsubw_du_wu(_1, _2); } + // CHECK-LABEL: @vmod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vmod_b(v16i8 _1, v16i8 _2) { return __lsx_vmod_b(_1, _2); } + // CHECK-LABEL: @vmod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmod_h(v8i16 _1, v8i16 _2) { return __lsx_vmod_h(_1, _2); } + // CHECK-LABEL: @vmod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmod_w(v4i32 _1, v4i32 _2) { return __lsx_vmod_w(_1, _2); } + // CHECK-LABEL: @vmod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmod_d(v2i64 _1, v2i64 _2) { return __lsx_vmod_d(_1, _2); } + // CHECK-LABEL: @vmod_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vmod_bu(v16u8 _1, v16u8 _2) { return __lsx_vmod_bu(_1, _2); } + // CHECK-LABEL: @vmod_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vmod_hu(v8u16 _1, v8u16 _2) { return __lsx_vmod_hu(_1, _2); } + // CHECK-LABEL: @vmod_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vmod_wu(v4u32 _1, v4u32 _2) { return __lsx_vmod_wu(_1, _2); } + // CHECK-LABEL: @vmod_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vmod_du(v2u64 _1, v2u64 _2) { return __lsx_vmod_du(_1, _2); } + // CHECK-LABEL: @vreplve_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> [[_1:%.*]], i32 [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> [[TMP0]], i32 [[_2:%.*]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vreplve_b(v16i8 _1, int _2) { return __lsx_vreplve_b(_1, _2); } + // CHECK-LABEL: @vreplve_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> [[_1:%.*]], i32 [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> [[TMP0]], i32 [[_2:%.*]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vreplve_h(v8i16 _1, int _2) { return __lsx_vreplve_h(_1, _2); } + // CHECK-LABEL: @vreplve_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> [[_1:%.*]], i32 [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> [[TMP0]], i32 [[_2:%.*]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vreplve_w(v4i32 _1, int _2) { return __lsx_vreplve_w(_1, _2); } + // CHECK-LABEL: @vreplve_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> [[_1:%.*]], i32 [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> [[TMP0]], i32 [[_2:%.*]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vreplve_d(v2i64 _1, int _2) { return __lsx_vreplve_d(_1, _2); } + // CHECK-LABEL: @vreplvei_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vreplvei_b(v16i8 _1) { return __lsx_vreplvei_b(_1, 1); } + // CHECK-LABEL: @vreplvei_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vreplvei_h(v8i16 _1) { return __lsx_vreplvei_h(_1, 1); } + // CHECK-LABEL: @vreplvei_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vreplvei_w(v4i32 _1) { return __lsx_vreplvei_w(_1, 1); } + // CHECK-LABEL: @vreplvei_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vreplvei_d(v2i64 _1) { return __lsx_vreplvei_d(_1, 1); } + // CHECK-LABEL: @vpickev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vpickev_b(v16i8 _1, v16i8 _2) { return __lsx_vpickev_b(_1, _2); } + // CHECK-LABEL: @vpickev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vpickev_h(v8i16 _1, v8i16 _2) { return __lsx_vpickev_h(_1, _2); } + // CHECK-LABEL: @vpickev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vpickev_w(v4i32 _1, v4i32 _2) { return __lsx_vpickev_w(_1, _2); } + // CHECK-LABEL: @vpickev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vpickev_d(v2i64 _1, v2i64 _2) { return __lsx_vpickev_d(_1, _2); } + // CHECK-LABEL: @vpickod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vpickod_b(v16i8 _1, v16i8 _2) { return __lsx_vpickod_b(_1, _2); } + // CHECK-LABEL: @vpickod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vpickod_h(v8i16 _1, v8i16 _2) { return __lsx_vpickod_h(_1, _2); } + // CHECK-LABEL: @vpickod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vpickod_w(v4i32 _1, v4i32 _2) { return __lsx_vpickod_w(_1, _2); } + // CHECK-LABEL: @vpickod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vpickod_d(v2i64 _1, v2i64 _2) { return __lsx_vpickod_d(_1, _2); } + // CHECK-LABEL: @vilvh_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vilvh_b(v16i8 _1, v16i8 _2) { return __lsx_vilvh_b(_1, _2); } + // CHECK-LABEL: @vilvh_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vilvh_h(v8i16 _1, v8i16 _2) { return __lsx_vilvh_h(_1, _2); } + // CHECK-LABEL: @vilvh_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vilvh_w(v4i32 _1, v4i32 _2) { return __lsx_vilvh_w(_1, _2); } + // CHECK-LABEL: @vilvh_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vilvh_d(v2i64 _1, v2i64 _2) { return __lsx_vilvh_d(_1, _2); } + // CHECK-LABEL: @vilvl_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vilvl_b(v16i8 _1, v16i8 _2) { return __lsx_vilvl_b(_1, _2); } + // CHECK-LABEL: @vilvl_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vilvl_h(v8i16 _1, v8i16 _2) { return __lsx_vilvl_h(_1, _2); } + // CHECK-LABEL: @vilvl_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vilvl_w(v4i32 _1, v4i32 _2) { return __lsx_vilvl_w(_1, _2); } + // CHECK-LABEL: @vilvl_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vilvl_d(v2i64 _1, v2i64 _2) { return __lsx_vilvl_d(_1, _2); } + // CHECK-LABEL: @vpackev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vpackev_b(v16i8 _1, v16i8 _2) { return __lsx_vpackev_b(_1, _2); } + // CHECK-LABEL: @vpackev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vpackev_h(v8i16 _1, v8i16 _2) { return __lsx_vpackev_h(_1, _2); } + // CHECK-LABEL: @vpackev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vpackev_w(v4i32 _1, v4i32 _2) { return __lsx_vpackev_w(_1, _2); } + // CHECK-LABEL: @vpackev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vpackev_d(v2i64 _1, v2i64 _2) { return __lsx_vpackev_d(_1, _2); } + // CHECK-LABEL: @vpackod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vpackod_b(v16i8 _1, v16i8 _2) { return __lsx_vpackod_b(_1, _2); } + // CHECK-LABEL: @vpackod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vpackod_h(v8i16 _1, v8i16 _2) { return __lsx_vpackod_h(_1, _2); } + // CHECK-LABEL: @vpackod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vpackod_w(v4i32 _1, v4i32 _2) { return __lsx_vpackod_w(_1, _2); } + // CHECK-LABEL: @vpackod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vpackod_d(v2i64 _1, v2i64 _2) { return __lsx_vpackod_d(_1, _2); } + // CHECK-LABEL: @vshuf_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8i16 vshuf_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __lsx_vshuf_h(_1, _2, _3); + } + // CHECK-LABEL: @vshuf_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4i32 vshuf_w(v4i32 _1, v4i32 _2, v4i32 _3) { + return __lsx_vshuf_w(_1, _2, _3); + } + // CHECK-LABEL: @vshuf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vshuf_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __lsx_vshuf_d(_1, _2, _3); + } + // CHECK-LABEL: @vand_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vand_v(v16u8 _1, v16u8 _2) { return __lsx_vand_v(_1, _2); } + // CHECK-LABEL: @vandi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vandi_b(v16u8 _1) { return __lsx_vandi_b(_1, 1); } + // CHECK-LABEL: @vor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vor_v(v16u8 _1, v16u8 _2) { return __lsx_vor_v(_1, _2); } + // CHECK-LABEL: @vori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vori_b(v16u8 _1) { return __lsx_vori_b(_1, 1); } + // CHECK-LABEL: @vnor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vnor_v(v16u8 _1, v16u8 _2) { return __lsx_vnor_v(_1, _2); } + // CHECK-LABEL: @vnori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vnori_b(v16u8 _1) { return __lsx_vnori_b(_1, 1); } + // CHECK-LABEL: @vxor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vxor_v(v16u8 _1, v16u8 _2) { return __lsx_vxor_v(_1, _2); } + // CHECK-LABEL: @vxori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vxori_b(v16u8 _1) { return __lsx_vxori_b(_1, 1); } + // CHECK-LABEL: @vbitsel_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v16u8 vbitsel_v(v16u8 _1, v16u8 _2, v16u8 _3) { + return __lsx_vbitsel_v(_1, _2, _3); + } + // CHECK-LABEL: @vbitseli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vbitseli_b(v16u8 _1, v16u8 _2) { return __lsx_vbitseli_b(_1, _2, 1); } + // CHECK-LABEL: @vshuf4i_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vshuf4i_b(v16i8 _1) { return __lsx_vshuf4i_b(_1, 1); } + // CHECK-LABEL: @vshuf4i_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vshuf4i_h(v8i16 _1) { return __lsx_vshuf4i_h(_1, 1); } + // CHECK-LABEL: @vshuf4i_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vshuf4i_w(v4i32 _1) { return __lsx_vshuf4i_w(_1, 1); } + // CHECK-LABEL: @vreplgr2vr_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v16i8 vreplgr2vr_b(int _1) { return __lsx_vreplgr2vr_b(_1); } + // CHECK-LABEL: @vreplgr2vr_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v8i16 vreplgr2vr_h(int _1) { return __lsx_vreplgr2vr_h(_1); } + // CHECK-LABEL: @vreplgr2vr_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v4i32 vreplgr2vr_w(int _1) { return __lsx_vreplgr2vr_w(_1); } + // CHECK-LABEL: @vreplgr2vr_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v2i64 vreplgr2vr_d(long _1) { return __lsx_vreplgr2vr_d(_1); } + // CHECK-LABEL: @vpcnt_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vpcnt_b(v16i8 _1) { return __lsx_vpcnt_b(_1); } + // CHECK-LABEL: @vpcnt_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vpcnt_h(v8i16 _1) { return __lsx_vpcnt_h(_1); } + // CHECK-LABEL: @vpcnt_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vpcnt_w(v4i32 _1) { return __lsx_vpcnt_w(_1); } + // CHECK-LABEL: @vpcnt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vpcnt_d(v2i64 _1) { return __lsx_vpcnt_d(_1); } + // CHECK-LABEL: @vclo_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vclo_b(v16i8 _1) { return __lsx_vclo_b(_1); } + // CHECK-LABEL: @vclo_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vclo_h(v8i16 _1) { return __lsx_vclo_h(_1); } + // CHECK-LABEL: @vclo_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vclo_w(v4i32 _1) { return __lsx_vclo_w(_1); } + // CHECK-LABEL: @vclo_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vclo_d(v2i64 _1) { return __lsx_vclo_d(_1); } + // CHECK-LABEL: @vclz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vclz_b(v16i8 _1) { return __lsx_vclz_b(_1); } + // CHECK-LABEL: @vclz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vclz_h(v8i16 _1) { return __lsx_vclz_h(_1); } + // CHECK-LABEL: @vclz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vclz_w(v4i32 _1) { return __lsx_vclz_w(_1); } + // CHECK-LABEL: @vclz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vclz_d(v2i64 _1) { return __lsx_vclz_d(_1); } + // CHECK-LABEL: @vpickve2gr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int vpickve2gr_b(v16i8 _1) { return __lsx_vpickve2gr_b(_1, 1); } + // CHECK-LABEL: @vpickve2gr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int vpickve2gr_h(v8i16 _1) { return __lsx_vpickve2gr_h(_1, 1); } + // CHECK-LABEL: @vpickve2gr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int vpickve2gr_w(v4i32 _1) { return __lsx_vpickve2gr_w(_1, 1); } + // CHECK-LABEL: @vpickve2gr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i64 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: ret i64 [[TMP1]] + // + long vpickve2gr_d(v2i64 _1) { return __lsx_vpickve2gr_d(_1, 1); } + // CHECK-LABEL: @vpickve2gr_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + unsigned int vpickve2gr_bu(v16i8 _1) { return __lsx_vpickve2gr_bu(_1, 1); } + // CHECK-LABEL: @vpickve2gr_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + unsigned int vpickve2gr_hu(v8i16 _1) { return __lsx_vpickve2gr_hu(_1, 1); } + // CHECK-LABEL: @vpickve2gr_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + unsigned int vpickve2gr_wu(v4i32 _1) { return __lsx_vpickve2gr_wu(_1, 1); } + // CHECK-LABEL: @vpickve2gr_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i64 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: ret i64 [[TMP1]] + // + unsigned long int vpickve2gr_du(v2i64 _1) { return __lsx_vpickve2gr_du(_1, 1); } + // CHECK-LABEL: @vinsgr2vr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> [[_1:%.*]], i32 1, i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> [[TMP0]], i32 1, i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vinsgr2vr_b(v16i8 _1) { return __lsx_vinsgr2vr_b(_1, 1, 1); } + // CHECK-LABEL: @vinsgr2vr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> [[_1:%.*]], i32 1, i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> [[TMP0]], i32 1, i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vinsgr2vr_h(v8i16 _1) { return __lsx_vinsgr2vr_h(_1, 1, 1); } + // CHECK-LABEL: @vinsgr2vr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> [[_1:%.*]], i32 1, i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> [[TMP0]], i32 1, i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vinsgr2vr_w(v4i32 _1) { return __lsx_vinsgr2vr_w(_1, 1, 1); } + // CHECK-LABEL: @vinsgr2vr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> [[_1:%.*]], i64 1, i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> [[TMP0]], i64 1, i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vinsgr2vr_d(v2i64 _1) { return __lsx_vinsgr2vr_d(_1, 1, 1); } + // CHECK-LABEL: @vfadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfadd_s(v4f32 _1, v4f32 _2) { return __lsx_vfadd_s(_1, _2); } + // CHECK-LABEL: @vfadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2f64 vfadd_d(v2f64 _1, v2f64 _2) { return __lsx_vfadd_d(_1, _2); } + // CHECK-LABEL: @vfsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfsub_s(v4f32 _1, v4f32 _2) { return __lsx_vfsub_s(_1, _2); } + // CHECK-LABEL: @vfsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2f64 vfsub_d(v2f64 _1, v2f64 _2) { return __lsx_vfsub_d(_1, _2); } + // CHECK-LABEL: @vfmul_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfmul_s(v4f32 _1, v4f32 _2) { return __lsx_vfmul_s(_1, _2); } + // CHECK-LABEL: @vfmul_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2f64 vfmul_d(v2f64 _1, v2f64 _2) { return __lsx_vfmul_d(_1, _2); } + // CHECK-LABEL: @vfdiv_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfdiv_s(v4f32 _1, v4f32 _2) { return __lsx_vfdiv_s(_1, _2); } + // CHECK-LABEL: @vfdiv_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2f64 vfdiv_d(v2f64 _1, v2f64 _2) { return __lsx_vfdiv_d(_1, _2); } + // CHECK-LABEL: @vfcvt_h_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vfcvt_h_s(v4f32 _1, v4f32 _2) { return __lsx_vfcvt_h_s(_1, _2); } + // CHECK-LABEL: @vfcvt_s_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfcvt_s_d(v2f64 _1, v2f64 _2) { return __lsx_vfcvt_s_d(_1, _2); } + // CHECK-LABEL: @vfmin_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfmin_s(v4f32 _1, v4f32 _2) { return __lsx_vfmin_s(_1, _2); } + // CHECK-LABEL: @vfmin_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2f64 vfmin_d(v2f64 _1, v2f64 _2) { return __lsx_vfmin_d(_1, _2); } + // CHECK-LABEL: @vfmina_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfmina_s(v4f32 _1, v4f32 _2) { return __lsx_vfmina_s(_1, _2); } + // CHECK-LABEL: @vfmina_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2f64 vfmina_d(v2f64 _1, v2f64 _2) { return __lsx_vfmina_d(_1, _2); } + // CHECK-LABEL: @vfmax_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfmax_s(v4f32 _1, v4f32 _2) { return __lsx_vfmax_s(_1, _2); } + // CHECK-LABEL: @vfmax_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2f64 vfmax_d(v2f64 _1, v2f64 _2) { return __lsx_vfmax_d(_1, _2); } + // CHECK-LABEL: @vfmaxa_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfmaxa_s(v4f32 _1, v4f32 _2) { return __lsx_vfmaxa_s(_1, _2); } + // CHECK-LABEL: @vfmaxa_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2f64 vfmaxa_d(v2f64 _1, v2f64 _2) { return __lsx_vfmaxa_d(_1, _2); } + // CHECK-LABEL: @vfclass_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vfclass_s(v4f32 _1) { return __lsx_vfclass_s(_1); } + // CHECK-LABEL: @vfclass_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vfclass_d(v2f64 _1) { return __lsx_vfclass_d(_1); } + // CHECK-LABEL: @vfsqrt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vfsqrt_s(v4f32 _1) { return __lsx_vfsqrt_s(_1); } + // CHECK-LABEL: @vfsqrt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vfsqrt_d(v2f64 _1) { return __lsx_vfsqrt_d(_1); } + // CHECK-LABEL: @vfrecip_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vfrecip_s(v4f32 _1) { return __lsx_vfrecip_s(_1); } + // CHECK-LABEL: @vfrecip_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vfrecip_d(v2f64 _1) { return __lsx_vfrecip_d(_1); } + // CHECK-LABEL: @vfrint_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vfrint_s(v4f32 _1) { return __lsx_vfrint_s(_1); } + // CHECK-LABEL: @vfrint_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vfrint_d(v2f64 _1) { return __lsx_vfrint_d(_1); } + // CHECK-LABEL: @vfrsqrt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vfrsqrt_s(v4f32 _1) { return __lsx_vfrsqrt_s(_1); } + // CHECK-LABEL: @vfrsqrt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vfrsqrt_d(v2f64 _1) { return __lsx_vfrsqrt_d(_1); } + // CHECK-LABEL: @vflogb_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vflogb_s(v4f32 _1) { return __lsx_vflogb_s(_1); } + // CHECK-LABEL: @vflogb_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vflogb_d(v2f64 _1) { return __lsx_vflogb_d(_1); } + // CHECK-LABEL: @vfcvth_s_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vfcvth_s_h(v8i16 _1) { return __lsx_vfcvth_s_h(_1); } + // CHECK-LABEL: @vfcvth_d_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vfcvth_d_s(v4f32 _1) { return __lsx_vfcvth_d_s(_1); } + // CHECK-LABEL: @vfcvtl_s_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vfcvtl_s_h(v8i16 _1) { return __lsx_vfcvtl_s_h(_1); } + // CHECK-LABEL: @vfcvtl_d_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vfcvtl_d_s(v4f32 _1) { return __lsx_vfcvtl_d_s(_1); } + // CHECK-LABEL: @vftint_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vftint_w_s(v4f32 _1) { return __lsx_vftint_w_s(_1); } + // CHECK-LABEL: @vftint_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftint_l_d(v2f64 _1) { return __lsx_vftint_l_d(_1); } + // CHECK-LABEL: @vftint_wu_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vftint_wu_s(v4f32 _1) { return __lsx_vftint_wu_s(_1); } + // CHECK-LABEL: @vftint_lu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vftint_lu_d(v2f64 _1) { return __lsx_vftint_lu_d(_1); } + // CHECK-LABEL: @vftintrz_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vftintrz_w_s(v4f32 _1) { return __lsx_vftintrz_w_s(_1); } + // CHECK-LABEL: @vftintrz_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrz_l_d(v2f64 _1) { return __lsx_vftintrz_l_d(_1); } + // CHECK-LABEL: @vftintrz_wu_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vftintrz_wu_s(v4f32 _1) { return __lsx_vftintrz_wu_s(_1); } + // CHECK-LABEL: @vftintrz_lu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vftintrz_lu_d(v2f64 _1) { return __lsx_vftintrz_lu_d(_1); } + // CHECK-LABEL: @vffint_s_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vffint_s_w(v4i32 _1) { return __lsx_vffint_s_w(_1); } + // CHECK-LABEL: @vffint_d_l( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vffint_d_l(v2i64 _1) { return __lsx_vffint_d_l(_1); } + // CHECK-LABEL: @vffint_s_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vffint_s_wu(v4u32 _1) { return __lsx_vffint_s_wu(_1); } + // CHECK-LABEL: @vffint_d_lu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vffint_d_lu(v2u64 _1) { return __lsx_vffint_d_lu(_1); } + // CHECK-LABEL: @vandn_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vandn_v(v16u8 _1, v16u8 _2) { return __lsx_vandn_v(_1, _2); } + // CHECK-LABEL: @vneg_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vneg_b(v16i8 _1) { return __lsx_vneg_b(_1); } + // CHECK-LABEL: @vneg_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vneg_h(v8i16 _1) { return __lsx_vneg_h(_1); } + // CHECK-LABEL: @vneg_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vneg_w(v4i32 _1) { return __lsx_vneg_w(_1); } + // CHECK-LABEL: @vneg_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vneg_d(v2i64 _1) { return __lsx_vneg_d(_1); } + // CHECK-LABEL: @vmuh_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vmuh_b(v16i8 _1, v16i8 _2) { return __lsx_vmuh_b(_1, _2); } + // CHECK-LABEL: @vmuh_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmuh_h(v8i16 _1, v8i16 _2) { return __lsx_vmuh_h(_1, _2); } + // CHECK-LABEL: @vmuh_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmuh_w(v4i32 _1, v4i32 _2) { return __lsx_vmuh_w(_1, _2); } + // CHECK-LABEL: @vmuh_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmuh_d(v2i64 _1, v2i64 _2) { return __lsx_vmuh_d(_1, _2); } + // CHECK-LABEL: @vmuh_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vmuh_bu(v16u8 _1, v16u8 _2) { return __lsx_vmuh_bu(_1, _2); } + // CHECK-LABEL: @vmuh_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vmuh_hu(v8u16 _1, v8u16 _2) { return __lsx_vmuh_hu(_1, _2); } + // CHECK-LABEL: @vmuh_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vmuh_wu(v4u32 _1, v4u32 _2) { return __lsx_vmuh_wu(_1, _2); } + // CHECK-LABEL: @vmuh_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vmuh_du(v2u64 _1, v2u64 _2) { return __lsx_vmuh_du(_1, _2); } + // CHECK-LABEL: @vsllwil_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vsllwil_h_b(v16i8 _1) { return __lsx_vsllwil_h_b(_1, 1); } + // CHECK-LABEL: @vsllwil_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vsllwil_w_h(v8i16 _1) { return __lsx_vsllwil_w_h(_1, 1); } + // CHECK-LABEL: @vsllwil_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vsllwil_d_w(v4i32 _1) { return __lsx_vsllwil_d_w(_1, 1); } + // CHECK-LABEL: @vsllwil_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8u16 vsllwil_hu_bu(v16u8 _1) { return __lsx_vsllwil_hu_bu(_1, 1); } + // CHECK-LABEL: @vsllwil_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vsllwil_wu_hu(v8u16 _1) { return __lsx_vsllwil_wu_hu(_1, 1); } + // CHECK-LABEL: @vsllwil_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vsllwil_du_wu(v4u32 _1) { return __lsx_vsllwil_du_wu(_1, 1); } + // CHECK-LABEL: @vsran_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsran_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsran_b_h(_1, _2); } + // CHECK-LABEL: @vsran_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsran_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsran_h_w(_1, _2); } + // CHECK-LABEL: @vsran_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsran_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsran_w_d(_1, _2); } + // CHECK-LABEL: @vssran_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssran_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssran_b_h(_1, _2); } + // CHECK-LABEL: @vssran_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssran_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssran_h_w(_1, _2); } + // CHECK-LABEL: @vssran_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssran_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssran_w_d(_1, _2); } + // CHECK-LABEL: @vssran_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssran_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssran_bu_h(_1, _2); } + // CHECK-LABEL: @vssran_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssran_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssran_hu_w(_1, _2); } + // CHECK-LABEL: @vssran_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssran_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssran_wu_d(_1, _2); } + // CHECK-LABEL: @vsrarn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrarn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrarn_b_h(_1, _2); } + // CHECK-LABEL: @vsrarn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrarn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrarn_h_w(_1, _2); } + // CHECK-LABEL: @vsrarn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrarn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrarn_w_d(_1, _2); } + // CHECK-LABEL: @vssrarn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssrarn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrarn_b_h(_1, _2); } + // CHECK-LABEL: @vssrarn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssrarn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrarn_h_w(_1, _2); } + // CHECK-LABEL: @vssrarn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssrarn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrarn_w_d(_1, _2); } + // CHECK-LABEL: @vssrarn_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssrarn_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrarn_bu_h(_1, _2); } + // CHECK-LABEL: @vssrarn_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssrarn_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrarn_hu_w(_1, _2); } + // CHECK-LABEL: @vssrarn_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssrarn_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrarn_wu_d(_1, _2); } + // CHECK-LABEL: @vsrln_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrln_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrln_b_h(_1, _2); } + // CHECK-LABEL: @vsrln_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrln_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrln_h_w(_1, _2); } + // CHECK-LABEL: @vsrln_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrln_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrln_w_d(_1, _2); } + // CHECK-LABEL: @vssrln_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssrln_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrln_bu_h(_1, _2); } + // CHECK-LABEL: @vssrln_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssrln_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrln_hu_w(_1, _2); } + // CHECK-LABEL: @vssrln_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssrln_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrln_wu_d(_1, _2); } + // CHECK-LABEL: @vsrlrn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrlrn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrlrn_b_h(_1, _2); } + // CHECK-LABEL: @vsrlrn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrlrn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrlrn_h_w(_1, _2); } + // CHECK-LABEL: @vsrlrn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrlrn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrlrn_w_d(_1, _2); } + // CHECK-LABEL: @vssrlrn_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssrlrn_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrlrn_bu_h(_1, _2); } + // CHECK-LABEL: @vssrlrn_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssrlrn_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrlrn_hu_w(_1, _2); } + // CHECK-LABEL: @vssrlrn_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssrlrn_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrlrn_wu_d(_1, _2); } + // CHECK-LABEL: @vfrstpi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vfrstpi_b(v16i8 _1, v16i8 _2) { return __lsx_vfrstpi_b(_1, _2, 1); } + // CHECK-LABEL: @vfrstpi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vfrstpi_h(v8i16 _1, v8i16 _2) { return __lsx_vfrstpi_h(_1, _2, 1); } + // CHECK-LABEL: @vfrstp_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v16i8 vfrstp_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __lsx_vfrstp_b(_1, _2, _3); + } + // CHECK-LABEL: @vfrstp_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8i16 vfrstp_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __lsx_vfrstp_h(_1, _2, _3); + } + // CHECK-LABEL: @vshuf4i_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vshuf4i_d(v2i64 _1, v2i64 _2) { return __lsx_vshuf4i_d(_1, _2, 1); } + // CHECK-LABEL: @vbsrl_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vbsrl_v(v16i8 _1) { return __lsx_vbsrl_v(_1, 1); } + // CHECK-LABEL: @vbsll_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vbsll_v(v16i8 _1) { return __lsx_vbsll_v(_1, 1); } + // CHECK-LABEL: @vextrins_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vextrins_b(v16i8 _1, v16i8 _2) { return __lsx_vextrins_b(_1, _2, 1); } + // CHECK-LABEL: @vextrins_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vextrins_h(v8i16 _1, v8i16 _2) { return __lsx_vextrins_h(_1, _2, 1); } + // CHECK-LABEL: @vextrins_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vextrins_w(v4i32 _1, v4i32 _2) { return __lsx_vextrins_w(_1, _2, 1); } + // CHECK-LABEL: @vextrins_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vextrins_d(v2i64 _1, v2i64 _2) { return __lsx_vextrins_d(_1, _2, 1); } + // CHECK-LABEL: @vmskltz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vmskltz_b(v16i8 _1) { return __lsx_vmskltz_b(_1); } + // CHECK-LABEL: @vmskltz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vmskltz_h(v8i16 _1) { return __lsx_vmskltz_h(_1); } + // CHECK-LABEL: @vmskltz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vmskltz_w(v4i32 _1) { return __lsx_vmskltz_w(_1); } + // CHECK-LABEL: @vmskltz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vmskltz_d(v2i64 _1) { return __lsx_vmskltz_d(_1); } + // CHECK-LABEL: @vsigncov_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsigncov_b(v16i8 _1, v16i8 _2) { return __lsx_vsigncov_b(_1, _2); } + // CHECK-LABEL: @vsigncov_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsigncov_h(v8i16 _1, v8i16 _2) { return __lsx_vsigncov_h(_1, _2); } + // CHECK-LABEL: @vsigncov_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsigncov_w(v4i32 _1, v4i32 _2) { return __lsx_vsigncov_w(_1, _2); } + // CHECK-LABEL: @vsigncov_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsigncov_d(v2i64 _1, v2i64 _2) { return __lsx_vsigncov_d(_1, _2); } + // CHECK-LABEL: @vfmadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4f32 vfmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { + return __lsx_vfmadd_s(_1, _2, _3); + } + // CHECK-LABEL: @vfmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2f64 vfmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { + return __lsx_vfmadd_d(_1, _2, _3); + } + // CHECK-LABEL: @vfmsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4f32 vfmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { + return __lsx_vfmsub_s(_1, _2, _3); + } + // CHECK-LABEL: @vfmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2f64 vfmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { + return __lsx_vfmsub_d(_1, _2, _3); + } + // CHECK-LABEL: @vfnmadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4f32 vfnmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { + return __lsx_vfnmadd_s(_1, _2, _3); + } + // CHECK-LABEL: @vfnmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2f64 vfnmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { + return __lsx_vfnmadd_d(_1, _2, _3); + } + // CHECK-LABEL: @vfnmsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4f32 vfnmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { + return __lsx_vfnmsub_s(_1, _2, _3); + } + // CHECK-LABEL: @vfnmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2f64 vfnmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { + return __lsx_vfnmsub_d(_1, _2, _3); + } + // CHECK-LABEL: @vftintrne_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vftintrne_w_s(v4f32 _1) { return __lsx_vftintrne_w_s(_1); } + // CHECK-LABEL: @vftintrne_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrne_l_d(v2f64 _1) { return __lsx_vftintrne_l_d(_1); } + // CHECK-LABEL: @vftintrp_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vftintrp_w_s(v4f32 _1) { return __lsx_vftintrp_w_s(_1); } + // CHECK-LABEL: @vftintrp_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrp_l_d(v2f64 _1) { return __lsx_vftintrp_l_d(_1); } + // CHECK-LABEL: @vftintrm_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vftintrm_w_s(v4f32 _1) { return __lsx_vftintrm_w_s(_1); } + // CHECK-LABEL: @vftintrm_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrm_l_d(v2f64 _1) { return __lsx_vftintrm_l_d(_1); } + // CHECK-LABEL: @vftint_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vftint_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftint_w_d(_1, _2); } + // CHECK-LABEL: @vffint_s_l( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vffint_s_l(v2i64 _1, v2i64 _2) { return __lsx_vffint_s_l(_1, _2); } + // CHECK-LABEL: @vftintrz_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vftintrz_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrz_w_d(_1, _2); } + // CHECK-LABEL: @vftintrp_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vftintrp_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrp_w_d(_1, _2); } + // CHECK-LABEL: @vftintrm_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vftintrm_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrm_w_d(_1, _2); } + // CHECK-LABEL: @vftintrne_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vftintrne_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrne_w_d(_1, _2); } + // CHECK-LABEL: @vftintl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintl_l_s(v4f32 _1) { return __lsx_vftintl_l_s(_1); } + // CHECK-LABEL: @vftinth_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftinth_l_s(v4f32 _1) { return __lsx_vftinth_l_s(_1); } + // CHECK-LABEL: @vffinth_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vffinth_d_w(v4i32 _1) { return __lsx_vffinth_d_w(_1); } + // CHECK-LABEL: @vffintl_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vffintl_d_w(v4i32 _1) { return __lsx_vffintl_d_w(_1); } + // CHECK-LABEL: @vftintrzl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrzl_l_s(v4f32 _1) { return __lsx_vftintrzl_l_s(_1); } + // CHECK-LABEL: @vftintrzh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrzh_l_s(v4f32 _1) { return __lsx_vftintrzh_l_s(_1); } + // CHECK-LABEL: @vftintrpl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrpl_l_s(v4f32 _1) { return __lsx_vftintrpl_l_s(_1); } + // CHECK-LABEL: @vftintrph_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrph_l_s(v4f32 _1) { return __lsx_vftintrph_l_s(_1); } + // CHECK-LABEL: @vftintrml_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrml_l_s(v4f32 _1) { return __lsx_vftintrml_l_s(_1); } + // CHECK-LABEL: @vftintrmh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrmh_l_s(v4f32 _1) { return __lsx_vftintrmh_l_s(_1); } + // CHECK-LABEL: @vftintrnel_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrnel_l_s(v4f32 _1) { return __lsx_vftintrnel_l_s(_1); } + // CHECK-LABEL: @vftintrneh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrneh_l_s(v4f32 _1) { return __lsx_vftintrneh_l_s(_1); } + // CHECK-LABEL: @vfrintrne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> +-// CHECK-NEXT: ret <4 x i32> [[TMP1]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vfrintrne_s(v4f32 _1) { return __lsx_vfrintrne_s(_1); } + // CHECK-LABEL: @vfrintrne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> +-// CHECK-NEXT: ret <2 x i64> [[TMP1]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vfrintrne_d(v2f64 _1) { return __lsx_vfrintrne_d(_1); } + // CHECK-LABEL: @vfrintrz_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> +-// CHECK-NEXT: ret <4 x i32> [[TMP1]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vfrintrz_s(v4f32 _1) { return __lsx_vfrintrz_s(_1); } + // CHECK-LABEL: @vfrintrz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> +-// CHECK-NEXT: ret <2 x i64> [[TMP1]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vfrintrz_d(v2f64 _1) { return __lsx_vfrintrz_d(_1); } + // CHECK-LABEL: @vfrintrp_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> +-// CHECK-NEXT: ret <4 x i32> [[TMP1]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vfrintrp_s(v4f32 _1) { return __lsx_vfrintrp_s(_1); } + // CHECK-LABEL: @vfrintrp_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> +-// CHECK-NEXT: ret <2 x i64> [[TMP1]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vfrintrp_d(v2f64 _1) { return __lsx_vfrintrp_d(_1); } + // CHECK-LABEL: @vfrintrm_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> +-// CHECK-NEXT: ret <4 x i32> [[TMP1]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vfrintrm_s(v4f32 _1) { return __lsx_vfrintrm_s(_1); } + // CHECK-LABEL: @vfrintrm_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> +-// CHECK-NEXT: ret <2 x i64> [[TMP1]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vfrintrm_d(v2f64 _1) { return __lsx_vfrintrm_d(_1); } + // CHECK-LABEL: @vstelm_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1, i32 1) ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i32 1, i32 1) + // CHECK-NEXT: ret void + // + void vstelm_b(v16i8 _1, void *_2) { return __lsx_vstelm_b(_1, _2, 1, 1); } + // CHECK-LABEL: @vstelm_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> [[_1:%.*]], ptr [[_2:%.*]], i32 2, i32 1) ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> [[TMP0]], ptr [[_2:%.*]], i32 2, i32 1) + // CHECK-NEXT: ret void + // + void vstelm_h(v8i16 _1, void *_2) { return __lsx_vstelm_h(_1, _2, 2, 1); } + // CHECK-LABEL: @vstelm_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> [[_1:%.*]], ptr [[_2:%.*]], i32 4, i32 1) ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> [[TMP0]], ptr [[_2:%.*]], i32 4, i32 1) + // CHECK-NEXT: ret void + // + void vstelm_w(v4i32 _1, void *_2) { return __lsx_vstelm_w(_1, _2, 4, 1); } + // CHECK-LABEL: @vstelm_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> [[_1:%.*]], ptr [[_2:%.*]], i32 8, i32 1) ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> [[TMP0]], ptr [[_2:%.*]], i32 8, i32 1) + // CHECK-NEXT: ret void + // + void vstelm_d(v2i64 _1, void *_2) { return __lsx_vstelm_d(_1, _2, 8, 1); } + // CHECK-LABEL: @vaddwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vaddwev_d_w(_1, _2); } + // CHECK-LABEL: @vaddwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vaddwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vaddwev_w_h(_1, _2); } + // CHECK-LABEL: @vaddwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vaddwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vaddwev_h_b(_1, _2); } + // CHECK-LABEL: @vaddwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vaddwod_d_w(_1, _2); } + // CHECK-LABEL: @vaddwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vaddwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vaddwod_w_h(_1, _2); } + // CHECK-LABEL: @vaddwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vaddwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vaddwod_h_b(_1, _2); } + // CHECK-LABEL: @vaddwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vaddwev_d_wu(_1, _2); } + // CHECK-LABEL: @vaddwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vaddwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vaddwev_w_hu(_1, _2); } + // CHECK-LABEL: @vaddwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vaddwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vaddwev_h_bu(_1, _2); } + // CHECK-LABEL: @vaddwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vaddwod_d_wu(_1, _2); } + // CHECK-LABEL: @vaddwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vaddwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vaddwod_w_hu(_1, _2); } + // CHECK-LABEL: @vaddwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vaddwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vaddwod_h_bu(_1, _2); } + // CHECK-LABEL: @vaddwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwev_d_wu_w(v4u32 _1, v4i32 _2) { + return __lsx_vaddwev_d_wu_w(_1, _2); + } + // CHECK-LABEL: @vaddwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vaddwev_w_hu_h(v8u16 _1, v8i16 _2) { + return __lsx_vaddwev_w_hu_h(_1, _2); + } + // CHECK-LABEL: @vaddwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vaddwev_h_bu_b(v16u8 _1, v16i8 _2) { + return __lsx_vaddwev_h_bu_b(_1, _2); + } + // CHECK-LABEL: @vaddwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwod_d_wu_w(v4u32 _1, v4i32 _2) { + return __lsx_vaddwod_d_wu_w(_1, _2); + } + // CHECK-LABEL: @vaddwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vaddwod_w_hu_h(v8u16 _1, v8i16 _2) { + return __lsx_vaddwod_w_hu_h(_1, _2); + } + // CHECK-LABEL: @vaddwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vaddwod_h_bu_b(v16u8 _1, v16i8 _2) { + return __lsx_vaddwod_h_bu_b(_1, _2); + } + // CHECK-LABEL: @vsubwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsubwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vsubwev_d_w(_1, _2); } + // CHECK-LABEL: @vsubwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsubwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vsubwev_w_h(_1, _2); } + // CHECK-LABEL: @vsubwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsubwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vsubwev_h_b(_1, _2); } + // CHECK-LABEL: @vsubwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsubwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vsubwod_d_w(_1, _2); } + // CHECK-LABEL: @vsubwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsubwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vsubwod_w_h(_1, _2); } + // CHECK-LABEL: @vsubwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsubwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vsubwod_h_b(_1, _2); } + // CHECK-LABEL: @vsubwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsubwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vsubwev_d_wu(_1, _2); } + // CHECK-LABEL: @vsubwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsubwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vsubwev_w_hu(_1, _2); } + // CHECK-LABEL: @vsubwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsubwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vsubwev_h_bu(_1, _2); } + // CHECK-LABEL: @vsubwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsubwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vsubwod_d_wu(_1, _2); } + // CHECK-LABEL: @vsubwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsubwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vsubwod_w_hu(_1, _2); } + // CHECK-LABEL: @vsubwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsubwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vsubwod_h_bu(_1, _2); } + // CHECK-LABEL: @vaddwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vaddwev_q_d(_1, _2); } + // CHECK-LABEL: @vaddwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vaddwod_q_d(_1, _2); } + // CHECK-LABEL: @vaddwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vaddwev_q_du(_1, _2); } + // CHECK-LABEL: @vaddwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vaddwod_q_du(_1, _2); } + // CHECK-LABEL: @vsubwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsubwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vsubwev_q_d(_1, _2); } + // CHECK-LABEL: @vsubwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsubwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vsubwod_q_d(_1, _2); } + // CHECK-LABEL: @vsubwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsubwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vsubwev_q_du(_1, _2); } + // CHECK-LABEL: @vsubwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsubwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vsubwod_q_du(_1, _2); } + // CHECK-LABEL: @vaddwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwev_q_du_d(v2u64 _1, v2i64 _2) { + return __lsx_vaddwev_q_du_d(_1, _2); + } + // CHECK-LABEL: @vaddwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwod_q_du_d(v2u64 _1, v2i64 _2) { + return __lsx_vaddwod_q_du_d(_1, _2); + } + // CHECK-LABEL: @vmulwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vmulwev_d_w(_1, _2); } + // CHECK-LABEL: @vmulwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmulwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vmulwev_w_h(_1, _2); } + // CHECK-LABEL: @vmulwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmulwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vmulwev_h_b(_1, _2); } + // CHECK-LABEL: @vmulwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vmulwod_d_w(_1, _2); } + // CHECK-LABEL: @vmulwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmulwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vmulwod_w_h(_1, _2); } + // CHECK-LABEL: @vmulwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmulwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vmulwod_h_b(_1, _2); } + // CHECK-LABEL: @vmulwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vmulwev_d_wu(_1, _2); } + // CHECK-LABEL: @vmulwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmulwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vmulwev_w_hu(_1, _2); } + // CHECK-LABEL: @vmulwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmulwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vmulwev_h_bu(_1, _2); } + // CHECK-LABEL: @vmulwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vmulwod_d_wu(_1, _2); } + // CHECK-LABEL: @vmulwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmulwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vmulwod_w_hu(_1, _2); } + // CHECK-LABEL: @vmulwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmulwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vmulwod_h_bu(_1, _2); } + // CHECK-LABEL: @vmulwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwev_d_wu_w(v4u32 _1, v4i32 _2) { + return __lsx_vmulwev_d_wu_w(_1, _2); + } + // CHECK-LABEL: @vmulwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmulwev_w_hu_h(v8u16 _1, v8i16 _2) { + return __lsx_vmulwev_w_hu_h(_1, _2); + } + // CHECK-LABEL: @vmulwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmulwev_h_bu_b(v16u8 _1, v16i8 _2) { + return __lsx_vmulwev_h_bu_b(_1, _2); + } + // CHECK-LABEL: @vmulwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwod_d_wu_w(v4u32 _1, v4i32 _2) { + return __lsx_vmulwod_d_wu_w(_1, _2); + } + // CHECK-LABEL: @vmulwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmulwod_w_hu_h(v8u16 _1, v8i16 _2) { + return __lsx_vmulwod_w_hu_h(_1, _2); + } + // CHECK-LABEL: @vmulwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmulwod_h_bu_b(v16u8 _1, v16i8 _2) { + return __lsx_vmulwod_h_bu_b(_1, _2); + } + // CHECK-LABEL: @vmulwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vmulwev_q_d(_1, _2); } + // CHECK-LABEL: @vmulwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vmulwod_q_d(_1, _2); } + // CHECK-LABEL: @vmulwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vmulwev_q_du(_1, _2); } + // CHECK-LABEL: @vmulwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vmulwod_q_du(_1, _2); } + // CHECK-LABEL: @vmulwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwev_q_du_d(v2u64 _1, v2i64 _2) { + return __lsx_vmulwev_q_du_d(_1, _2); + } + // CHECK-LABEL: @vmulwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwod_q_du_d(v2u64 _1, v2i64 _2) { + return __lsx_vmulwod_q_du_d(_1, _2); + } + // CHECK-LABEL: @vhaddw_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vhaddw_q_d(v2i64 _1, v2i64 _2) { return __lsx_vhaddw_q_d(_1, _2); } + // CHECK-LABEL: @vhaddw_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vhaddw_qu_du(v2u64 _1, v2u64 _2) { return __lsx_vhaddw_qu_du(_1, _2); } + // CHECK-LABEL: @vhsubw_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vhsubw_q_d(v2i64 _1, v2i64 _2) { return __lsx_vhsubw_q_d(_1, _2); } + // CHECK-LABEL: @vhsubw_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vhsubw_qu_du(v2u64 _1, v2u64 _2) { return __lsx_vhsubw_qu_du(_1, _2); } + // CHECK-LABEL: @vmaddwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmaddwev_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { + return __lsx_vmaddwev_d_w(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4i32 vmaddwev_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { + return __lsx_vmaddwev_w_h(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8i16 vmaddwev_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { + return __lsx_vmaddwev_h_b(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2u64 vmaddwev_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { + return __lsx_vmaddwev_d_wu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4u32 vmaddwev_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { + return __lsx_vmaddwev_w_hu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8u16 vmaddwev_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { + return __lsx_vmaddwev_h_bu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmaddwod_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { + return __lsx_vmaddwod_d_w(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4i32 vmaddwod_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { + return __lsx_vmaddwod_w_h(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8i16 vmaddwod_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { + return __lsx_vmaddwod_h_b(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2u64 vmaddwod_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { + return __lsx_vmaddwod_d_wu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4u32 vmaddwod_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { + return __lsx_vmaddwod_w_hu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8u16 vmaddwod_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { + return __lsx_vmaddwod_h_bu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmaddwev_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { + return __lsx_vmaddwev_d_wu_w(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4i32 vmaddwev_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { + return __lsx_vmaddwev_w_hu_h(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8i16 vmaddwev_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { + return __lsx_vmaddwev_h_bu_b(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmaddwod_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { + return __lsx_vmaddwod_d_wu_w(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4i32 vmaddwod_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { + return __lsx_vmaddwod_w_hu_h(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8i16 vmaddwod_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { + return __lsx_vmaddwod_h_bu_b(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmaddwev_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __lsx_vmaddwev_q_d(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmaddwod_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __lsx_vmaddwod_q_d(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2u64 vmaddwev_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { + return __lsx_vmaddwev_q_du(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2u64 vmaddwod_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { + return __lsx_vmaddwod_q_du(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmaddwev_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { + return __lsx_vmaddwev_q_du_d(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmaddwod_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { + return __lsx_vmaddwod_q_du_d(_1, _2, _3); + } + // CHECK-LABEL: @vrotr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vrotr_b(v16i8 _1, v16i8 _2) { return __lsx_vrotr_b(_1, _2); } + // CHECK-LABEL: @vrotr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vrotr_h(v8i16 _1, v8i16 _2) { return __lsx_vrotr_h(_1, _2); } + // CHECK-LABEL: @vrotr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vrotr_w(v4i32 _1, v4i32 _2) { return __lsx_vrotr_w(_1, _2); } + // CHECK-LABEL: @vrotr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vrotr_d(v2i64 _1, v2i64 _2) { return __lsx_vrotr_d(_1, _2); } + // CHECK-LABEL: @vadd_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vadd_q(v2i64 _1, v2i64 _2) { return __lsx_vadd_q(_1, _2); } + // CHECK-LABEL: @vsub_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsub_q(v2i64 _1, v2i64 _2) { return __lsx_vsub_q(_1, _2); } + // CHECK-LABEL: @vldrepl_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v16i8 vldrepl_b(void *_1) { return __lsx_vldrepl_b(_1, 1); } + // CHECK-LABEL: @vldrepl_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr [[_1:%.*]], i32 2) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v8i16 vldrepl_h(void *_1) { return __lsx_vldrepl_h(_1, 2); } + // CHECK-LABEL: @vldrepl_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr [[_1:%.*]], i32 4) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v4i32 vldrepl_w(void *_1) { return __lsx_vldrepl_w(_1, 4); } + // CHECK-LABEL: @vldrepl_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr [[_1:%.*]], i32 8) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v2i64 vldrepl_d(void *_1) { return __lsx_vldrepl_d(_1, 8); } + // CHECK-LABEL: @vmskgez_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vmskgez_b(v16i8 _1) { return __lsx_vmskgez_b(_1); } + // CHECK-LABEL: @vmsknz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vmsknz_b(v16i8 _1) { return __lsx_vmsknz_b(_1); } + // CHECK-LABEL: @vexth_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vexth_h_b(v16i8 _1) { return __lsx_vexth_h_b(_1); } + // CHECK-LABEL: @vexth_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vexth_w_h(v8i16 _1) { return __lsx_vexth_w_h(_1); } + // CHECK-LABEL: @vexth_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vexth_d_w(v4i32 _1) { return __lsx_vexth_d_w(_1); } + // CHECK-LABEL: @vexth_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vexth_q_d(v2i64 _1) { return __lsx_vexth_q_d(_1); } + // CHECK-LABEL: @vexth_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8u16 vexth_hu_bu(v16u8 _1) { return __lsx_vexth_hu_bu(_1); } + // CHECK-LABEL: @vexth_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vexth_wu_hu(v8u16 _1) { return __lsx_vexth_wu_hu(_1); } + // CHECK-LABEL: @vexth_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vexth_du_wu(v4u32 _1) { return __lsx_vexth_du_wu(_1); } + // CHECK-LABEL: @vexth_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vexth_qu_du(v2u64 _1) { return __lsx_vexth_qu_du(_1); } + // CHECK-LABEL: @vrotri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vrotri_b(v16i8 _1) { return __lsx_vrotri_b(_1, 1); } + // CHECK-LABEL: @vrotri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vrotri_h(v8i16 _1) { return __lsx_vrotri_h(_1, 1); } + // CHECK-LABEL: @vrotri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vrotri_w(v4i32 _1) { return __lsx_vrotri_w(_1, 1); } + // CHECK-LABEL: @vrotri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vrotri_d(v2i64 _1) { return __lsx_vrotri_d(_1, 1); } + // CHECK-LABEL: @vextl_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vextl_q_d(v2i64 _1) { return __lsx_vextl_q_d(_1); } + // CHECK-LABEL: @vsrlni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrlni_b_h(_1, _2, 1); } + // CHECK-LABEL: @vsrlni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrlni_h_w(_1, _2, 1); } + // CHECK-LABEL: @vsrlni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrlni_w_d(_1, _2, 1); } + // CHECK-LABEL: @vsrlni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrlni_d_q(_1, _2, 1); } + // CHECK-LABEL: @vsrlrni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrlrni_b_h(_1, _2, 1); } + // CHECK-LABEL: @vsrlrni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrlrni_h_w(_1, _2, 1); } + // CHECK-LABEL: @vsrlrni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrlrni_w_d(_1, _2, 1); } + // CHECK-LABEL: @vsrlrni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrlrni_d_q(_1, _2, 1); } + // CHECK-LABEL: @vssrlni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrlni_b_h(_1, _2, 1); } + // CHECK-LABEL: @vssrlni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrlni_h_w(_1, _2, 1); } + // CHECK-LABEL: @vssrlni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrlni_w_d(_1, _2, 1); } + // CHECK-LABEL: @vssrlni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrlni_d_q(_1, _2, 1); } + // CHECK-LABEL: @vssrlni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2) { return __lsx_vssrlni_bu_h(_1, _2, 1); } + // CHECK-LABEL: @vssrlni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2) { return __lsx_vssrlni_hu_w(_1, _2, 1); } + // CHECK-LABEL: @vssrlni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2) { return __lsx_vssrlni_wu_d(_1, _2, 1); } + // CHECK-LABEL: @vssrlni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2) { return __lsx_vssrlni_du_q(_1, _2, 1); } + // CHECK-LABEL: @vssrlrni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrlrni_b_h(_1, _2, 1); } + // CHECK-LABEL: @vssrlrni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrlrni_h_w(_1, _2, 1); } + // CHECK-LABEL: @vssrlrni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrlrni_w_d(_1, _2, 1); } + // CHECK-LABEL: @vssrlrni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrlrni_d_q(_1, _2, 1); } + // CHECK-LABEL: @vssrlrni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2) { + return __lsx_vssrlrni_bu_h(_1, _2, 1); + } + // CHECK-LABEL: @vssrlrni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2) { + return __lsx_vssrlrni_hu_w(_1, _2, 1); + } + // CHECK-LABEL: @vssrlrni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2) { + return __lsx_vssrlrni_wu_d(_1, _2, 1); + } + // CHECK-LABEL: @vssrlrni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2) { + return __lsx_vssrlrni_du_q(_1, _2, 1); + } + // CHECK-LABEL: @vsrani_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrani_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrani_b_h(_1, _2, 1); } + // CHECK-LABEL: @vsrani_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrani_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrani_h_w(_1, _2, 1); } + // CHECK-LABEL: @vsrani_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrani_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrani_w_d(_1, _2, 1); } + // CHECK-LABEL: @vsrani_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsrani_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrani_d_q(_1, _2, 1); } + // CHECK-LABEL: @vsrarni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrarni_b_h(_1, _2, 1); } + // CHECK-LABEL: @vsrarni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrarni_h_w(_1, _2, 1); } + // CHECK-LABEL: @vsrarni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrarni_w_d(_1, _2, 1); } + // CHECK-LABEL: @vsrarni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrarni_d_q(_1, _2, 1); } + // CHECK-LABEL: @vssrani_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssrani_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrani_b_h(_1, _2, 1); } + // CHECK-LABEL: @vssrani_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssrani_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrani_h_w(_1, _2, 1); } + // CHECK-LABEL: @vssrani_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssrani_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrani_w_d(_1, _2, 1); } + // CHECK-LABEL: @vssrani_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vssrani_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrani_d_q(_1, _2, 1); } + // CHECK-LABEL: @vssrani_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2) { return __lsx_vssrani_bu_h(_1, _2, 1); } + // CHECK-LABEL: @vssrani_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2) { return __lsx_vssrani_hu_w(_1, _2, 1); } + // CHECK-LABEL: @vssrani_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2) { return __lsx_vssrani_wu_d(_1, _2, 1); } + // CHECK-LABEL: @vssrani_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vssrani_du_q(v2u64 _1, v2i64 _2) { return __lsx_vssrani_du_q(_1, _2, 1); } + // CHECK-LABEL: @vssrarni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrarni_b_h(_1, _2, 1); } + // CHECK-LABEL: @vssrarni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrarni_h_w(_1, _2, 1); } + // CHECK-LABEL: @vssrarni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrarni_w_d(_1, _2, 1); } + // CHECK-LABEL: @vssrarni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrarni_d_q(_1, _2, 1); } + // CHECK-LABEL: @vssrarni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2) { + return __lsx_vssrarni_bu_h(_1, _2, 1); + } + // CHECK-LABEL: @vssrarni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2) { + return __lsx_vssrarni_hu_w(_1, _2, 1); + } + // CHECK-LABEL: @vssrarni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2) { + return __lsx_vssrarni_wu_d(_1, _2, 1); + } + // CHECK-LABEL: @vssrarni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2) { + return __lsx_vssrarni_du_q(_1, _2, 1); + } + // CHECK-LABEL: @vpermi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vpermi_w(v4i32 _1, v4i32 _2) { return __lsx_vpermi_w(_1, _2, 1); } + // CHECK-LABEL: @vld( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vld(ptr [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v16i8 vld(void *_1) { return __lsx_vld(_1, 1); } + // CHECK-LABEL: @vst( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vst(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1) ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vst(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i32 1) + // CHECK-NEXT: ret void + // + void vst(v16i8 _1, void *_2) { return __lsx_vst(_1, _2, 1); } + // CHECK-LABEL: @vssrlrn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssrlrn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrlrn_b_h(_1, _2); } + // CHECK-LABEL: @vssrlrn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssrlrn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrlrn_h_w(_1, _2); } + // CHECK-LABEL: @vssrlrn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssrlrn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrlrn_w_d(_1, _2); } + // CHECK-LABEL: @vssrln_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssrln_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrln_b_h(_1, _2); } + // CHECK-LABEL: @vssrln_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssrln_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrln_h_w(_1, _2); } + // CHECK-LABEL: @vssrln_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssrln_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrln_w_d(_1, _2); } + // CHECK-LABEL: @vorn_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vorn_v(v16i8 _1, v16i8 _2) { return __lsx_vorn_v(_1, _2); } + // CHECK-LABEL: @vldi( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldi(i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v2i64 vldi() { return __lsx_vldi(1); } + // CHECK-LABEL: @vshuf_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __lsx_vshuf_b(_1, _2, _3); +@@ -4086,366 +5844,516 @@ v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) { + // CHECK-LABEL: @vldx( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldx(ptr [[_1:%.*]], i64 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v16i8 vldx(void *_1) { return __lsx_vldx(_1, 1); } + // CHECK-LABEL: @vstx( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstx(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i64 1) ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstx(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i64 1) + // CHECK-NEXT: ret void + // + void vstx(v16i8 _1, void *_2) { return __lsx_vstx(_1, _2, 1); } + // CHECK-LABEL: @vextl_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vextl_qu_du(v2u64 _1) { return __lsx_vextl_qu_du(_1); } + // CHECK-LABEL: @bnz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bnz_b(v16u8 _1) { return __lsx_bnz_b(_1); } + // CHECK-LABEL: @bnz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bnz_d(v2u64 _1) { return __lsx_bnz_d(_1); } + // CHECK-LABEL: @bnz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bnz_h(v8u16 _1) { return __lsx_bnz_h(_1); } + // CHECK-LABEL: @bnz_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bnz_v(v16u8 _1) { return __lsx_bnz_v(_1); } + // CHECK-LABEL: @bnz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bnz_w(v4u32 _1) { return __lsx_bnz_w(_1); } + // CHECK-LABEL: @bz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bz_b(v16u8 _1) { return __lsx_bz_b(_1); } + // CHECK-LABEL: @bz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bz_d(v2u64 _1) { return __lsx_bz_d(_1); } + // CHECK-LABEL: @bz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bz_h(v8u16 _1) { return __lsx_bz_h(_1); } + // CHECK-LABEL: @bz_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bz_v(v16u8 _1) { return __lsx_bz_v(_1); } + // CHECK-LABEL: @bz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bz_w(v4u32 _1) { return __lsx_bz_w(_1); } + // CHECK-LABEL: @vfcmp_caf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_caf_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_caf_d(_1, _2); } + // CHECK-LABEL: @vfcmp_caf_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_caf_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_caf_s(_1, _2); } + // CHECK-LABEL: @vfcmp_ceq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_ceq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_ceq_d(_1, _2); } + // CHECK-LABEL: @vfcmp_ceq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_ceq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_ceq_s(_1, _2); } + // CHECK-LABEL: @vfcmp_cle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_cle_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cle_d(_1, _2); } + // CHECK-LABEL: @vfcmp_cle_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_cle_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cle_s(_1, _2); } + // CHECK-LABEL: @vfcmp_clt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_clt_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_clt_d(_1, _2); } + // CHECK-LABEL: @vfcmp_clt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_clt_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_clt_s(_1, _2); } + // CHECK-LABEL: @vfcmp_cne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_cne_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cne_d(_1, _2); } + // CHECK-LABEL: @vfcmp_cne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_cne_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cne_s(_1, _2); } + // CHECK-LABEL: @vfcmp_cor_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_cor_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cor_d(_1, _2); } + // CHECK-LABEL: @vfcmp_cor_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_cor_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cor_s(_1, _2); } + // CHECK-LABEL: @vfcmp_cueq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_cueq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cueq_d(_1, _2); } + // CHECK-LABEL: @vfcmp_cueq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_cueq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cueq_s(_1, _2); } + // CHECK-LABEL: @vfcmp_cule_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_cule_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cule_d(_1, _2); } + // CHECK-LABEL: @vfcmp_cule_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_cule_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cule_s(_1, _2); } + // CHECK-LABEL: @vfcmp_cult_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_cult_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cult_d(_1, _2); } + // CHECK-LABEL: @vfcmp_cult_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_cult_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cult_s(_1, _2); } + // CHECK-LABEL: @vfcmp_cun_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_cun_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cun_d(_1, _2); } + // CHECK-LABEL: @vfcmp_cune_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_cune_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cune_d(_1, _2); } + // CHECK-LABEL: @vfcmp_cune_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_cune_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cune_s(_1, _2); } + // CHECK-LABEL: @vfcmp_cun_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_cun_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cun_s(_1, _2); } + // CHECK-LABEL: @vfcmp_saf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_saf_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_saf_d(_1, _2); } + // CHECK-LABEL: @vfcmp_saf_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_saf_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_saf_s(_1, _2); } + // CHECK-LABEL: @vfcmp_seq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_seq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_seq_d(_1, _2); } + // CHECK-LABEL: @vfcmp_seq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_seq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_seq_s(_1, _2); } + // CHECK-LABEL: @vfcmp_sle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_sle_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sle_d(_1, _2); } + // CHECK-LABEL: @vfcmp_sle_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_sle_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sle_s(_1, _2); } + // CHECK-LABEL: @vfcmp_slt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_slt_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_slt_d(_1, _2); } + // CHECK-LABEL: @vfcmp_slt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_slt_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_slt_s(_1, _2); } + // CHECK-LABEL: @vfcmp_sne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_sne_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sne_d(_1, _2); } + // CHECK-LABEL: @vfcmp_sne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_sne_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sne_s(_1, _2); } + // CHECK-LABEL: @vfcmp_sor_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_sor_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sor_d(_1, _2); } + // CHECK-LABEL: @vfcmp_sor_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_sor_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sor_s(_1, _2); } + // CHECK-LABEL: @vfcmp_sueq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_sueq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sueq_d(_1, _2); } + // CHECK-LABEL: @vfcmp_sueq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_sueq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sueq_s(_1, _2); } + // CHECK-LABEL: @vfcmp_sule_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_sule_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sule_d(_1, _2); } + // CHECK-LABEL: @vfcmp_sule_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_sule_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sule_s(_1, _2); } + // CHECK-LABEL: @vfcmp_sult_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_sult_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sult_d(_1, _2); } + // CHECK-LABEL: @vfcmp_sult_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_sult_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sult_s(_1, _2); } + // CHECK-LABEL: @vfcmp_sun_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_sun_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sun_d(_1, _2); } + // CHECK-LABEL: @vfcmp_sune_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_sune_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sune_d(_1, _2); } + // CHECK-LABEL: @vfcmp_sune_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_sune_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sune_s(_1, _2); } + // CHECK-LABEL: @vfcmp_sun_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_sun_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sun_s(_1, _2); } + // CHECK-LABEL: @vrepli_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v16i8 vrepli_b() { return __lsx_vrepli_b(1); } + // CHECK-LABEL: @vrepli_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v2i64 vrepli_d() { return __lsx_vrepli_d(1); } + // CHECK-LABEL: @vrepli_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v8i16 vrepli_h() { return __lsx_vrepli_h(1); } + // CHECK-LABEL: @vrepli_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v4i32 vrepli_w() { return __lsx_vrepli_w(1); } +diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin.c b/clang/test/CodeGen/LoongArch/lsx/builtin.c +index ef5a390e1838..05a3d13a7fb9 100644 +--- a/clang/test/CodeGen/LoongArch/lsx/builtin.c ++++ b/clang/test/CodeGen/LoongArch/lsx/builtin.c +@@ -29,3319 +29,4547 @@ typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__)); + + // CHECK-LABEL: @vsll_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsll_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsll_b(_1, _2); } + // CHECK-LABEL: @vsll_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsll_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsll_h(_1, _2); } + // CHECK-LABEL: @vsll_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsll_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsll_w(_1, _2); } + // CHECK-LABEL: @vsll_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsll_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsll_d(_1, _2); } + // CHECK-LABEL: @vslli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vslli_b(v16i8 _1) { return __builtin_lsx_vslli_b(_1, 1); } + // CHECK-LABEL: @vslli_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vslli_h(v8i16 _1) { return __builtin_lsx_vslli_h(_1, 1); } + // CHECK-LABEL: @vslli_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vslli_w(v4i32 _1) { return __builtin_lsx_vslli_w(_1, 1); } + // CHECK-LABEL: @vslli_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vslli_d(v2i64 _1) { return __builtin_lsx_vslli_d(_1, 1); } + // CHECK-LABEL: @vsra_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsra_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsra_b(_1, _2); } + // CHECK-LABEL: @vsra_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsra_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsra_h(_1, _2); } + // CHECK-LABEL: @vsra_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsra_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsra_w(_1, _2); } + // CHECK-LABEL: @vsra_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsra_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsra_d(_1, _2); } + // CHECK-LABEL: @vsrai_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vsrai_b(v16i8 _1) { return __builtin_lsx_vsrai_b(_1, 1); } + // CHECK-LABEL: @vsrai_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vsrai_h(v8i16 _1) { return __builtin_lsx_vsrai_h(_1, 1); } + // CHECK-LABEL: @vsrai_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vsrai_w(v4i32 _1) { return __builtin_lsx_vsrai_w(_1, 1); } + // CHECK-LABEL: @vsrai_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vsrai_d(v2i64 _1) { return __builtin_lsx_vsrai_d(_1, 1); } + // CHECK-LABEL: @vsrar_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrar_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsrar_b(_1, _2); + } + // CHECK-LABEL: @vsrar_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrar_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrar_h(_1, _2); + } + // CHECK-LABEL: @vsrar_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrar_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrar_w(_1, _2); + } + // CHECK-LABEL: @vsrar_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsrar_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrar_d(_1, _2); + } + // CHECK-LABEL: @vsrari_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vsrari_b(v16i8 _1) { return __builtin_lsx_vsrari_b(_1, 1); } + // CHECK-LABEL: @vsrari_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vsrari_h(v8i16 _1) { return __builtin_lsx_vsrari_h(_1, 1); } + // CHECK-LABEL: @vsrari_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vsrari_w(v4i32 _1) { return __builtin_lsx_vsrari_w(_1, 1); } + // CHECK-LABEL: @vsrari_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vsrari_d(v2i64 _1) { return __builtin_lsx_vsrari_d(_1, 1); } + // CHECK-LABEL: @vsrl_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrl_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsrl_b(_1, _2); } + // CHECK-LABEL: @vsrl_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrl_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsrl_h(_1, _2); } + // CHECK-LABEL: @vsrl_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrl_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsrl_w(_1, _2); } + // CHECK-LABEL: @vsrl_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsrl_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsrl_d(_1, _2); } + // CHECK-LABEL: @vsrli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vsrli_b(v16i8 _1) { return __builtin_lsx_vsrli_b(_1, 1); } + // CHECK-LABEL: @vsrli_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vsrli_h(v8i16 _1) { return __builtin_lsx_vsrli_h(_1, 1); } + // CHECK-LABEL: @vsrli_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vsrli_w(v4i32 _1) { return __builtin_lsx_vsrli_w(_1, 1); } + // CHECK-LABEL: @vsrli_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vsrli_d(v2i64 _1) { return __builtin_lsx_vsrli_d(_1, 1); } + // CHECK-LABEL: @vsrlr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrlr_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsrlr_b(_1, _2); + } + // CHECK-LABEL: @vsrlr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrlr_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrlr_h(_1, _2); + } + // CHECK-LABEL: @vsrlr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrlr_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrlr_w(_1, _2); + } + // CHECK-LABEL: @vsrlr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsrlr_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrlr_d(_1, _2); + } + // CHECK-LABEL: @vsrlri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vsrlri_b(v16i8 _1) { return __builtin_lsx_vsrlri_b(_1, 1); } + // CHECK-LABEL: @vsrlri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vsrlri_h(v8i16 _1) { return __builtin_lsx_vsrlri_h(_1, 1); } + // CHECK-LABEL: @vsrlri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vsrlri_w(v4i32 _1) { return __builtin_lsx_vsrlri_w(_1, 1); } + // CHECK-LABEL: @vsrlri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vsrlri_d(v2i64 _1) { return __builtin_lsx_vsrlri_d(_1, 1); } + // CHECK-LABEL: @vbitclr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vbitclr_b(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vbitclr_b(_1, _2); + } + // CHECK-LABEL: @vbitclr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vbitclr_h(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vbitclr_h(_1, _2); + } + // CHECK-LABEL: @vbitclr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vbitclr_w(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vbitclr_w(_1, _2); + } + // CHECK-LABEL: @vbitclr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vbitclr_d(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vbitclr_d(_1, _2); + } + // CHECK-LABEL: @vbitclri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vbitclri_b(v16u8 _1) { return __builtin_lsx_vbitclri_b(_1, 1); } + // CHECK-LABEL: @vbitclri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8u16 vbitclri_h(v8u16 _1) { return __builtin_lsx_vbitclri_h(_1, 1); } + // CHECK-LABEL: @vbitclri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vbitclri_w(v4u32 _1) { return __builtin_lsx_vbitclri_w(_1, 1); } + // CHECK-LABEL: @vbitclri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vbitclri_d(v2u64 _1) { return __builtin_lsx_vbitclri_d(_1, 1); } + // CHECK-LABEL: @vbitset_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vbitset_b(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vbitset_b(_1, _2); + } + // CHECK-LABEL: @vbitset_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vbitset_h(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vbitset_h(_1, _2); + } + // CHECK-LABEL: @vbitset_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vbitset_w(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vbitset_w(_1, _2); + } + // CHECK-LABEL: @vbitset_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vbitset_d(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vbitset_d(_1, _2); + } + // CHECK-LABEL: @vbitseti_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vbitseti_b(v16u8 _1) { return __builtin_lsx_vbitseti_b(_1, 1); } + // CHECK-LABEL: @vbitseti_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8u16 vbitseti_h(v8u16 _1) { return __builtin_lsx_vbitseti_h(_1, 1); } + // CHECK-LABEL: @vbitseti_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vbitseti_w(v4u32 _1) { return __builtin_lsx_vbitseti_w(_1, 1); } + // CHECK-LABEL: @vbitseti_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vbitseti_d(v2u64 _1) { return __builtin_lsx_vbitseti_d(_1, 1); } + // CHECK-LABEL: @vbitrev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vbitrev_b(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vbitrev_b(_1, _2); + } + // CHECK-LABEL: @vbitrev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vbitrev_h(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vbitrev_h(_1, _2); + } + // CHECK-LABEL: @vbitrev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vbitrev_w(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vbitrev_w(_1, _2); + } + // CHECK-LABEL: @vbitrev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vbitrev_d(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vbitrev_d(_1, _2); + } + // CHECK-LABEL: @vbitrevi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vbitrevi_b(v16u8 _1) { return __builtin_lsx_vbitrevi_b(_1, 1); } + // CHECK-LABEL: @vbitrevi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8u16 vbitrevi_h(v8u16 _1) { return __builtin_lsx_vbitrevi_h(_1, 1); } + // CHECK-LABEL: @vbitrevi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vbitrevi_w(v4u32 _1) { return __builtin_lsx_vbitrevi_w(_1, 1); } + // CHECK-LABEL: @vbitrevi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vbitrevi_d(v2u64 _1) { return __builtin_lsx_vbitrevi_d(_1, 1); } + // CHECK-LABEL: @vadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vadd_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vadd_b(_1, _2); } + // CHECK-LABEL: @vadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vadd_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vadd_h(_1, _2); } + // CHECK-LABEL: @vadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vadd_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vadd_w(_1, _2); } + // CHECK-LABEL: @vadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vadd_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vadd_d(_1, _2); } + // CHECK-LABEL: @vaddi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vaddi_bu(v16i8 _1) { return __builtin_lsx_vaddi_bu(_1, 1); } + // CHECK-LABEL: @vaddi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vaddi_hu(v8i16 _1) { return __builtin_lsx_vaddi_hu(_1, 1); } + // CHECK-LABEL: @vaddi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vaddi_wu(v4i32 _1) { return __builtin_lsx_vaddi_wu(_1, 1); } + // CHECK-LABEL: @vaddi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vaddi_du(v2i64 _1) { return __builtin_lsx_vaddi_du(_1, 1); } + // CHECK-LABEL: @vsub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsub_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsub_b(_1, _2); } + // CHECK-LABEL: @vsub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsub_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsub_h(_1, _2); } + // CHECK-LABEL: @vsub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsub_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsub_w(_1, _2); } + // CHECK-LABEL: @vsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsub_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsub_d(_1, _2); } + // CHECK-LABEL: @vsubi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vsubi_bu(v16i8 _1) { return __builtin_lsx_vsubi_bu(_1, 1); } + // CHECK-LABEL: @vsubi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vsubi_hu(v8i16 _1) { return __builtin_lsx_vsubi_hu(_1, 1); } + // CHECK-LABEL: @vsubi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vsubi_wu(v4i32 _1) { return __builtin_lsx_vsubi_wu(_1, 1); } + // CHECK-LABEL: @vsubi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vsubi_du(v2i64 _1) { return __builtin_lsx_vsubi_du(_1, 1); } + // CHECK-LABEL: @vmax_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vmax_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmax_b(_1, _2); } + // CHECK-LABEL: @vmax_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmax_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmax_h(_1, _2); } + // CHECK-LABEL: @vmax_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmax_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmax_w(_1, _2); } + // CHECK-LABEL: @vmax_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmax_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmax_d(_1, _2); } + // CHECK-LABEL: @vmaxi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vmaxi_b(v16i8 _1) { return __builtin_lsx_vmaxi_b(_1, 1); } + // CHECK-LABEL: @vmaxi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vmaxi_h(v8i16 _1) { return __builtin_lsx_vmaxi_h(_1, 1); } + // CHECK-LABEL: @vmaxi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vmaxi_w(v4i32 _1) { return __builtin_lsx_vmaxi_w(_1, 1); } + // CHECK-LABEL: @vmaxi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vmaxi_d(v2i64 _1) { return __builtin_lsx_vmaxi_d(_1, 1); } + // CHECK-LABEL: @vmax_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vmax_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vmax_bu(_1, _2); + } + // CHECK-LABEL: @vmax_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vmax_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vmax_hu(_1, _2); + } + // CHECK-LABEL: @vmax_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vmax_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vmax_wu(_1, _2); + } + // CHECK-LABEL: @vmax_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vmax_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vmax_du(_1, _2); + } + // CHECK-LABEL: @vmaxi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vmaxi_bu(v16u8 _1) { return __builtin_lsx_vmaxi_bu(_1, 1); } + // CHECK-LABEL: @vmaxi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8u16 vmaxi_hu(v8u16 _1) { return __builtin_lsx_vmaxi_hu(_1, 1); } + // CHECK-LABEL: @vmaxi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vmaxi_wu(v4u32 _1) { return __builtin_lsx_vmaxi_wu(_1, 1); } + // CHECK-LABEL: @vmaxi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vmaxi_du(v2u64 _1) { return __builtin_lsx_vmaxi_du(_1, 1); } + // CHECK-LABEL: @vmin_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vmin_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmin_b(_1, _2); } + // CHECK-LABEL: @vmin_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmin_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmin_h(_1, _2); } + // CHECK-LABEL: @vmin_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmin_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmin_w(_1, _2); } + // CHECK-LABEL: @vmin_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmin_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmin_d(_1, _2); } + // CHECK-LABEL: @vmini_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vmini_b(v16i8 _1) { return __builtin_lsx_vmini_b(_1, 1); } + // CHECK-LABEL: @vmini_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vmini_h(v8i16 _1) { return __builtin_lsx_vmini_h(_1, 1); } + // CHECK-LABEL: @vmini_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vmini_w(v4i32 _1) { return __builtin_lsx_vmini_w(_1, 1); } + // CHECK-LABEL: @vmini_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vmini_d(v2i64 _1) { return __builtin_lsx_vmini_d(_1, 1); } + // CHECK-LABEL: @vmin_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vmin_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vmin_bu(_1, _2); + } + // CHECK-LABEL: @vmin_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vmin_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vmin_hu(_1, _2); + } + // CHECK-LABEL: @vmin_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vmin_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vmin_wu(_1, _2); + } + // CHECK-LABEL: @vmin_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vmin_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vmin_du(_1, _2); + } + // CHECK-LABEL: @vmini_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vmini_bu(v16u8 _1) { return __builtin_lsx_vmini_bu(_1, 1); } + // CHECK-LABEL: @vmini_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8u16 vmini_hu(v8u16 _1) { return __builtin_lsx_vmini_hu(_1, 1); } + // CHECK-LABEL: @vmini_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vmini_wu(v4u32 _1) { return __builtin_lsx_vmini_wu(_1, 1); } + // CHECK-LABEL: @vmini_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vmini_du(v2u64 _1) { return __builtin_lsx_vmini_du(_1, 1); } + // CHECK-LABEL: @vseq_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vseq_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vseq_b(_1, _2); } + // CHECK-LABEL: @vseq_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vseq_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vseq_h(_1, _2); } + // CHECK-LABEL: @vseq_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vseq_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vseq_w(_1, _2); } + // CHECK-LABEL: @vseq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vseq_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vseq_d(_1, _2); } + // CHECK-LABEL: @vseqi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vseqi_b(v16i8 _1) { return __builtin_lsx_vseqi_b(_1, 1); } + // CHECK-LABEL: @vseqi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vseqi_h(v8i16 _1) { return __builtin_lsx_vseqi_h(_1, 1); } + // CHECK-LABEL: @vseqi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vseqi_w(v4i32 _1) { return __builtin_lsx_vseqi_w(_1, 1); } + // CHECK-LABEL: @vseqi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vseqi_d(v2i64 _1) { return __builtin_lsx_vseqi_d(_1, 1); } + // CHECK-LABEL: @vslti_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vslti_b(v16i8 _1) { return __builtin_lsx_vslti_b(_1, 1); } + // CHECK-LABEL: @vslt_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vslt_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vslt_b(_1, _2); } + // CHECK-LABEL: @vslt_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vslt_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vslt_h(_1, _2); } + // CHECK-LABEL: @vslt_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vslt_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vslt_w(_1, _2); } + // CHECK-LABEL: @vslt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vslt_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vslt_d(_1, _2); } + // CHECK-LABEL: @vslti_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vslti_h(v8i16 _1) { return __builtin_lsx_vslti_h(_1, 1); } + // CHECK-LABEL: @vslti_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vslti_w(v4i32 _1) { return __builtin_lsx_vslti_w(_1, 1); } + // CHECK-LABEL: @vslti_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vslti_d(v2i64 _1) { return __builtin_lsx_vslti_d(_1, 1); } + // CHECK-LABEL: @vslt_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vslt_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vslt_bu(_1, _2); + } + // CHECK-LABEL: @vslt_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vslt_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vslt_hu(_1, _2); + } + // CHECK-LABEL: @vslt_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vslt_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vslt_wu(_1, _2); + } + // CHECK-LABEL: @vslt_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vslt_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vslt_du(_1, _2); + } + // CHECK-LABEL: @vslti_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vslti_bu(v16u8 _1) { return __builtin_lsx_vslti_bu(_1, 1); } + // CHECK-LABEL: @vslti_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vslti_hu(v8u16 _1) { return __builtin_lsx_vslti_hu(_1, 1); } + // CHECK-LABEL: @vslti_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vslti_wu(v4u32 _1) { return __builtin_lsx_vslti_wu(_1, 1); } + // CHECK-LABEL: @vslti_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vslti_du(v2u64 _1) { return __builtin_lsx_vslti_du(_1, 1); } + // CHECK-LABEL: @vsle_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsle_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsle_b(_1, _2); } + // CHECK-LABEL: @vsle_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsle_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsle_h(_1, _2); } + // CHECK-LABEL: @vsle_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsle_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsle_w(_1, _2); } + // CHECK-LABEL: @vsle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsle_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsle_d(_1, _2); } + // CHECK-LABEL: @vslei_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vslei_b(v16i8 _1) { return __builtin_lsx_vslei_b(_1, 1); } + // CHECK-LABEL: @vslei_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vslei_h(v8i16 _1) { return __builtin_lsx_vslei_h(_1, 1); } + // CHECK-LABEL: @vslei_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vslei_w(v4i32 _1) { return __builtin_lsx_vslei_w(_1, 1); } + // CHECK-LABEL: @vslei_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vslei_d(v2i64 _1) { return __builtin_lsx_vslei_d(_1, 1); } + // CHECK-LABEL: @vsle_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsle_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vsle_bu(_1, _2); + } + // CHECK-LABEL: @vsle_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsle_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vsle_hu(_1, _2); + } + // CHECK-LABEL: @vsle_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsle_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vsle_wu(_1, _2); + } + // CHECK-LABEL: @vsle_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsle_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vsle_du(_1, _2); + } + // CHECK-LABEL: @vslei_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vslei_bu(v16u8 _1) { return __builtin_lsx_vslei_bu(_1, 1); } + // CHECK-LABEL: @vslei_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vslei_hu(v8u16 _1) { return __builtin_lsx_vslei_hu(_1, 1); } + // CHECK-LABEL: @vslei_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vslei_wu(v4u32 _1) { return __builtin_lsx_vslei_wu(_1, 1); } + // CHECK-LABEL: @vslei_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vslei_du(v2u64 _1) { return __builtin_lsx_vslei_du(_1, 1); } + // CHECK-LABEL: @vsat_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vsat_b(v16i8 _1) { return __builtin_lsx_vsat_b(_1, 1); } + // CHECK-LABEL: @vsat_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vsat_h(v8i16 _1) { return __builtin_lsx_vsat_h(_1, 1); } + // CHECK-LABEL: @vsat_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vsat_w(v4i32 _1) { return __builtin_lsx_vsat_w(_1, 1); } + // CHECK-LABEL: @vsat_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vsat_d(v2i64 _1) { return __builtin_lsx_vsat_d(_1, 1); } + // CHECK-LABEL: @vsat_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vsat_bu(v16u8 _1) { return __builtin_lsx_vsat_bu(_1, 1); } + // CHECK-LABEL: @vsat_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8u16 vsat_hu(v8u16 _1) { return __builtin_lsx_vsat_hu(_1, 1); } + // CHECK-LABEL: @vsat_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vsat_wu(v4u32 _1) { return __builtin_lsx_vsat_wu(_1, 1); } + // CHECK-LABEL: @vsat_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vsat_du(v2u64 _1) { return __builtin_lsx_vsat_du(_1, 1); } + // CHECK-LABEL: @vadda_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vadda_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vadda_b(_1, _2); + } + // CHECK-LABEL: @vadda_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vadda_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vadda_h(_1, _2); + } + // CHECK-LABEL: @vadda_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vadda_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vadda_w(_1, _2); + } + // CHECK-LABEL: @vadda_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vadda_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vadda_d(_1, _2); + } + // CHECK-LABEL: @vsadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsadd_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsadd_b(_1, _2); + } + // CHECK-LABEL: @vsadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsadd_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsadd_h(_1, _2); + } + // CHECK-LABEL: @vsadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsadd_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsadd_w(_1, _2); + } + // CHECK-LABEL: @vsadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsadd_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsadd_d(_1, _2); + } + // CHECK-LABEL: @vsadd_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vsadd_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vsadd_bu(_1, _2); + } + // CHECK-LABEL: @vsadd_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vsadd_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vsadd_hu(_1, _2); + } + // CHECK-LABEL: @vsadd_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vsadd_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vsadd_wu(_1, _2); + } + // CHECK-LABEL: @vsadd_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vsadd_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vsadd_du(_1, _2); + } + // CHECK-LABEL: @vavg_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vavg_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vavg_b(_1, _2); } + // CHECK-LABEL: @vavg_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vavg_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vavg_h(_1, _2); } + // CHECK-LABEL: @vavg_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vavg_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vavg_w(_1, _2); } + // CHECK-LABEL: @vavg_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vavg_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vavg_d(_1, _2); } + // CHECK-LABEL: @vavg_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vavg_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vavg_bu(_1, _2); + } + // CHECK-LABEL: @vavg_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vavg_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vavg_hu(_1, _2); + } + // CHECK-LABEL: @vavg_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vavg_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vavg_wu(_1, _2); + } + // CHECK-LABEL: @vavg_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vavg_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vavg_du(_1, _2); + } + // CHECK-LABEL: @vavgr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vavgr_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vavgr_b(_1, _2); + } + // CHECK-LABEL: @vavgr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vavgr_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vavgr_h(_1, _2); + } + // CHECK-LABEL: @vavgr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vavgr_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vavgr_w(_1, _2); + } + // CHECK-LABEL: @vavgr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vavgr_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vavgr_d(_1, _2); + } + // CHECK-LABEL: @vavgr_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vavgr_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vavgr_bu(_1, _2); + } + // CHECK-LABEL: @vavgr_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vavgr_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vavgr_hu(_1, _2); + } + // CHECK-LABEL: @vavgr_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vavgr_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vavgr_wu(_1, _2); + } + // CHECK-LABEL: @vavgr_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vavgr_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vavgr_du(_1, _2); + } + // CHECK-LABEL: @vssub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssub_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vssub_b(_1, _2); + } + // CHECK-LABEL: @vssub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssub_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssub_h(_1, _2); + } + // CHECK-LABEL: @vssub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssub_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssub_w(_1, _2); + } + // CHECK-LABEL: @vssub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vssub_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssub_d(_1, _2); + } + // CHECK-LABEL: @vssub_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssub_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vssub_bu(_1, _2); + } + // CHECK-LABEL: @vssub_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssub_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vssub_hu(_1, _2); + } + // CHECK-LABEL: @vssub_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssub_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vssub_wu(_1, _2); + } + // CHECK-LABEL: @vssub_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vssub_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vssub_du(_1, _2); + } + // CHECK-LABEL: @vabsd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vabsd_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vabsd_b(_1, _2); + } + // CHECK-LABEL: @vabsd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vabsd_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vabsd_h(_1, _2); + } + // CHECK-LABEL: @vabsd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vabsd_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vabsd_w(_1, _2); + } + // CHECK-LABEL: @vabsd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vabsd_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vabsd_d(_1, _2); + } + // CHECK-LABEL: @vabsd_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vabsd_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vabsd_bu(_1, _2); + } + // CHECK-LABEL: @vabsd_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vabsd_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vabsd_hu(_1, _2); + } + // CHECK-LABEL: @vabsd_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vabsd_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vabsd_wu(_1, _2); + } + // CHECK-LABEL: @vabsd_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vabsd_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vabsd_du(_1, _2); + } + // CHECK-LABEL: @vmul_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vmul_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmul_b(_1, _2); } + // CHECK-LABEL: @vmul_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmul_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmul_h(_1, _2); } + // CHECK-LABEL: @vmul_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmul_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmul_w(_1, _2); } + // CHECK-LABEL: @vmul_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmul_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmul_d(_1, _2); } + // CHECK-LABEL: @vmadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v16i8 vmadd_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __builtin_lsx_vmadd_b(_1, _2, _3); + } + // CHECK-LABEL: @vmadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8i16 vmadd_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __builtin_lsx_vmadd_h(_1, _2, _3); + } + // CHECK-LABEL: @vmadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4i32 vmadd_w(v4i32 _1, v4i32 _2, v4i32 _3) { + return __builtin_lsx_vmadd_w(_1, _2, _3); + } + // CHECK-LABEL: @vmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmadd_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __builtin_lsx_vmadd_d(_1, _2, _3); + } + // CHECK-LABEL: @vmsub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v16i8 vmsub_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __builtin_lsx_vmsub_b(_1, _2, _3); + } + // CHECK-LABEL: @vmsub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8i16 vmsub_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __builtin_lsx_vmsub_h(_1, _2, _3); + } + // CHECK-LABEL: @vmsub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4i32 vmsub_w(v4i32 _1, v4i32 _2, v4i32 _3) { + return __builtin_lsx_vmsub_w(_1, _2, _3); + } + // CHECK-LABEL: @vmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmsub_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __builtin_lsx_vmsub_d(_1, _2, _3); + } + // CHECK-LABEL: @vdiv_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vdiv_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vdiv_b(_1, _2); } + // CHECK-LABEL: @vdiv_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vdiv_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vdiv_h(_1, _2); } + // CHECK-LABEL: @vdiv_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vdiv_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vdiv_w(_1, _2); } + // CHECK-LABEL: @vdiv_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vdiv_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vdiv_d(_1, _2); } + // CHECK-LABEL: @vdiv_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vdiv_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vdiv_bu(_1, _2); + } + // CHECK-LABEL: @vdiv_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vdiv_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vdiv_hu(_1, _2); + } + // CHECK-LABEL: @vdiv_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vdiv_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vdiv_wu(_1, _2); + } + // CHECK-LABEL: @vdiv_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vdiv_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vdiv_du(_1, _2); + } + // CHECK-LABEL: @vhaddw_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vhaddw_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vhaddw_h_b(_1, _2); + } + // CHECK-LABEL: @vhaddw_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vhaddw_w_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vhaddw_w_h(_1, _2); + } + // CHECK-LABEL: @vhaddw_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vhaddw_d_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vhaddw_d_w(_1, _2); + } + // CHECK-LABEL: @vhaddw_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vhaddw_hu_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vhaddw_hu_bu(_1, _2); + } + // CHECK-LABEL: @vhaddw_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vhaddw_wu_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vhaddw_wu_hu(_1, _2); + } + // CHECK-LABEL: @vhaddw_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vhaddw_du_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vhaddw_du_wu(_1, _2); + } + // CHECK-LABEL: @vhsubw_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vhsubw_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vhsubw_h_b(_1, _2); + } + // CHECK-LABEL: @vhsubw_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vhsubw_w_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vhsubw_w_h(_1, _2); + } + // CHECK-LABEL: @vhsubw_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vhsubw_d_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vhsubw_d_w(_1, _2); + } + // CHECK-LABEL: @vhsubw_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vhsubw_hu_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vhsubw_hu_bu(_1, _2); + } + // CHECK-LABEL: @vhsubw_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vhsubw_wu_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vhsubw_wu_hu(_1, _2); + } + // CHECK-LABEL: @vhsubw_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vhsubw_du_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vhsubw_du_wu(_1, _2); + } + // CHECK-LABEL: @vmod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vmod_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmod_b(_1, _2); } + // CHECK-LABEL: @vmod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmod_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmod_h(_1, _2); } + // CHECK-LABEL: @vmod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmod_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmod_w(_1, _2); } + // CHECK-LABEL: @vmod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmod_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmod_d(_1, _2); } + // CHECK-LABEL: @vmod_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vmod_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vmod_bu(_1, _2); + } + // CHECK-LABEL: @vmod_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vmod_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vmod_hu(_1, _2); + } + // CHECK-LABEL: @vmod_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vmod_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vmod_wu(_1, _2); + } + // CHECK-LABEL: @vmod_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vmod_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vmod_du(_1, _2); + } + // CHECK-LABEL: @vreplve_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> [[_1:%.*]], i32 [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> [[TMP0]], i32 [[_2:%.*]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vreplve_b(v16i8 _1, int _2) { + return __builtin_lsx_vreplve_b(_1, _2); + } + // CHECK-LABEL: @vreplve_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> [[_1:%.*]], i32 [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> [[TMP0]], i32 [[_2:%.*]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vreplve_h(v8i16 _1, int _2) { + return __builtin_lsx_vreplve_h(_1, _2); + } + // CHECK-LABEL: @vreplve_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> [[_1:%.*]], i32 [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> [[TMP0]], i32 [[_2:%.*]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vreplve_w(v4i32 _1, int _2) { + return __builtin_lsx_vreplve_w(_1, _2); + } + // CHECK-LABEL: @vreplve_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> [[_1:%.*]], i32 [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> [[TMP0]], i32 [[_2:%.*]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vreplve_d(v2i64 _1, int _2) { + return __builtin_lsx_vreplve_d(_1, _2); + } + // CHECK-LABEL: @vreplvei_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vreplvei_b(v16i8 _1) { return __builtin_lsx_vreplvei_b(_1, 1); } + // CHECK-LABEL: @vreplvei_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vreplvei_h(v8i16 _1) { return __builtin_lsx_vreplvei_h(_1, 1); } + // CHECK-LABEL: @vreplvei_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vreplvei_w(v4i32 _1) { return __builtin_lsx_vreplvei_w(_1, 1); } + // CHECK-LABEL: @vreplvei_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vreplvei_d(v2i64 _1) { return __builtin_lsx_vreplvei_d(_1, 1); } + // CHECK-LABEL: @vpickev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vpickev_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vpickev_b(_1, _2); + } + // CHECK-LABEL: @vpickev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vpickev_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vpickev_h(_1, _2); + } + // CHECK-LABEL: @vpickev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vpickev_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vpickev_w(_1, _2); + } + // CHECK-LABEL: @vpickev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vpickev_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vpickev_d(_1, _2); + } + // CHECK-LABEL: @vpickod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vpickod_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vpickod_b(_1, _2); + } + // CHECK-LABEL: @vpickod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vpickod_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vpickod_h(_1, _2); + } + // CHECK-LABEL: @vpickod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vpickod_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vpickod_w(_1, _2); + } + // CHECK-LABEL: @vpickod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vpickod_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vpickod_d(_1, _2); + } + // CHECK-LABEL: @vilvh_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vilvh_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vilvh_b(_1, _2); + } + // CHECK-LABEL: @vilvh_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vilvh_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vilvh_h(_1, _2); + } + // CHECK-LABEL: @vilvh_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vilvh_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vilvh_w(_1, _2); + } + // CHECK-LABEL: @vilvh_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vilvh_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vilvh_d(_1, _2); + } + // CHECK-LABEL: @vilvl_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vilvl_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vilvl_b(_1, _2); + } + // CHECK-LABEL: @vilvl_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vilvl_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vilvl_h(_1, _2); + } + // CHECK-LABEL: @vilvl_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vilvl_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vilvl_w(_1, _2); + } + // CHECK-LABEL: @vilvl_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vilvl_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vilvl_d(_1, _2); + } + // CHECK-LABEL: @vpackev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vpackev_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vpackev_b(_1, _2); + } + // CHECK-LABEL: @vpackev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vpackev_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vpackev_h(_1, _2); + } + // CHECK-LABEL: @vpackev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vpackev_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vpackev_w(_1, _2); + } + // CHECK-LABEL: @vpackev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vpackev_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vpackev_d(_1, _2); + } + // CHECK-LABEL: @vpackod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vpackod_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vpackod_b(_1, _2); + } + // CHECK-LABEL: @vpackod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vpackod_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vpackod_h(_1, _2); + } + // CHECK-LABEL: @vpackod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vpackod_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vpackod_w(_1, _2); + } + // CHECK-LABEL: @vpackod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vpackod_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vpackod_d(_1, _2); + } + // CHECK-LABEL: @vshuf_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8i16 vshuf_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __builtin_lsx_vshuf_h(_1, _2, _3); + } + // CHECK-LABEL: @vshuf_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4i32 vshuf_w(v4i32 _1, v4i32 _2, v4i32 _3) { + return __builtin_lsx_vshuf_w(_1, _2, _3); + } + // CHECK-LABEL: @vshuf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vshuf_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __builtin_lsx_vshuf_d(_1, _2, _3); + } + // CHECK-LABEL: @vand_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vand_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vand_v(_1, _2); } + // CHECK-LABEL: @vandi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vandi_b(v16u8 _1) { return __builtin_lsx_vandi_b(_1, 1); } + // CHECK-LABEL: @vor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vor_v(_1, _2); } + // CHECK-LABEL: @vori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vori_b(v16u8 _1) { return __builtin_lsx_vori_b(_1, 1); } + // CHECK-LABEL: @vnor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vnor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vnor_v(_1, _2); } + // CHECK-LABEL: @vnori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vnori_b(v16u8 _1) { return __builtin_lsx_vnori_b(_1, 1); } + // CHECK-LABEL: @vxor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vxor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vxor_v(_1, _2); } + // CHECK-LABEL: @vxori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16u8 vxori_b(v16u8 _1) { return __builtin_lsx_vxori_b(_1, 1); } + // CHECK-LABEL: @vbitsel_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v16u8 vbitsel_v(v16u8 _1, v16u8 _2, v16u8 _3) { + return __builtin_lsx_vbitsel_v(_1, _2, _3); + } + // CHECK-LABEL: @vbitseli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vbitseli_b(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vbitseli_b(_1, _2, 1); + } + // CHECK-LABEL: @vshuf4i_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vshuf4i_b(v16i8 _1) { return __builtin_lsx_vshuf4i_b(_1, 1); } + // CHECK-LABEL: @vshuf4i_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vshuf4i_h(v8i16 _1) { return __builtin_lsx_vshuf4i_h(_1, 1); } + // CHECK-LABEL: @vshuf4i_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vshuf4i_w(v4i32 _1) { return __builtin_lsx_vshuf4i_w(_1, 1); } + // CHECK-LABEL: @vreplgr2vr_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v16i8 vreplgr2vr_b(int _1) { return __builtin_lsx_vreplgr2vr_b(_1); } + // CHECK-LABEL: @vreplgr2vr_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v8i16 vreplgr2vr_h(int _1) { return __builtin_lsx_vreplgr2vr_h(_1); } + // CHECK-LABEL: @vreplgr2vr_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v4i32 vreplgr2vr_w(int _1) { return __builtin_lsx_vreplgr2vr_w(_1); } + // CHECK-LABEL: @vreplgr2vr_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v2i64 vreplgr2vr_d(long _1) { return __builtin_lsx_vreplgr2vr_d(_1); } + // CHECK-LABEL: @vpcnt_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vpcnt_b(v16i8 _1) { return __builtin_lsx_vpcnt_b(_1); } + // CHECK-LABEL: @vpcnt_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vpcnt_h(v8i16 _1) { return __builtin_lsx_vpcnt_h(_1); } + // CHECK-LABEL: @vpcnt_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vpcnt_w(v4i32 _1) { return __builtin_lsx_vpcnt_w(_1); } + // CHECK-LABEL: @vpcnt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vpcnt_d(v2i64 _1) { return __builtin_lsx_vpcnt_d(_1); } + // CHECK-LABEL: @vclo_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vclo_b(v16i8 _1) { return __builtin_lsx_vclo_b(_1); } + // CHECK-LABEL: @vclo_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vclo_h(v8i16 _1) { return __builtin_lsx_vclo_h(_1); } + // CHECK-LABEL: @vclo_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vclo_w(v4i32 _1) { return __builtin_lsx_vclo_w(_1); } + // CHECK-LABEL: @vclo_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vclo_d(v2i64 _1) { return __builtin_lsx_vclo_d(_1); } + // CHECK-LABEL: @vclz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vclz_b(v16i8 _1) { return __builtin_lsx_vclz_b(_1); } + // CHECK-LABEL: @vclz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vclz_h(v8i16 _1) { return __builtin_lsx_vclz_h(_1); } + // CHECK-LABEL: @vclz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vclz_w(v4i32 _1) { return __builtin_lsx_vclz_w(_1); } + // CHECK-LABEL: @vclz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vclz_d(v2i64 _1) { return __builtin_lsx_vclz_d(_1); } + // CHECK-LABEL: @vpickve2gr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int vpickve2gr_b(v16i8 _1) { return __builtin_lsx_vpickve2gr_b(_1, 1); } + // CHECK-LABEL: @vpickve2gr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int vpickve2gr_h(v8i16 _1) { return __builtin_lsx_vpickve2gr_h(_1, 1); } + // CHECK-LABEL: @vpickve2gr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int vpickve2gr_w(v4i32 _1) { return __builtin_lsx_vpickve2gr_w(_1, 1); } + // CHECK-LABEL: @vpickve2gr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i64 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: ret i64 [[TMP1]] + // + long vpickve2gr_d(v2i64 _1) { return __builtin_lsx_vpickve2gr_d(_1, 1); } + // CHECK-LABEL: @vpickve2gr_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + unsigned int vpickve2gr_bu(v16i8 _1) { + return __builtin_lsx_vpickve2gr_bu(_1, 1); + } + // CHECK-LABEL: @vpickve2gr_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + unsigned int vpickve2gr_hu(v8i16 _1) { + return __builtin_lsx_vpickve2gr_hu(_1, 1); + } + // CHECK-LABEL: @vpickve2gr_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + unsigned int vpickve2gr_wu(v4i32 _1) { + return __builtin_lsx_vpickve2gr_wu(_1, 1); + } + // CHECK-LABEL: @vpickve2gr_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret i64 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: ret i64 [[TMP1]] + // + unsigned long int vpickve2gr_du(v2i64 _1) { + return __builtin_lsx_vpickve2gr_du(_1, 1); + } + // CHECK-LABEL: @vinsgr2vr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> [[_1:%.*]], i32 1, i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> [[TMP0]], i32 1, i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vinsgr2vr_b(v16i8 _1) { + return __builtin_lsx_vinsgr2vr_b(_1, 1, 1); + } + // CHECK-LABEL: @vinsgr2vr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> [[_1:%.*]], i32 1, i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> [[TMP0]], i32 1, i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vinsgr2vr_h(v8i16 _1) { + return __builtin_lsx_vinsgr2vr_h(_1, 1, 1); + } + // CHECK-LABEL: @vinsgr2vr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> [[_1:%.*]], i32 1, i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> [[TMP0]], i32 1, i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vinsgr2vr_w(v4i32 _1) { + return __builtin_lsx_vinsgr2vr_w(_1, 1, 1); + } + // CHECK-LABEL: @vinsgr2vr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> [[_1:%.*]], i64 1, i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> [[TMP0]], i64 1, i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vinsgr2vr_d(v2i64 _1) { + return __builtin_lsx_vinsgr2vr_d(_1, 1, 1); + } + // CHECK-LABEL: @vfadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfadd_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfadd_s(_1, _2); + } + // CHECK-LABEL: @vfadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2f64 vfadd_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfadd_d(_1, _2); + } + // CHECK-LABEL: @vfsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfsub_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfsub_s(_1, _2); + } + // CHECK-LABEL: @vfsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2f64 vfsub_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfsub_d(_1, _2); + } + // CHECK-LABEL: @vfmul_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfmul_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfmul_s(_1, _2); + } + // CHECK-LABEL: @vfmul_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2f64 vfmul_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfmul_d(_1, _2); + } + // CHECK-LABEL: @vfdiv_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfdiv_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfdiv_s(_1, _2); + } + // CHECK-LABEL: @vfdiv_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2f64 vfdiv_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfdiv_d(_1, _2); + } + // CHECK-LABEL: @vfcvt_h_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vfcvt_h_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcvt_h_s(_1, _2); + } + // CHECK-LABEL: @vfcvt_s_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfcvt_s_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcvt_s_d(_1, _2); + } + // CHECK-LABEL: @vfmin_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfmin_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfmin_s(_1, _2); + } + // CHECK-LABEL: @vfmin_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2f64 vfmin_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfmin_d(_1, _2); + } + // CHECK-LABEL: @vfmina_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfmina_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfmina_s(_1, _2); + } + // CHECK-LABEL: @vfmina_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2f64 vfmina_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfmina_d(_1, _2); + } + // CHECK-LABEL: @vfmax_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfmax_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfmax_s(_1, _2); + } + // CHECK-LABEL: @vfmax_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2f64 vfmax_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfmax_d(_1, _2); + } + // CHECK-LABEL: @vfmaxa_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vfmaxa_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfmaxa_s(_1, _2); + } + // CHECK-LABEL: @vfmaxa_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2f64 vfmaxa_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfmaxa_d(_1, _2); + } + // CHECK-LABEL: @vfclass_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vfclass_s(v4f32 _1) { return __builtin_lsx_vfclass_s(_1); } + // CHECK-LABEL: @vfclass_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vfclass_d(v2f64 _1) { return __builtin_lsx_vfclass_d(_1); } + // CHECK-LABEL: @vfsqrt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vfsqrt_s(v4f32 _1) { return __builtin_lsx_vfsqrt_s(_1); } + // CHECK-LABEL: @vfsqrt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vfsqrt_d(v2f64 _1) { return __builtin_lsx_vfsqrt_d(_1); } + // CHECK-LABEL: @vfrecip_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vfrecip_s(v4f32 _1) { return __builtin_lsx_vfrecip_s(_1); } + // CHECK-LABEL: @vfrecip_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vfrecip_d(v2f64 _1) { return __builtin_lsx_vfrecip_d(_1); } + // CHECK-LABEL: @vfrint_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vfrint_s(v4f32 _1) { return __builtin_lsx_vfrint_s(_1); } + // CHECK-LABEL: @vfrint_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vfrint_d(v2f64 _1) { return __builtin_lsx_vfrint_d(_1); } + // CHECK-LABEL: @vfrsqrt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vfrsqrt_s(v4f32 _1) { return __builtin_lsx_vfrsqrt_s(_1); } + // CHECK-LABEL: @vfrsqrt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vfrsqrt_d(v2f64 _1) { return __builtin_lsx_vfrsqrt_d(_1); } + // CHECK-LABEL: @vflogb_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vflogb_s(v4f32 _1) { return __builtin_lsx_vflogb_s(_1); } + // CHECK-LABEL: @vflogb_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vflogb_d(v2f64 _1) { return __builtin_lsx_vflogb_d(_1); } + // CHECK-LABEL: @vfcvth_s_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vfcvth_s_h(v8i16 _1) { return __builtin_lsx_vfcvth_s_h(_1); } + // CHECK-LABEL: @vfcvth_d_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vfcvth_d_s(v4f32 _1) { return __builtin_lsx_vfcvth_d_s(_1); } + // CHECK-LABEL: @vfcvtl_s_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vfcvtl_s_h(v8i16 _1) { return __builtin_lsx_vfcvtl_s_h(_1); } + // CHECK-LABEL: @vfcvtl_d_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vfcvtl_d_s(v4f32 _1) { return __builtin_lsx_vfcvtl_d_s(_1); } + // CHECK-LABEL: @vftint_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vftint_w_s(v4f32 _1) { return __builtin_lsx_vftint_w_s(_1); } + // CHECK-LABEL: @vftint_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftint_l_d(v2f64 _1) { return __builtin_lsx_vftint_l_d(_1); } + // CHECK-LABEL: @vftint_wu_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vftint_wu_s(v4f32 _1) { return __builtin_lsx_vftint_wu_s(_1); } + // CHECK-LABEL: @vftint_lu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vftint_lu_d(v2f64 _1) { return __builtin_lsx_vftint_lu_d(_1); } + // CHECK-LABEL: @vftintrz_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vftintrz_w_s(v4f32 _1) { return __builtin_lsx_vftintrz_w_s(_1); } + // CHECK-LABEL: @vftintrz_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrz_l_d(v2f64 _1) { return __builtin_lsx_vftintrz_l_d(_1); } + // CHECK-LABEL: @vftintrz_wu_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vftintrz_wu_s(v4f32 _1) { return __builtin_lsx_vftintrz_wu_s(_1); } + // CHECK-LABEL: @vftintrz_lu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vftintrz_lu_d(v2f64 _1) { return __builtin_lsx_vftintrz_lu_d(_1); } + // CHECK-LABEL: @vffint_s_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vffint_s_w(v4i32 _1) { return __builtin_lsx_vffint_s_w(_1); } + // CHECK-LABEL: @vffint_d_l( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vffint_d_l(v2i64 _1) { return __builtin_lsx_vffint_d_l(_1); } + // CHECK-LABEL: @vffint_s_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4f32 vffint_s_wu(v4u32 _1) { return __builtin_lsx_vffint_s_wu(_1); } + // CHECK-LABEL: @vffint_d_lu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vffint_d_lu(v2u64 _1) { return __builtin_lsx_vffint_d_lu(_1); } + // CHECK-LABEL: @vandn_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vandn_v(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vandn_v(_1, _2); + } + // CHECK-LABEL: @vneg_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vneg_b(v16i8 _1) { return __builtin_lsx_vneg_b(_1); } + // CHECK-LABEL: @vneg_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vneg_h(v8i16 _1) { return __builtin_lsx_vneg_h(_1); } + // CHECK-LABEL: @vneg_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vneg_w(v4i32 _1) { return __builtin_lsx_vneg_w(_1); } + // CHECK-LABEL: @vneg_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vneg_d(v2i64 _1) { return __builtin_lsx_vneg_d(_1); } + // CHECK-LABEL: @vmuh_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vmuh_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmuh_b(_1, _2); } + // CHECK-LABEL: @vmuh_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmuh_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmuh_h(_1, _2); } + // CHECK-LABEL: @vmuh_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmuh_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmuh_w(_1, _2); } + // CHECK-LABEL: @vmuh_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmuh_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmuh_d(_1, _2); } + // CHECK-LABEL: @vmuh_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vmuh_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vmuh_bu(_1, _2); + } + // CHECK-LABEL: @vmuh_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vmuh_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vmuh_hu(_1, _2); + } + // CHECK-LABEL: @vmuh_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vmuh_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vmuh_wu(_1, _2); + } + // CHECK-LABEL: @vmuh_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vmuh_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vmuh_du(_1, _2); + } + // CHECK-LABEL: @vsllwil_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vsllwil_h_b(v16i8 _1) { return __builtin_lsx_vsllwil_h_b(_1, 1); } + // CHECK-LABEL: @vsllwil_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vsllwil_w_h(v8i16 _1) { return __builtin_lsx_vsllwil_w_h(_1, 1); } + // CHECK-LABEL: @vsllwil_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vsllwil_d_w(v4i32 _1) { return __builtin_lsx_vsllwil_d_w(_1, 1); } + // CHECK-LABEL: @vsllwil_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8u16 vsllwil_hu_bu(v16u8 _1) { + return __builtin_lsx_vsllwil_hu_bu(_1, 1); + } + // CHECK-LABEL: @vsllwil_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vsllwil_wu_hu(v8u16 _1) { + return __builtin_lsx_vsllwil_wu_hu(_1, 1); + } + // CHECK-LABEL: @vsllwil_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vsllwil_du_wu(v4u32 _1) { + return __builtin_lsx_vsllwil_du_wu(_1, 1); + } + // CHECK-LABEL: @vsran_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsran_b_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsran_b_h(_1, _2); + } + // CHECK-LABEL: @vsran_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsran_h_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsran_h_w(_1, _2); + } + // CHECK-LABEL: @vsran_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsran_w_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsran_w_d(_1, _2); + } + // CHECK-LABEL: @vssran_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssran_b_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssran_b_h(_1, _2); + } + // CHECK-LABEL: @vssran_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssran_h_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssran_h_w(_1, _2); + } + // CHECK-LABEL: @vssran_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssran_w_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssran_w_d(_1, _2); + } + // CHECK-LABEL: @vssran_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssran_bu_h(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vssran_bu_h(_1, _2); + } + // CHECK-LABEL: @vssran_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssran_hu_w(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vssran_hu_w(_1, _2); + } + // CHECK-LABEL: @vssran_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssran_wu_d(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vssran_wu_d(_1, _2); + } + // CHECK-LABEL: @vsrarn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrarn_b_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrarn_b_h(_1, _2); + } + // CHECK-LABEL: @vsrarn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrarn_h_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrarn_h_w(_1, _2); + } + // CHECK-LABEL: @vsrarn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrarn_w_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrarn_w_d(_1, _2); + } + // CHECK-LABEL: @vssrarn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssrarn_b_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssrarn_b_h(_1, _2); + } + // CHECK-LABEL: @vssrarn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssrarn_h_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssrarn_h_w(_1, _2); + } + // CHECK-LABEL: @vssrarn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssrarn_w_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssrarn_w_d(_1, _2); + } + // CHECK-LABEL: @vssrarn_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssrarn_bu_h(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vssrarn_bu_h(_1, _2); + } + // CHECK-LABEL: @vssrarn_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssrarn_hu_w(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vssrarn_hu_w(_1, _2); + } + // CHECK-LABEL: @vssrarn_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssrarn_wu_d(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vssrarn_wu_d(_1, _2); + } + // CHECK-LABEL: @vsrln_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrln_b_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrln_b_h(_1, _2); + } + // CHECK-LABEL: @vsrln_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrln_h_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrln_h_w(_1, _2); + } + // CHECK-LABEL: @vsrln_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrln_w_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrln_w_d(_1, _2); + } + // CHECK-LABEL: @vssrln_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssrln_bu_h(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vssrln_bu_h(_1, _2); + } + // CHECK-LABEL: @vssrln_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssrln_hu_w(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vssrln_hu_w(_1, _2); + } + // CHECK-LABEL: @vssrln_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssrln_wu_d(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vssrln_wu_d(_1, _2); + } + // CHECK-LABEL: @vsrlrn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrlrn_b_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrlrn_b_h(_1, _2); + } + // CHECK-LABEL: @vsrlrn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrlrn_h_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrlrn_h_w(_1, _2); + } + // CHECK-LABEL: @vsrlrn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrlrn_w_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrlrn_w_d(_1, _2); + } + // CHECK-LABEL: @vssrlrn_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssrlrn_bu_h(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vssrlrn_bu_h(_1, _2); + } + // CHECK-LABEL: @vssrlrn_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssrlrn_hu_w(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vssrlrn_hu_w(_1, _2); + } + // CHECK-LABEL: @vssrlrn_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssrlrn_wu_d(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vssrlrn_wu_d(_1, _2); + } + // CHECK-LABEL: @vfrstpi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vfrstpi_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vfrstpi_b(_1, _2, 1); + } + // CHECK-LABEL: @vfrstpi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vfrstpi_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vfrstpi_h(_1, _2, 1); + } + // CHECK-LABEL: @vfrstp_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v16i8 vfrstp_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __builtin_lsx_vfrstp_b(_1, _2, _3); + } + // CHECK-LABEL: @vfrstp_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8i16 vfrstp_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __builtin_lsx_vfrstp_h(_1, _2, _3); + } + // CHECK-LABEL: @vshuf4i_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vshuf4i_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vshuf4i_d(_1, _2, 1); + } + // CHECK-LABEL: @vbsrl_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vbsrl_v(v16i8 _1) { return __builtin_lsx_vbsrl_v(_1, 1); } + // CHECK-LABEL: @vbsll_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vbsll_v(v16i8 _1) { return __builtin_lsx_vbsll_v(_1, 1); } + // CHECK-LABEL: @vextrins_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vextrins_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vextrins_b(_1, _2, 1); + } + // CHECK-LABEL: @vextrins_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vextrins_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vextrins_h(_1, _2, 1); + } + // CHECK-LABEL: @vextrins_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vextrins_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vextrins_w(_1, _2, 1); + } + // CHECK-LABEL: @vextrins_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vextrins_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vextrins_d(_1, _2, 1); + } + // CHECK-LABEL: @vmskltz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vmskltz_b(v16i8 _1) { return __builtin_lsx_vmskltz_b(_1); } + // CHECK-LABEL: @vmskltz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vmskltz_h(v8i16 _1) { return __builtin_lsx_vmskltz_h(_1); } + // CHECK-LABEL: @vmskltz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vmskltz_w(v4i32 _1) { return __builtin_lsx_vmskltz_w(_1); } + // CHECK-LABEL: @vmskltz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vmskltz_d(v2i64 _1) { return __builtin_lsx_vmskltz_d(_1); } + // CHECK-LABEL: @vsigncov_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsigncov_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsigncov_b(_1, _2); + } + // CHECK-LABEL: @vsigncov_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsigncov_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsigncov_h(_1, _2); + } + // CHECK-LABEL: @vsigncov_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsigncov_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsigncov_w(_1, _2); + } + // CHECK-LABEL: @vsigncov_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsigncov_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsigncov_d(_1, _2); + } + // CHECK-LABEL: @vfmadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4f32 vfmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { + return __builtin_lsx_vfmadd_s(_1, _2, _3); + } + // CHECK-LABEL: @vfmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2f64 vfmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { + return __builtin_lsx_vfmadd_d(_1, _2, _3); + } + // CHECK-LABEL: @vfmsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4f32 vfmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { + return __builtin_lsx_vfmsub_s(_1, _2, _3); + } + // CHECK-LABEL: @vfmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2f64 vfmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { + return __builtin_lsx_vfmsub_d(_1, _2, _3); + } + // CHECK-LABEL: @vfnmadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4f32 vfnmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { + return __builtin_lsx_vfnmadd_s(_1, _2, _3); + } + // CHECK-LABEL: @vfnmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2f64 vfnmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { + return __builtin_lsx_vfnmadd_d(_1, _2, _3); + } + // CHECK-LABEL: @vfnmsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4f32 vfnmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { + return __builtin_lsx_vfnmsub_s(_1, _2, _3); + } + // CHECK-LABEL: @vfnmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2f64 vfnmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { + return __builtin_lsx_vfnmsub_d(_1, _2, _3); + } + // CHECK-LABEL: @vftintrne_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vftintrne_w_s(v4f32 _1) { return __builtin_lsx_vftintrne_w_s(_1); } + // CHECK-LABEL: @vftintrne_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrne_l_d(v2f64 _1) { return __builtin_lsx_vftintrne_l_d(_1); } + // CHECK-LABEL: @vftintrp_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vftintrp_w_s(v4f32 _1) { return __builtin_lsx_vftintrp_w_s(_1); } + // CHECK-LABEL: @vftintrp_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrp_l_d(v2f64 _1) { return __builtin_lsx_vftintrp_l_d(_1); } + // CHECK-LABEL: @vftintrm_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vftintrm_w_s(v4f32 _1) { return __builtin_lsx_vftintrm_w_s(_1); } + // CHECK-LABEL: @vftintrm_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrm_l_d(v2f64 _1) { return __builtin_lsx_vftintrm_l_d(_1); } + // CHECK-LABEL: @vftint_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vftint_w_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vftint_w_d(_1, _2); + } + // CHECK-LABEL: @vffint_s_l( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x float> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4f32 vffint_s_l(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vffint_s_l(_1, _2); + } + // CHECK-LABEL: @vftintrz_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vftintrz_w_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vftintrz_w_d(_1, _2); + } + // CHECK-LABEL: @vftintrp_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vftintrp_w_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vftintrp_w_d(_1, _2); + } + // CHECK-LABEL: @vftintrm_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vftintrm_w_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vftintrm_w_d(_1, _2); + } + // CHECK-LABEL: @vftintrne_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vftintrne_w_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vftintrne_w_d(_1, _2); + } + // CHECK-LABEL: @vftintl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintl_l_s(v4f32 _1) { return __builtin_lsx_vftintl_l_s(_1); } + // CHECK-LABEL: @vftinth_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftinth_l_s(v4f32 _1) { return __builtin_lsx_vftinth_l_s(_1); } + // CHECK-LABEL: @vffinth_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vffinth_d_w(v4i32 _1) { return __builtin_lsx_vffinth_d_w(_1); } + // CHECK-LABEL: @vffintl_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x double> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2f64 vffintl_d_w(v4i32 _1) { return __builtin_lsx_vffintl_d_w(_1); } + // CHECK-LABEL: @vftintrzl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrzl_l_s(v4f32 _1) { return __builtin_lsx_vftintrzl_l_s(_1); } + // CHECK-LABEL: @vftintrzh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrzh_l_s(v4f32 _1) { return __builtin_lsx_vftintrzh_l_s(_1); } + // CHECK-LABEL: @vftintrpl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrpl_l_s(v4f32 _1) { return __builtin_lsx_vftintrpl_l_s(_1); } + // CHECK-LABEL: @vftintrph_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrph_l_s(v4f32 _1) { return __builtin_lsx_vftintrph_l_s(_1); } + // CHECK-LABEL: @vftintrml_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrml_l_s(v4f32 _1) { return __builtin_lsx_vftintrml_l_s(_1); } + // CHECK-LABEL: @vftintrmh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrmh_l_s(v4f32 _1) { return __builtin_lsx_vftintrmh_l_s(_1); } + // CHECK-LABEL: @vftintrnel_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrnel_l_s(v4f32 _1) { + return __builtin_lsx_vftintrnel_l_s(_1); + } + // CHECK-LABEL: @vftintrneh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vftintrneh_l_s(v4f32 _1) { + return __builtin_lsx_vftintrneh_l_s(_1); + } + // CHECK-LABEL: @vfrintrne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> +-// CHECK-NEXT: ret <4 x i32> [[TMP1]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vfrintrne_s(v4f32 _1) { return __builtin_lsx_vfrintrne_s(_1); } + // CHECK-LABEL: @vfrintrne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> +-// CHECK-NEXT: ret <2 x i64> [[TMP1]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vfrintrne_d(v2f64 _1) { return __builtin_lsx_vfrintrne_d(_1); } + // CHECK-LABEL: @vfrintrz_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> +-// CHECK-NEXT: ret <4 x i32> [[TMP1]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vfrintrz_s(v4f32 _1) { return __builtin_lsx_vfrintrz_s(_1); } + // CHECK-LABEL: @vfrintrz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> +-// CHECK-NEXT: ret <2 x i64> [[TMP1]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vfrintrz_d(v2f64 _1) { return __builtin_lsx_vfrintrz_d(_1); } + // CHECK-LABEL: @vfrintrp_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> +-// CHECK-NEXT: ret <4 x i32> [[TMP1]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vfrintrp_s(v4f32 _1) { return __builtin_lsx_vfrintrp_s(_1); } + // CHECK-LABEL: @vfrintrp_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> +-// CHECK-NEXT: ret <2 x i64> [[TMP1]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vfrintrp_d(v2f64 _1) { return __builtin_lsx_vfrintrp_d(_1); } + // CHECK-LABEL: @vfrintrm_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> +-// CHECK-NEXT: ret <4 x i32> [[TMP1]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vfrintrm_s(v4f32 _1) { return __builtin_lsx_vfrintrm_s(_1); } + // CHECK-LABEL: @vfrintrm_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> +-// CHECK-NEXT: ret <2 x i64> [[TMP1]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vfrintrm_d(v2f64 _1) { return __builtin_lsx_vfrintrm_d(_1); } + // CHECK-LABEL: @vstelm_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1, i32 1) ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i32 1, i32 1) + // CHECK-NEXT: ret void + // + void vstelm_b(v16i8 _1, void *_2) { +@@ -3349,7 +4577,8 @@ void vstelm_b(v16i8 _1, void *_2) { + } + // CHECK-LABEL: @vstelm_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> [[_1:%.*]], ptr [[_2:%.*]], i32 2, i32 1) ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> [[TMP0]], ptr [[_2:%.*]], i32 2, i32 1) + // CHECK-NEXT: ret void + // + void vstelm_h(v8i16 _1, void *_2) { +@@ -3357,7 +4586,8 @@ void vstelm_h(v8i16 _1, void *_2) { + } + // CHECK-LABEL: @vstelm_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> [[_1:%.*]], ptr [[_2:%.*]], i32 4, i32 1) ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> [[TMP0]], ptr [[_2:%.*]], i32 4, i32 1) + // CHECK-NEXT: ret void + // + void vstelm_w(v4i32 _1, void *_2) { +@@ -3365,7 +4595,8 @@ void vstelm_w(v4i32 _1, void *_2) { + } + // CHECK-LABEL: @vstelm_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> [[_1:%.*]], ptr [[_2:%.*]], i32 8, i32 1) ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> [[TMP0]], ptr [[_2:%.*]], i32 8, i32 1) + // CHECK-NEXT: ret void + // + void vstelm_d(v2i64 _1, void *_2) { +@@ -3373,1286 +4604,1785 @@ void vstelm_d(v2i64 _1, void *_2) { + } + // CHECK-LABEL: @vaddwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwev_d_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vaddwev_d_w(_1, _2); + } + // CHECK-LABEL: @vaddwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vaddwev_w_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vaddwev_w_h(_1, _2); + } + // CHECK-LABEL: @vaddwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vaddwev_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vaddwev_h_b(_1, _2); + } + // CHECK-LABEL: @vaddwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwod_d_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vaddwod_d_w(_1, _2); + } + // CHECK-LABEL: @vaddwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vaddwod_w_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vaddwod_w_h(_1, _2); + } + // CHECK-LABEL: @vaddwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vaddwod_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vaddwod_h_b(_1, _2); + } + // CHECK-LABEL: @vaddwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwev_d_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vaddwev_d_wu(_1, _2); + } + // CHECK-LABEL: @vaddwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vaddwev_w_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vaddwev_w_hu(_1, _2); + } + // CHECK-LABEL: @vaddwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vaddwev_h_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vaddwev_h_bu(_1, _2); + } + // CHECK-LABEL: @vaddwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwod_d_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vaddwod_d_wu(_1, _2); + } + // CHECK-LABEL: @vaddwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vaddwod_w_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vaddwod_w_hu(_1, _2); + } + // CHECK-LABEL: @vaddwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vaddwod_h_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vaddwod_h_bu(_1, _2); + } + // CHECK-LABEL: @vaddwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwev_d_wu_w(v4u32 _1, v4i32 _2) { + return __builtin_lsx_vaddwev_d_wu_w(_1, _2); + } + // CHECK-LABEL: @vaddwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vaddwev_w_hu_h(v8u16 _1, v8i16 _2) { + return __builtin_lsx_vaddwev_w_hu_h(_1, _2); + } + // CHECK-LABEL: @vaddwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vaddwev_h_bu_b(v16u8 _1, v16i8 _2) { + return __builtin_lsx_vaddwev_h_bu_b(_1, _2); + } + // CHECK-LABEL: @vaddwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwod_d_wu_w(v4u32 _1, v4i32 _2) { + return __builtin_lsx_vaddwod_d_wu_w(_1, _2); + } + // CHECK-LABEL: @vaddwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vaddwod_w_hu_h(v8u16 _1, v8i16 _2) { + return __builtin_lsx_vaddwod_w_hu_h(_1, _2); + } + // CHECK-LABEL: @vaddwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vaddwod_h_bu_b(v16u8 _1, v16i8 _2) { + return __builtin_lsx_vaddwod_h_bu_b(_1, _2); + } + // CHECK-LABEL: @vsubwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsubwev_d_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsubwev_d_w(_1, _2); + } + // CHECK-LABEL: @vsubwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsubwev_w_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsubwev_w_h(_1, _2); + } + // CHECK-LABEL: @vsubwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsubwev_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsubwev_h_b(_1, _2); + } + // CHECK-LABEL: @vsubwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsubwod_d_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsubwod_d_w(_1, _2); + } + // CHECK-LABEL: @vsubwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsubwod_w_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsubwod_w_h(_1, _2); + } + // CHECK-LABEL: @vsubwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsubwod_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsubwod_h_b(_1, _2); + } + // CHECK-LABEL: @vsubwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsubwev_d_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vsubwev_d_wu(_1, _2); + } + // CHECK-LABEL: @vsubwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsubwev_w_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vsubwev_w_hu(_1, _2); + } + // CHECK-LABEL: @vsubwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsubwev_h_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vsubwev_h_bu(_1, _2); + } + // CHECK-LABEL: @vsubwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsubwod_d_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vsubwod_d_wu(_1, _2); + } + // CHECK-LABEL: @vsubwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsubwod_w_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vsubwod_w_hu(_1, _2); + } + // CHECK-LABEL: @vsubwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsubwod_h_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vsubwod_h_bu(_1, _2); + } + // CHECK-LABEL: @vaddwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwev_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vaddwev_q_d(_1, _2); + } + // CHECK-LABEL: @vaddwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwod_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vaddwod_q_d(_1, _2); + } + // CHECK-LABEL: @vaddwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwev_q_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vaddwev_q_du(_1, _2); + } + // CHECK-LABEL: @vaddwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwod_q_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vaddwod_q_du(_1, _2); + } + // CHECK-LABEL: @vsubwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsubwev_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsubwev_q_d(_1, _2); + } + // CHECK-LABEL: @vsubwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsubwod_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsubwod_q_d(_1, _2); + } + // CHECK-LABEL: @vsubwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsubwev_q_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vsubwev_q_du(_1, _2); + } + // CHECK-LABEL: @vsubwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsubwod_q_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vsubwod_q_du(_1, _2); + } + // CHECK-LABEL: @vaddwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwev_q_du_d(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vaddwev_q_du_d(_1, _2); + } + // CHECK-LABEL: @vaddwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vaddwod_q_du_d(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vaddwod_q_du_d(_1, _2); + } + // CHECK-LABEL: @vmulwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwev_d_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vmulwev_d_w(_1, _2); + } + // CHECK-LABEL: @vmulwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmulwev_w_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vmulwev_w_h(_1, _2); + } + // CHECK-LABEL: @vmulwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmulwev_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vmulwev_h_b(_1, _2); + } + // CHECK-LABEL: @vmulwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwod_d_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vmulwod_d_w(_1, _2); + } + // CHECK-LABEL: @vmulwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmulwod_w_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vmulwod_w_h(_1, _2); + } + // CHECK-LABEL: @vmulwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmulwod_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vmulwod_h_b(_1, _2); + } + // CHECK-LABEL: @vmulwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwev_d_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vmulwev_d_wu(_1, _2); + } + // CHECK-LABEL: @vmulwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmulwev_w_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vmulwev_w_hu(_1, _2); + } + // CHECK-LABEL: @vmulwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmulwev_h_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vmulwev_h_bu(_1, _2); + } + // CHECK-LABEL: @vmulwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwod_d_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vmulwod_d_wu(_1, _2); + } + // CHECK-LABEL: @vmulwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmulwod_w_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vmulwod_w_hu(_1, _2); + } + // CHECK-LABEL: @vmulwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmulwod_h_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vmulwod_h_bu(_1, _2); + } + // CHECK-LABEL: @vmulwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwev_d_wu_w(v4u32 _1, v4i32 _2) { + return __builtin_lsx_vmulwev_d_wu_w(_1, _2); + } + // CHECK-LABEL: @vmulwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmulwev_w_hu_h(v8u16 _1, v8i16 _2) { + return __builtin_lsx_vmulwev_w_hu_h(_1, _2); + } + // CHECK-LABEL: @vmulwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmulwev_h_bu_b(v16u8 _1, v16i8 _2) { + return __builtin_lsx_vmulwev_h_bu_b(_1, _2); + } + // CHECK-LABEL: @vmulwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwod_d_wu_w(v4u32 _1, v4i32 _2) { + return __builtin_lsx_vmulwod_d_wu_w(_1, _2); + } + // CHECK-LABEL: @vmulwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vmulwod_w_hu_h(v8u16 _1, v8i16 _2) { + return __builtin_lsx_vmulwod_w_hu_h(_1, _2); + } + // CHECK-LABEL: @vmulwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vmulwod_h_bu_b(v16u8 _1, v16i8 _2) { + return __builtin_lsx_vmulwod_h_bu_b(_1, _2); + } + // CHECK-LABEL: @vmulwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwev_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vmulwev_q_d(_1, _2); + } + // CHECK-LABEL: @vmulwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwod_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vmulwod_q_d(_1, _2); + } + // CHECK-LABEL: @vmulwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwev_q_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vmulwev_q_du(_1, _2); + } + // CHECK-LABEL: @vmulwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwod_q_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vmulwod_q_du(_1, _2); + } + // CHECK-LABEL: @vmulwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwev_q_du_d(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vmulwev_q_du_d(_1, _2); + } + // CHECK-LABEL: @vmulwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vmulwod_q_du_d(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vmulwod_q_du_d(_1, _2); + } + // CHECK-LABEL: @vhaddw_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vhaddw_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vhaddw_q_d(_1, _2); + } + // CHECK-LABEL: @vhaddw_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vhaddw_qu_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vhaddw_qu_du(_1, _2); + } + // CHECK-LABEL: @vhsubw_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vhsubw_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vhsubw_q_d(_1, _2); + } + // CHECK-LABEL: @vhsubw_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vhsubw_qu_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vhsubw_qu_du(_1, _2); + } + // CHECK-LABEL: @vmaddwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmaddwev_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { + return __builtin_lsx_vmaddwev_d_w(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4i32 vmaddwev_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { + return __builtin_lsx_vmaddwev_w_h(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8i16 vmaddwev_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { + return __builtin_lsx_vmaddwev_h_b(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2u64 vmaddwev_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { + return __builtin_lsx_vmaddwev_d_wu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4u32 vmaddwev_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { + return __builtin_lsx_vmaddwev_w_hu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8u16 vmaddwev_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { + return __builtin_lsx_vmaddwev_h_bu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmaddwod_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { + return __builtin_lsx_vmaddwod_d_w(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4i32 vmaddwod_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { + return __builtin_lsx_vmaddwod_w_h(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8i16 vmaddwod_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { + return __builtin_lsx_vmaddwod_h_b(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2u64 vmaddwod_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { + return __builtin_lsx_vmaddwod_d_wu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4u32 vmaddwod_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { + return __builtin_lsx_vmaddwod_w_hu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8u16 vmaddwod_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { + return __builtin_lsx_vmaddwod_h_bu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmaddwev_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { + return __builtin_lsx_vmaddwev_d_wu_w(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4i32 vmaddwev_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { + return __builtin_lsx_vmaddwev_w_hu_h(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8i16 vmaddwev_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { + return __builtin_lsx_vmaddwev_h_bu_b(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmaddwod_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { + return __builtin_lsx_vmaddwod_d_wu_w(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v4i32 vmaddwod_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { + return __builtin_lsx_vmaddwod_w_hu_h(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v8i16 vmaddwod_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { + return __builtin_lsx_vmaddwod_h_bu_b(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmaddwev_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __builtin_lsx_vmaddwev_q_d(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmaddwod_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __builtin_lsx_vmaddwod_q_d(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2u64 vmaddwev_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { + return __builtin_lsx_vmaddwev_q_du(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2u64 vmaddwod_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { + return __builtin_lsx_vmaddwod_q_du(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmaddwev_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { + return __builtin_lsx_vmaddwev_q_du_d(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v2i64 vmaddwod_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { + return __builtin_lsx_vmaddwod_q_du_d(_1, _2, _3); + } + // CHECK-LABEL: @vrotr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vrotr_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vrotr_b(_1, _2); + } + // CHECK-LABEL: @vrotr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vrotr_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vrotr_h(_1, _2); + } + // CHECK-LABEL: @vrotr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vrotr_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vrotr_w(_1, _2); + } + // CHECK-LABEL: @vrotr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vrotr_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vrotr_d(_1, _2); + } + // CHECK-LABEL: @vadd_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vadd_q(v2i64 _1, v2i64 _2) { return __builtin_lsx_vadd_q(_1, _2); } + // CHECK-LABEL: @vsub_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsub_q(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsub_q(_1, _2); } + // CHECK-LABEL: @vldrepl_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v16i8 vldrepl_b(void *_1) { return __builtin_lsx_vldrepl_b(_1, 1); } + // CHECK-LABEL: @vldrepl_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr [[_1:%.*]], i32 2) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v8i16 vldrepl_h(void *_1) { return __builtin_lsx_vldrepl_h(_1, 2); } + // CHECK-LABEL: @vldrepl_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr [[_1:%.*]], i32 4) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v4i32 vldrepl_w(void *_1) { return __builtin_lsx_vldrepl_w(_1, 4); } + // CHECK-LABEL: @vldrepl_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr [[_1:%.*]], i32 8) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v2i64 vldrepl_d(void *_1) { return __builtin_lsx_vldrepl_d(_1, 8); } + // CHECK-LABEL: @vmskgez_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vmskgez_b(v16i8 _1) { return __builtin_lsx_vmskgez_b(_1); } + // CHECK-LABEL: @vmsknz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vmsknz_b(v16i8 _1) { return __builtin_lsx_vmsknz_b(_1); } + // CHECK-LABEL: @vexth_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vexth_h_b(v16i8 _1) { return __builtin_lsx_vexth_h_b(_1); } + // CHECK-LABEL: @vexth_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vexth_w_h(v8i16 _1) { return __builtin_lsx_vexth_w_h(_1); } + // CHECK-LABEL: @vexth_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vexth_d_w(v4i32 _1) { return __builtin_lsx_vexth_d_w(_1); } + // CHECK-LABEL: @vexth_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vexth_q_d(v2i64 _1) { return __builtin_lsx_vexth_q_d(_1); } + // CHECK-LABEL: @vexth_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8u16 vexth_hu_bu(v16u8 _1) { return __builtin_lsx_vexth_hu_bu(_1); } + // CHECK-LABEL: @vexth_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4u32 vexth_wu_hu(v8u16 _1) { return __builtin_lsx_vexth_wu_hu(_1); } + // CHECK-LABEL: @vexth_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vexth_du_wu(v4u32 _1) { return __builtin_lsx_vexth_du_wu(_1); } + // CHECK-LABEL: @vexth_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vexth_qu_du(v2u64 _1) { return __builtin_lsx_vexth_qu_du(_1); } + // CHECK-LABEL: @vrotri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v16i8 vrotri_b(v16i8 _1) { return __builtin_lsx_vrotri_b(_1, 1); } + // CHECK-LABEL: @vrotri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v8i16 vrotri_h(v8i16 _1) { return __builtin_lsx_vrotri_h(_1, 1); } + // CHECK-LABEL: @vrotri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v4i32 vrotri_w(v4i32 _1) { return __builtin_lsx_vrotri_w(_1, 1); } + // CHECK-LABEL: @vrotri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> [[TMP0]], i32 1) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vrotri_d(v2i64 _1) { return __builtin_lsx_vrotri_d(_1, 1); } + // CHECK-LABEL: @vextl_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2i64 vextl_q_d(v2i64 _1) { return __builtin_lsx_vextl_q_d(_1); } + // CHECK-LABEL: @vsrlni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsrlni_b_h(_1, _2, 1); + } + // CHECK-LABEL: @vsrlni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrlni_h_w(_1, _2, 1); + } + // CHECK-LABEL: @vsrlni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrlni_w_d(_1, _2, 1); + } + // CHECK-LABEL: @vsrlni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrlni_d_q(_1, _2, 1); + } + // CHECK-LABEL: @vsrlrni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsrlrni_b_h(_1, _2, 1); + } + // CHECK-LABEL: @vsrlrni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrlrni_h_w(_1, _2, 1); + } + // CHECK-LABEL: @vsrlrni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrlrni_w_d(_1, _2, 1); + } + // CHECK-LABEL: @vsrlrni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrlrni_d_q(_1, _2, 1); + } + // CHECK-LABEL: @vssrlni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vssrlni_b_h(_1, _2, 1); + } + // CHECK-LABEL: @vssrlni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssrlni_h_w(_1, _2, 1); + } + // CHECK-LABEL: @vssrlni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssrlni_w_d(_1, _2, 1); + } + // CHECK-LABEL: @vssrlni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssrlni_d_q(_1, _2, 1); + } + // CHECK-LABEL: @vssrlni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2) { + return __builtin_lsx_vssrlni_bu_h(_1, _2, 1); + } + // CHECK-LABEL: @vssrlni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2) { + return __builtin_lsx_vssrlni_hu_w(_1, _2, 1); + } + // CHECK-LABEL: @vssrlni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2) { + return __builtin_lsx_vssrlni_wu_d(_1, _2, 1); + } + // CHECK-LABEL: @vssrlni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vssrlni_du_q(_1, _2, 1); + } + // CHECK-LABEL: @vssrlrni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vssrlrni_b_h(_1, _2, 1); + } + // CHECK-LABEL: @vssrlrni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssrlrni_h_w(_1, _2, 1); + } + // CHECK-LABEL: @vssrlrni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssrlrni_w_d(_1, _2, 1); + } + // CHECK-LABEL: @vssrlrni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssrlrni_d_q(_1, _2, 1); + } + // CHECK-LABEL: @vssrlrni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2) { + return __builtin_lsx_vssrlrni_bu_h(_1, _2, 1); + } + // CHECK-LABEL: @vssrlrni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2) { + return __builtin_lsx_vssrlrni_hu_w(_1, _2, 1); + } + // CHECK-LABEL: @vssrlrni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2) { + return __builtin_lsx_vssrlrni_wu_d(_1, _2, 1); + } + // CHECK-LABEL: @vssrlrni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vssrlrni_du_q(_1, _2, 1); + } + // CHECK-LABEL: @vsrani_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrani_b_h(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsrani_b_h(_1, _2, 1); + } + // CHECK-LABEL: @vsrani_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrani_h_w(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrani_h_w(_1, _2, 1); + } + // CHECK-LABEL: @vsrani_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrani_w_d(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrani_w_d(_1, _2, 1); + } + // CHECK-LABEL: @vsrani_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsrani_d_q(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrani_d_q(_1, _2, 1); + } + // CHECK-LABEL: @vsrarni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsrarni_b_h(_1, _2, 1); + } + // CHECK-LABEL: @vsrarni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrarni_h_w(_1, _2, 1); + } + // CHECK-LABEL: @vsrarni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrarni_w_d(_1, _2, 1); + } + // CHECK-LABEL: @vsrarni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrarni_d_q(_1, _2, 1); + } + // CHECK-LABEL: @vssrani_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssrani_b_h(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vssrani_b_h(_1, _2, 1); + } + // CHECK-LABEL: @vssrani_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssrani_h_w(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssrani_h_w(_1, _2, 1); + } + // CHECK-LABEL: @vssrani_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssrani_w_d(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssrani_w_d(_1, _2, 1); + } + // CHECK-LABEL: @vssrani_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vssrani_d_q(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssrani_d_q(_1, _2, 1); + } + // CHECK-LABEL: @vssrani_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2) { + return __builtin_lsx_vssrani_bu_h(_1, _2, 1); + } + // CHECK-LABEL: @vssrani_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2) { + return __builtin_lsx_vssrani_hu_w(_1, _2, 1); + } + // CHECK-LABEL: @vssrani_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2) { + return __builtin_lsx_vssrani_wu_d(_1, _2, 1); + } + // CHECK-LABEL: @vssrani_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vssrani_du_q(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vssrani_du_q(_1, _2, 1); + } + // CHECK-LABEL: @vssrarni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vssrarni_b_h(_1, _2, 1); + } + // CHECK-LABEL: @vssrarni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssrarni_h_w(_1, _2, 1); + } + // CHECK-LABEL: @vssrarni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssrarni_w_d(_1, _2, 1); + } + // CHECK-LABEL: @vssrarni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssrarni_d_q(_1, _2, 1); + } + // CHECK-LABEL: @vssrarni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2) { + return __builtin_lsx_vssrarni_bu_h(_1, _2, 1); + } + // CHECK-LABEL: @vssrarni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2) { + return __builtin_lsx_vssrarni_hu_w(_1, _2, 1); + } + // CHECK-LABEL: @vssrarni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2) { + return __builtin_lsx_vssrarni_wu_d(_1, _2, 1); + } + // CHECK-LABEL: @vssrarni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vssrarni_du_q(_1, _2, 1); + } + // CHECK-LABEL: @vpermi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vpermi_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vpermi_w(_1, _2, 1); +@@ -4660,79 +6390,107 @@ v4i32 vpermi_w(v4i32 _1, v4i32 _2) { + // CHECK-LABEL: @vld( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vld(ptr [[_1:%.*]], i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v16i8 vld(void *_1) { return __builtin_lsx_vld(_1, 1); } + // CHECK-LABEL: @vst( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vst(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1) ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vst(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i32 1) + // CHECK-NEXT: ret void + // + void vst(v16i8 _1, void *_2) { return __builtin_lsx_vst(_1, _2, 1); } + // CHECK-LABEL: @vssrlrn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssrlrn_b_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssrlrn_b_h(_1, _2); + } + // CHECK-LABEL: @vssrlrn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssrlrn_h_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssrlrn_h_w(_1, _2); + } + // CHECK-LABEL: @vssrlrn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssrlrn_w_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssrlrn_w_d(_1, _2); + } + // CHECK-LABEL: @vssrln_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vssrln_b_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssrln_b_h(_1, _2); + } + // CHECK-LABEL: @vssrln_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v8i16 vssrln_h_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssrln_h_w(_1, _2); + } + // CHECK-LABEL: @vssrln_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vssrln_w_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssrln_w_d(_1, _2); + } + // CHECK-LABEL: @vorn_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v16i8 vorn_v(v16i8 _1, v16i8 _2) { return __builtin_lsx_vorn_v(_1, _2); } + // CHECK-LABEL: @vldi( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldi(i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v2i64 vldi() { return __builtin_lsx_vldi(1); } + // CHECK-LABEL: @vshuf_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) ++// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 ++// CHECK-NEXT: ret i128 [[TMP4]] + // + v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __builtin_lsx_vshuf_b(_1, _2, _3); +@@ -4740,429 +6498,575 @@ v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) { + // CHECK-LABEL: @vldx( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldx(ptr [[_1:%.*]], i64 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v16i8 vldx(void *_1) { return __builtin_lsx_vldx(_1, 1); } + // CHECK-LABEL: @vstx( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstx(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i64 1) ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstx(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i64 1) + // CHECK-NEXT: ret void + // + void vstx(v16i8 _1, void *_2) { return __builtin_lsx_vstx(_1, _2, 1); } + // CHECK-LABEL: @vextl_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ++// CHECK-NEXT: ret i128 [[TMP2]] + // + v2u64 vextl_qu_du(v2u64 _1) { return __builtin_lsx_vextl_qu_du(_1); } + // CHECK-LABEL: @bnz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bnz_b(v16u8 _1) { return __builtin_lsx_bnz_b(_1); } + // CHECK-LABEL: @bnz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bnz_d(v2u64 _1) { return __builtin_lsx_bnz_d(_1); } + // CHECK-LABEL: @bnz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bnz_h(v8u16 _1) { return __builtin_lsx_bnz_h(_1); } + // CHECK-LABEL: @bnz_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bnz_v(v16u8 _1) { return __builtin_lsx_bnz_v(_1); } + // CHECK-LABEL: @bnz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bnz_w(v4u32 _1) { return __builtin_lsx_bnz_w(_1); } + // CHECK-LABEL: @bz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bz_b(v16u8 _1) { return __builtin_lsx_bz_b(_1); } + // CHECK-LABEL: @bz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bz_d(v2u64 _1) { return __builtin_lsx_bz_d(_1); } + // CHECK-LABEL: @bz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bz_h(v8u16 _1) { return __builtin_lsx_bz_h(_1); } + // CHECK-LABEL: @bz_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bz_v(v16u8 _1) { return __builtin_lsx_bz_v(_1); } + // CHECK-LABEL: @bz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> [[_1:%.*]]) +-// CHECK-NEXT: ret i32 [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> [[TMP0]]) ++// CHECK-NEXT: ret i32 [[TMP1]] + // + int bz_w(v4u32 _1) { return __builtin_lsx_bz_w(_1); } + // CHECK-LABEL: @vfcmp_caf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_caf_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_caf_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_caf_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_caf_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_caf_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_ceq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_ceq_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_ceq_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_ceq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_ceq_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_ceq_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_cle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_cle_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cle_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_cle_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_cle_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cle_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_clt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_clt_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_clt_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_clt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_clt_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_clt_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_cne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_cne_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cne_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_cne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_cne_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cne_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_cor_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_cor_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cor_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_cor_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_cor_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cor_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_cueq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_cueq_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cueq_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_cueq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_cueq_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cueq_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_cule_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_cule_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cule_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_cule_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_cule_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cule_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_cult_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_cult_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cult_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_cult_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_cult_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cult_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_cun_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_cun_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cun_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_cune_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_cune_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cune_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_cune_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_cune_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cune_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_cun_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_cun_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cun_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_saf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_saf_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_saf_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_saf_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_saf_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_saf_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_seq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_seq_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_seq_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_seq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_seq_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_seq_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_sle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_sle_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sle_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_sle_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_sle_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sle_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_slt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_slt_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_slt_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_slt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_slt_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_slt_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_sne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_sne_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sne_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_sne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_sne_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sne_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_sor_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_sor_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sor_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_sor_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_sor_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sor_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_sueq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_sueq_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sueq_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_sueq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_sueq_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sueq_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_sule_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_sule_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sule_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_sule_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_sule_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sule_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_sult_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_sult_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sult_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_sult_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_sult_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sult_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_sun_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_sun_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sun_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_sune_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v2i64 vfcmp_sune_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sune_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_sune_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_sune_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sune_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_sun_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> ++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) ++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ++// CHECK-NEXT: ret i128 [[TMP3]] + // + v4i32 vfcmp_sun_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sun_s(_1, _2); +@@ -5170,24 +7074,28 @@ v4i32 vfcmp_sun_s(v4f32 _1, v4f32 _2) { + // CHECK-LABEL: @vrepli_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 1) +-// CHECK-NEXT: ret <16 x i8> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v16i8 vrepli_b() { return __builtin_lsx_vrepli_b(1); } + // CHECK-LABEL: @vrepli_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 1) +-// CHECK-NEXT: ret <2 x i64> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v2i64 vrepli_d() { return __builtin_lsx_vrepli_d(1); } + // CHECK-LABEL: @vrepli_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 1) +-// CHECK-NEXT: ret <8 x i16> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v8i16 vrepli_h() { return __builtin_lsx_vrepli_h(1); } + // CHECK-LABEL: @vrepli_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 1) +-// CHECK-NEXT: ret <4 x i32> [[TMP0]] ++// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 ++// CHECK-NEXT: ret i128 [[TMP1]] + // + v4i32 vrepli_w() { return __builtin_lsx_vrepli_w(1); } +-- +2.20.1 + diff --git a/0009-Driver-Support-fsanitize-cfi-icall-on-loongarch64-67.patch b/0009-Driver-Support-fsanitize-cfi-icall-on-loongarch64-67.patch new file mode 100644 index 0000000..accec34 --- /dev/null +++ b/0009-Driver-Support-fsanitize-cfi-icall-on-loongarch64-67.patch @@ -0,0 +1,42 @@ +From fe9da4afce8f001e9e62ff1a84fe274a2f65c063 Mon Sep 17 00:00:00 2001 +From: Ami-zhang <96056515+Ami-zhang@users.noreply.github.com> +Date: Thu, 28 Sep 2023 15:20:27 +0800 +Subject: [PATCH 09/17] [Driver] Support -fsanitize=cfi-icall on loongarch64 + (#67310) + +(cherry picked from commit 55accc82bec48acae769b086ad9a5dc29da77f02) +Change-Id: I5f3640da5a96c37e13665bfa2c3edb1e497fa09e +--- + clang/lib/Driver/ToolChain.cpp | 3 ++- + clang/test/Driver/fsanitize.c | 1 + + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp +index 8dafc3d481c2..138f4135b7a1 100644 +--- a/clang/lib/Driver/ToolChain.cpp ++++ b/clang/lib/Driver/ToolChain.cpp +@@ -1273,7 +1273,8 @@ SanitizerMask ToolChain::getSupportedSanitizers() const { + if (getTriple().getArch() == llvm::Triple::x86 || + getTriple().getArch() == llvm::Triple::x86_64 || + getTriple().getArch() == llvm::Triple::arm || getTriple().isWasm() || +- getTriple().isAArch64() || getTriple().isRISCV()) ++ getTriple().isAArch64() || getTriple().isRISCV() || ++ getTriple().isLoongArch64()) + Res |= SanitizerKind::CFIICall; + if (getTriple().getArch() == llvm::Triple::x86_64 || + getTriple().isAArch64(64) || getTriple().isRISCV()) +diff --git a/clang/test/Driver/fsanitize.c b/clang/test/Driver/fsanitize.c +index 9442f6b91471..4a525d75ea11 100644 +--- a/clang/test/Driver/fsanitize.c ++++ b/clang/test/Driver/fsanitize.c +@@ -600,6 +600,7 @@ + // RUN: %clang --target=aarch64_be -fvisibility=hidden -fsanitize=cfi -flto -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CFI + // RUN: %clang --target=riscv32 -fvisibility=hidden -fsanitize=cfi -flto -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CFI + // RUN: %clang --target=riscv64 -fvisibility=hidden -fsanitize=cfi -flto -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CFI ++// RUN: %clang --target=loongarch64 -fvisibility=hidden -fsanitize=cfi -flto -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CFI + // CHECK-CFI: -emit-llvm-bc{{.*}}-fsanitize=cfi-derived-cast,cfi-icall,cfi-mfcall,cfi-unrelated-cast,cfi-nvcall,cfi-vcall + // CHECK-CFI-NOMFCALL: -emit-llvm-bc{{.*}}-fsanitize=cfi-derived-cast,cfi-icall,cfi-unrelated-cast,cfi-nvcall,cfi-vcall + // CHECK-CFI-DCAST: -emit-llvm-bc{{.*}}-fsanitize=cfi-derived-cast +-- +2.20.1 + diff --git a/0010-Clang-LoongArch-Generate-_mcount-instead-of-mcount-6.patch b/0010-Clang-LoongArch-Generate-_mcount-instead-of-mcount-6.patch new file mode 100644 index 0000000..77895b7 --- /dev/null +++ b/0010-Clang-LoongArch-Generate-_mcount-instead-of-mcount-6.patch @@ -0,0 +1,62 @@ +From 2f22695f7be1f9c5b2a09901efd5a2268ef22c87 Mon Sep 17 00:00:00 2001 +From: Nathan Chancellor +Date: Fri, 8 Sep 2023 10:54:35 -0700 +Subject: [PATCH 10/17] [Clang][LoongArch] Generate _mcount instead of mcount + (#65657) + +When building the LoongArch Linux kernel without +`CONFIG_DYNAMIC_FTRACE`, the build fails to link because the mcount +symbol is `mcount`, not `_mcount` like GCC generates and the kernel +expects: + +``` +ld.lld: error: undefined symbol: mcount +>>> referenced by version.c +>>> init/version.o:(early_hostname) in archive vmlinux.a +>>> referenced by do_mounts.c +>>> init/do_mounts.o:(rootfs_init_fs_context) in archive vmlinux.a +>>> referenced by main.c +>>> init/main.o:(__traceiter_initcall_level) in archive vmlinux.a +>>> referenced 97011 more times +>>> did you mean: _mcount +>>> defined in: vmlinux.a(arch/loongarch/kernel/mcount.o) +``` + +Set `MCountName` in `LoongArchTargetInfo` to `_mcount`, which resolves +the build failure. + +(cherry picked from commit cc2b09bee017147527e7bd1eb5272f4f70a7b900) +Change-Id: I7d8b9b50c22471688b577ca434f8654cdfd0d1da +--- + clang/lib/Basic/Targets/LoongArch.h | 1 + + clang/test/CodeGen/mcount.c | 2 ++ + 2 files changed, 3 insertions(+) + +diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h +index 8f4150b2539d..3313102492cb 100644 +--- a/clang/lib/Basic/Targets/LoongArch.h ++++ b/clang/lib/Basic/Targets/LoongArch.h +@@ -40,6 +40,7 @@ public: + LongDoubleWidth = 128; + LongDoubleAlign = 128; + LongDoubleFormat = &llvm::APFloat::IEEEquad(); ++ MCountName = "_mcount"; + SuitableAlign = 128; + WCharType = SignedInt; + WIntType = UnsignedInt; +diff --git a/clang/test/CodeGen/mcount.c b/clang/test/CodeGen/mcount.c +index 8f994ab4e754..bdd609c1dfc5 100644 +--- a/clang/test/CodeGen/mcount.c ++++ b/clang/test/CodeGen/mcount.c +@@ -7,6 +7,8 @@ + // RUN: %clang_cc1 -pg -triple x86_64-netbsd -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-DOUBLE-PREFIXED,NO-MCOUNT1 %s + // RUN: %clang_cc1 -pg -triple arm-netbsd-eabi -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-DOUBLE-PREFIXED,NO-MCOUNT1 %s + // RUN: %clang_cc1 -pg -triple aarch64-netbsd -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-DOUBLE-PREFIXED,NO-MCOUNT1 %s ++// RUN: %clang_cc1 -pg -triple loongarch32 -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s ++// RUN: %clang_cc1 -pg -triple loongarch64 -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s + // RUN: %clang_cc1 -pg -triple mips-netbsd -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-DOUBLE-PREFIXED,NO-MCOUNT1 %s + // RUN: %clang_cc1 -pg -triple mips-unknown-gnu-linux -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s + // RUN: %clang_cc1 -pg -triple mipsel-unknown-gnu-linux -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s +-- +2.20.1 + diff --git a/0011-LoongArch-test-Add-some-ABI-regression-tests-for-emp.patch b/0011-LoongArch-test-Add-some-ABI-regression-tests-for-emp.patch new file mode 100644 index 0000000..06a0eb5 --- /dev/null +++ b/0011-LoongArch-test-Add-some-ABI-regression-tests-for-emp.patch @@ -0,0 +1,87 @@ +From 65807250035e1f9631e863ac6cc9af74c39d4c4f Mon Sep 17 00:00:00 2001 +From: Weining Lu +Date: Thu, 26 Oct 2023 11:50:28 +0800 +Subject: [PATCH 11/17] [LoongArch][test] Add some ABI regression tests for + empty struct. NFC + +How empty structs (not as fields of container struct) are passed in C++ +is not explicitly documented in psABI. This patch adds some tests +showing the current handing of clang. Some of the results are different +from gcc. Following patch(es) will try to fix the mismatch. + +(cherry picked from commit 8149066fa532d82ff62a0629d5a9fab6bd4da768) +Change-Id: I65eaa6250dd2e32b644b278f9e20350ed482e18b +--- + .../LoongArch/abi-lp64d-empty-structs.c | 53 +++++++++++++++++++ + 1 file changed, 53 insertions(+) + +diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c +index fb90bf556c19..d0daafac336e 100644 +--- a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c ++++ b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c +@@ -81,9 +81,62 @@ struct s8 test_s8(struct s8 a) { + return a; + } + ++/// Note: Below tests check how empty structs are passed while above tests check ++/// empty structs as fields of container struct are ignored when flattening ++/// structs to examine whether the container structs can be passed via FARs. ++ + // CHECK-C: define{{.*}} void @test_s9() + // CHECK-CXX: define{{.*}} i64 @_Z7test_s92s9(i64 {{.*}}) + struct s9 { struct empty e; }; + struct s9 test_s9(struct s9 a) { + return a; + } ++ ++// CHECK-C: define{{.*}} void @test_s10() ++// CHECK-CXX: define{{.*}} void @_Z8test_s103s10() ++struct s10 { }; ++struct s10 test_s10(struct s10 a) { ++ return a; ++} ++ ++// CHECK-C: define{{.*}} void @test_s11() ++// CHECK-CXX: define{{.*}} i64 @_Z8test_s113s11(i64 {{.*}}) ++struct s11 { struct { } s; }; ++struct s11 test_s11(struct s11 a) { ++ return a; ++} ++ ++// CHECK-C: define{{.*}} void @test_s12() ++// CHECK-CXX: define{{.*}} void @_Z8test_s123s12() ++struct s12 { int i[0]; }; ++struct s12 test_s12(struct s12 a) { ++ return a; ++} ++ ++// CHECK-C: define{{.*}} void @test_s13() ++// CHECK-CXX: define{{.*}} void @_Z8test_s133s13() ++struct s13 { struct { } s[0]; }; ++struct s13 test_s13(struct s13 a) { ++ return a; ++} ++ ++// CHECK-C: define{{.*}} void @test_s14() ++// CHECK-CXX: define{{.*}} i64 @_Z8test_s143s14(i64 {{.*}}) ++struct s14 { struct { } s[1]; }; ++struct s14 test_s14(struct s14 a) { ++ return a; ++} ++ ++// CHECK-C: define{{.*}} void @test_s15() ++// CHECK-CXX: define{{.*}} void @_Z8test_s153s15() ++struct s15 { int : 0; }; ++struct s15 test_s15(struct s15 a) { ++ return a; ++} ++ ++// CHECK-C: define{{.*}} void @test_s16() ++// CHECK-CXX: define{{.*}} void @_Z8test_s163s16() ++struct s16 { int : 1; }; ++struct s16 test_s16(struct s16 a) { ++ return a; ++} +-- +2.20.1 + diff --git a/0012-LoongArch-Fix-ABI-mismatch-with-gcc-g-about-empty-st.patch b/0012-LoongArch-Fix-ABI-mismatch-with-gcc-g-about-empty-st.patch new file mode 100644 index 0000000..4ac2d66 --- /dev/null +++ b/0012-LoongArch-Fix-ABI-mismatch-with-gcc-g-about-empty-st.patch @@ -0,0 +1,86 @@ +From 98951b297f1a6635e9533d677fbdff4496d5bb8e Mon Sep 17 00:00:00 2001 +From: Lu Weining +Date: Tue, 31 Oct 2023 21:18:06 +0800 +Subject: [PATCH 12/17] [LoongArch] Fix ABI mismatch with gcc/g++ about empty + structs passing (#70320) + +How empty structs (not as fields of container struct) are passed in C++ +is not explicitly documented in psABI. However, this patch fixes the +mismatch with g++. + +Note that the unnamed bitfield case `struct { int : 1; }` in C is also +fixed. Previously clang regards it as an empty struct and then ignores +it when passing. Now size of the struct is counted; since it's size is +not 0, clang will not ignore it even in C. + +While https://reviews.llvm.org/D156116 fixed the handling of empty +struct when considering eligibility of the container struct for the FP +calling convention ('flattening'), this patch fixes the handling of +passing the empty struct itself. + +Fix https://github.com/llvm/llvm-project/issues/70319 + +(cherry picked from commit 9ca6bf3fb7b7df373723b3275730f101f9ff816b) +Change-Id: I9cfa77d6cdba31406349ac88d12e20ef96662d90 +--- + clang/lib/CodeGen/Targets/LoongArch.cpp | 10 ++++++---- + clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c | 8 ++++---- + 2 files changed, 10 insertions(+), 8 deletions(-) + +diff --git a/clang/lib/CodeGen/Targets/LoongArch.cpp b/clang/lib/CodeGen/Targets/LoongArch.cpp +index 7483bf6d6d1e..bc508a99da9c 100644 +--- a/clang/lib/CodeGen/Targets/LoongArch.cpp ++++ b/clang/lib/CodeGen/Targets/LoongArch.cpp +@@ -308,12 +308,14 @@ ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, + CGCXXABI::RAA_DirectInMemory); + } + +- // Ignore empty structs/unions. +- if (isEmptyRecord(getContext(), Ty, true)) +- return ABIArgInfo::getIgnore(); +- + uint64_t Size = getContext().getTypeSize(Ty); + ++ // Ignore empty struct or union whose size is zero, e.g. `struct { }` in C or ++ // `struct { int a[0]; }` in C++. In C++, `struct { }` is empty but it's size ++ // is 1 byte and g++ doesn't ignore it; clang++ matches this behaviour. ++ if (isEmptyRecord(getContext(), Ty, true) && Size == 0) ++ return ABIArgInfo::getIgnore(); ++ + // Pass floating point values via FARs if possible. + if (IsFixed && Ty->isFloatingType() && !Ty->isComplexType() && + FRLen >= Size && FARsLeft) { +diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c +index d0daafac336e..281b7b15841a 100644 +--- a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c ++++ b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c +@@ -93,7 +93,7 @@ struct s9 test_s9(struct s9 a) { + } + + // CHECK-C: define{{.*}} void @test_s10() +-// CHECK-CXX: define{{.*}} void @_Z8test_s103s10() ++// CHECK-CXX: define{{.*}} i64 @_Z8test_s103s10(i64 {{.*}}) + struct s10 { }; + struct s10 test_s10(struct s10 a) { + return a; +@@ -128,14 +128,14 @@ struct s14 test_s14(struct s14 a) { + } + + // CHECK-C: define{{.*}} void @test_s15() +-// CHECK-CXX: define{{.*}} void @_Z8test_s153s15() ++// CHECK-CXX: define{{.*}} i64 @_Z8test_s153s15(i64 {{.*}}) + struct s15 { int : 0; }; + struct s15 test_s15(struct s15 a) { + return a; + } + +-// CHECK-C: define{{.*}} void @test_s16() +-// CHECK-CXX: define{{.*}} void @_Z8test_s163s16() ++// CHECK-C: define{{.*}} i64 @test_s16(i64 {{.*}}) ++// CHECK-CXX: define{{.*}} i64 @_Z8test_s163s16(i64 {{.*}}) + struct s16 { int : 1; }; + struct s16 test_s16(struct s16 a) { + return a; +-- +2.20.1 + diff --git a/0013-LoongArch-Pre-commit-test-for-issue-70890.patch b/0013-LoongArch-Pre-commit-test-for-issue-70890.patch new file mode 100644 index 0000000..c810291 --- /dev/null +++ b/0013-LoongArch-Pre-commit-test-for-issue-70890.patch @@ -0,0 +1,47 @@ +From 8a6049b9d4baf0df5abe7f156bf6ec4b3fb7a256 Mon Sep 17 00:00:00 2001 +From: Weining Lu +Date: Thu, 2 Nov 2023 09:29:43 +0800 +Subject: [PATCH 13/17] [LoongArch] Pre-commit test for issue #70890 + +(cherry picked from commit 749083b91f31f370cf64831d3e7e6215b6d51442) +Change-Id: I33985b8a736f50767071a46116be36f8e719df56 +--- + .../LoongArch/abi-lp64d-empty-unions.c | 26 +++++++++++++++++++ + 1 file changed, 26 insertions(+) + create mode 100644 clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c + +diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c +new file mode 100644 +index 000000000000..b0607425336e +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c +@@ -0,0 +1,26 @@ ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +d -target-abi lp64d -emit-llvm %s -o - | \ ++// RUN: FileCheck --check-prefix=CHECK-C %s ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +d -target-abi lp64d -emit-llvm %s -o - -x c++ | \ ++// RUN: FileCheck --check-prefix=CHECK-CXX %s ++ ++#include ++ ++// CHECK-C: define{{.*}} void @test1() ++// CHECK-CXX: define{{.*}} i64 @_Z5test12u1(i64{{[^,]*}}) ++union u1 { }; ++union u1 test1(union u1 a) { ++ return a; ++} ++ ++struct s1 { ++ union u1 u; ++ int i; ++ float f; ++}; ++ ++// CHECK-C: define{{.*}} { i32, float } @test2(i32{{[^,]*}}, float{{[^,]*}}) ++/// FIXME: This doesn't match g++. ++// CHECK-CXX: define{{.*}} { i32, float } @_Z5test22s1(i32{{[^,]*}}, float{{[^,]*}}) ++struct s1 test2(struct s1 a) { ++ return a; ++} +-- +2.20.1 + diff --git a/0014-LoongArch-Fix-ABI-mismatch-with-g-when-handling-empt.patch b/0014-LoongArch-Fix-ABI-mismatch-with-g-when-handling-empt.patch new file mode 100644 index 0000000..3d391e4 --- /dev/null +++ b/0014-LoongArch-Fix-ABI-mismatch-with-g-when-handling-empt.patch @@ -0,0 +1,69 @@ +From 313138921cd84f1be06e2ecbf0f6e854242bb8b9 Mon Sep 17 00:00:00 2001 +From: Lu Weining +Date: Sat, 4 Nov 2023 10:04:37 +0800 +Subject: [PATCH 14/17] [LoongArch] Fix ABI mismatch with g++ when handling + empty unions (#71025) + +In g++, empty unions are not ignored like empty structs when flattening +structs to examine whether the structs can be passed via FARs in C++. +This patch aligns clang++ with g++. + +Fix https://github.com/llvm/llvm-project/issues/70890. + +(cherry picked from commit 4253fdc2c462da61cc0deb74a43265665720c828) +Change-Id: Ib9b0cc6e74f8e52ff394fc53b7fc9f4602d64096 +--- + clang/lib/CodeGen/Targets/LoongArch.cpp | 7 ++++--- + clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c | 2 +- + clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c | 3 +-- + 3 files changed, 6 insertions(+), 6 deletions(-) + +diff --git a/clang/lib/CodeGen/Targets/LoongArch.cpp b/clang/lib/CodeGen/Targets/LoongArch.cpp +index bc508a99da9c..63b9a1fdb988 100644 +--- a/clang/lib/CodeGen/Targets/LoongArch.cpp ++++ b/clang/lib/CodeGen/Targets/LoongArch.cpp +@@ -170,10 +170,11 @@ bool LoongArchABIInfo::detectFARsEligibleStructHelper( + // copy constructor are not eligible for the FP calling convention. + if (getRecordArgABI(Ty, CGT.getCXXABI())) + return false; +- if (isEmptyRecord(getContext(), Ty, true, true)) +- return true; + const RecordDecl *RD = RTy->getDecl(); +- // Unions aren't eligible unless they're empty (which is caught above). ++ if (isEmptyRecord(getContext(), Ty, true, true) && ++ (!RD->isUnion() || !isa(RD))) ++ return true; ++ // Unions aren't eligible unless they're empty in C (which is caught above). + if (RD->isUnion()) + return false; + const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); +diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c +index 281b7b15841a..2f7596f0ebdc 100644 +--- a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c ++++ b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c +@@ -3,7 +3,7 @@ + // RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +d -target-abi lp64d -emit-llvm %s -o - -x c++ | \ + // RUN: FileCheck --check-prefix=CHECK-CXX %s + +-// Fields containing empty structs or unions are ignored when flattening ++// Fields containing empty structs are ignored when flattening + // structs to examine whether the structs can be passed via FARs, even in C++. + // But there is an exception that non-zero-length array of empty structures are + // not ignored in C++. These rules are not documented in psABI +diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c +index b0607425336e..363e37efb646 100644 +--- a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c ++++ b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c +@@ -19,8 +19,7 @@ struct s1 { + }; + + // CHECK-C: define{{.*}} { i32, float } @test2(i32{{[^,]*}}, float{{[^,]*}}) +-/// FIXME: This doesn't match g++. +-// CHECK-CXX: define{{.*}} { i32, float } @_Z5test22s1(i32{{[^,]*}}, float{{[^,]*}}) ++// CHECK-CXX: define{{.*}} [2 x i64] @_Z5test22s1([2 x i64]{{[^,]*}}) + struct s1 test2(struct s1 a) { + return a; + } +-- +2.20.1 + diff --git a/0002-Driver-Default-LoongArch-to-fno-direct-access-extern.patch b/0015-Driver-Default-LoongArch-to-fno-direct-access-extern.patch similarity index 94% rename from 0002-Driver-Default-LoongArch-to-fno-direct-access-extern.patch rename to 0015-Driver-Default-LoongArch-to-fno-direct-access-extern.patch index bd72350..1a0e520 100644 --- a/0002-Driver-Default-LoongArch-to-fno-direct-access-extern.patch +++ b/0015-Driver-Default-LoongArch-to-fno-direct-access-extern.patch @@ -1,7 +1,7 @@ -From a92862b6612e45851e2598f96a400ba741b3684c Mon Sep 17 00:00:00 2001 +From 896bdd6b1b7bbfc0b157ad1aeee724973b58bb62 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 14 Nov 2023 00:43:40 -0800 -Subject: [PATCH] [Driver] Default LoongArch to +Subject: [PATCH 15/17] [Driver] Default LoongArch to -fno-direct-access-external-data for non-PIC (#72221) For -fno-pic, if an extern variable is defined in a DSO, a copy @@ -16,6 +16,7 @@ Keep Frontend conditions unchanged (-fdirect-access-external-data || Fix #71645 (cherry picked from commit 47eeee297775347cbdb7624d6a766c2a3eec4a59) +Change-Id: I9be865d2a200fcaf6e1aca6e784d85410f7f56ab --- clang/lib/Driver/ToolChains/Clang.cpp | 7 ++++++- clang/test/Driver/fdirect-access-external-data.c | 6 ++++++ diff --git a/0016-Clang-LoongArch-Precommit-test-for-fix-wrong-return-.patch b/0016-Clang-LoongArch-Precommit-test-for-fix-wrong-return-.patch new file mode 100644 index 0000000..b1a587d --- /dev/null +++ b/0016-Clang-LoongArch-Precommit-test-for-fix-wrong-return-.patch @@ -0,0 +1,109 @@ +From 150f40b2bd7023643e14d734bd4f554a8107bdbb Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Tue, 5 Mar 2024 19:44:28 +0800 +Subject: [PATCH 16/17] [Clang][LoongArch] Precommit test for fix wrong return + value type of __iocsrrd_h. NFC + +(cherry picked from commit aeda1a6e800e0dd6c91c0332b4db95094ad5b301) +(cherry picked from commit a9ba36c7e7d7fa076f201843e3b826b6c6d7f5ef) +Change-Id: I01c66e5b98ea9f53ba76eb0a741b40cbe1a5d950 +--- + clang/test/CodeGen/LoongArch/intrinsic-la32.c | 29 ++++++++++++++----- + clang/test/CodeGen/LoongArch/intrinsic-la64.c | 21 ++++++++++++-- + 2 files changed, 40 insertions(+), 10 deletions(-) + +diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la32.c b/clang/test/CodeGen/LoongArch/intrinsic-la32.c +index 93d54f511a9c..6a8d99880be3 100644 +--- a/clang/test/CodeGen/LoongArch/intrinsic-la32.c ++++ b/clang/test/CodeGen/LoongArch/intrinsic-la32.c +@@ -169,8 +169,8 @@ unsigned int cpucfg(unsigned int a) { + + // LA32-LABEL: @rdtime( + // LA32-NEXT: entry: +-// LA32-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimeh.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1:[0-9]+]], !srcloc !2 +-// LA32-NEXT: [[TMP1:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimel.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc !3 ++// LA32-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimeh.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1:[0-9]+]], !srcloc [[META2:![0-9]+]] ++// LA32-NEXT: [[TMP1:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimel.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc [[META3:![0-9]+]] + // LA32-NEXT: ret void + // + void rdtime() { +@@ -201,13 +201,28 @@ void loongarch_movgr2fcsr(int a) { + __builtin_loongarch_movgr2fcsr(1, a); + } + +-// CHECK-LABEL: @cacop_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.cacop.w(i32 1, i32 [[A:%.*]], i32 1024) +-// CHECK-NEXT: tail call void @llvm.loongarch.cacop.w(i32 1, i32 [[A]], i32 1024) +-// CHECK-NEXT: ret void ++// LA32-LABEL: @cacop_w( ++// LA32-NEXT: entry: ++// LA32-NEXT: tail call void @llvm.loongarch.cacop.w(i32 1, i32 [[A:%.*]], i32 1024) ++// LA32-NEXT: tail call void @llvm.loongarch.cacop.w(i32 1, i32 [[A]], i32 1024) ++// LA32-NEXT: ret void + // + void cacop_w(unsigned long int a) { + __cacop_w(1, a, 1024); + __builtin_loongarch_cacop_w(1, a, 1024); + } ++ ++// LA32-LABEL: @iocsrrd_h_result( ++// LA32-NEXT: entry: ++// LA32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A:%.*]]) ++// LA32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A]]) ++// LA32-NEXT: [[CONV2:%.*]] = and i32 [[TMP0]], 255 ++// LA32-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[CONV2]] ++// LA32-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD]] to i16 ++// LA32-NEXT: ret i16 [[CONV4]] ++// ++unsigned short iocsrrd_h_result(unsigned int a) { ++ unsigned short b = __iocsrrd_h(a); ++ unsigned short c = __builtin_loongarch_iocsrrd_h(a); ++ return b+c; ++} +diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la64.c b/clang/test/CodeGen/LoongArch/intrinsic-la64.c +index a740882eef54..48b6a7a3d227 100644 +--- a/clang/test/CodeGen/LoongArch/intrinsic-la64.c ++++ b/clang/test/CodeGen/LoongArch/intrinsic-la64.c +@@ -387,7 +387,7 @@ unsigned int cpucfg(unsigned int a) { + + // CHECK-LABEL: @rdtime_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call { i64, i64 } asm sideeffect "rdtime.d $0, $1\0A\09", "=&r,=&r"() #[[ATTR1:[0-9]+]], !srcloc !2 ++// CHECK-NEXT: [[TMP0:%.*]] = tail call { i64, i64 } asm sideeffect "rdtime.d $0, $1\0A\09", "=&r,=&r"() #[[ATTR1:[0-9]+]], !srcloc [[META2:![0-9]+]] + // CHECK-NEXT: ret void + // + void rdtime_d() { +@@ -396,8 +396,8 @@ void rdtime_d() { + + // CHECK-LABEL: @rdtime( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimeh.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc !3 +-// CHECK-NEXT: [[TMP1:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimel.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc !4 ++// CHECK-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimeh.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc [[META3:![0-9]+]] ++// CHECK-NEXT: [[TMP1:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimel.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc [[META4:![0-9]+]] + // CHECK-NEXT: ret void + // + void rdtime() { +@@ -427,3 +427,18 @@ void loongarch_movgr2fcsr(int a) { + __movgr2fcsr(1, a); + __builtin_loongarch_movgr2fcsr(1, a); + } ++ ++// CHECK-LABEL: @iocsrrd_h_result( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A:%.*]]) ++// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A]]) ++// CHECK-NEXT: [[CONV2:%.*]] = and i32 [[TMP0]], 255 ++// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[CONV2]] ++// CHECK-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD]] to i16 ++// CHECK-NEXT: ret i16 [[CONV4]] ++// ++unsigned short iocsrrd_h_result(unsigned int a) { ++ unsigned short b = __iocsrrd_h(a); ++ unsigned short c = __builtin_loongarch_iocsrrd_h(a); ++ return b+c; ++} +-- +2.20.1 + diff --git a/0017-Clang-LoongArch-Fix-wrong-return-value-type-of-__ioc.patch b/0017-Clang-LoongArch-Fix-wrong-return-value-type-of-__ioc.patch new file mode 100644 index 0000000..be99df9 --- /dev/null +++ b/0017-Clang-LoongArch-Fix-wrong-return-value-type-of-__ioc.patch @@ -0,0 +1,73 @@ +From a18df7b21762bef413dfa2c9a2711860fc9678b3 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Wed, 6 Mar 2024 10:03:28 +0800 +Subject: [PATCH 17/17] [Clang][LoongArch] Fix wrong return value type of + __iocsrrd_h (#84100) + +relate: +https: //gcc.gnu.org/pipermail/gcc-patches/2024-February/645016.html +(cherry picked from commit 2f479b811274fede36535e34ecb545ac22e399c3) +(cherry picked from commit 9b9aee16d4dcf1b4af49988ebd7918fa4ce77e44) +Change-Id: I3627918ac38a03620db9b1efbe874f14fab675f6 +--- + clang/lib/Headers/larchintrin.h | 2 +- + clang/test/CodeGen/LoongArch/intrinsic-la32.c | 8 ++++---- + clang/test/CodeGen/LoongArch/intrinsic-la64.c | 8 ++++---- + 3 files changed, 9 insertions(+), 9 deletions(-) + +diff --git a/clang/lib/Headers/larchintrin.h b/clang/lib/Headers/larchintrin.h +index c5c533ee0b8c..24dd29ce91ff 100644 +--- a/clang/lib/Headers/larchintrin.h ++++ b/clang/lib/Headers/larchintrin.h +@@ -156,7 +156,7 @@ extern __inline unsigned char + return (unsigned char)__builtin_loongarch_iocsrrd_b((unsigned int)_1); + } + +-extern __inline unsigned char ++extern __inline unsigned short + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __iocsrrd_h(unsigned int _1) { + return (unsigned short)__builtin_loongarch_iocsrrd_h((unsigned int)_1); +diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la32.c b/clang/test/CodeGen/LoongArch/intrinsic-la32.c +index 6a8d99880be3..eb3f8cbe7ac4 100644 +--- a/clang/test/CodeGen/LoongArch/intrinsic-la32.c ++++ b/clang/test/CodeGen/LoongArch/intrinsic-la32.c +@@ -215,11 +215,11 @@ void cacop_w(unsigned long int a) { + // LA32-LABEL: @iocsrrd_h_result( + // LA32-NEXT: entry: + // LA32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A:%.*]]) ++// LA32-NEXT: [[CONV_I:%.*]] = trunc i32 [[TMP0]] to i16 + // LA32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A]]) +-// LA32-NEXT: [[CONV2:%.*]] = and i32 [[TMP0]], 255 +-// LA32-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[CONV2]] +-// LA32-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD]] to i16 +-// LA32-NEXT: ret i16 [[CONV4]] ++// LA32-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ++// LA32-NEXT: [[CONV3:%.*]] = add i16 [[TMP2]], [[CONV_I]] ++// LA32-NEXT: ret i16 [[CONV3]] + // + unsigned short iocsrrd_h_result(unsigned int a) { + unsigned short b = __iocsrrd_h(a); +diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la64.c b/clang/test/CodeGen/LoongArch/intrinsic-la64.c +index 48b6a7a3d227..50ec358f546e 100644 +--- a/clang/test/CodeGen/LoongArch/intrinsic-la64.c ++++ b/clang/test/CodeGen/LoongArch/intrinsic-la64.c +@@ -431,11 +431,11 @@ void loongarch_movgr2fcsr(int a) { + // CHECK-LABEL: @iocsrrd_h_result( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A:%.*]]) ++// CHECK-NEXT: [[CONV_I:%.*]] = trunc i32 [[TMP0]] to i16 + // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A]]) +-// CHECK-NEXT: [[CONV2:%.*]] = and i32 [[TMP0]], 255 +-// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[CONV2]] +-// CHECK-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD]] to i16 +-// CHECK-NEXT: ret i16 [[CONV4]] ++// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ++// CHECK-NEXT: [[CONV3:%.*]] = add i16 [[TMP2]], [[CONV_I]] ++// CHECK-NEXT: ret i16 [[CONV3]] + // + unsigned short iocsrrd_h_result(unsigned int a) { + unsigned short b = __iocsrrd_h(a); +-- +2.20.1 + diff --git a/clang.spec b/clang.spec index c8a8e68..fbfeac9 100644 --- a/clang.spec +++ b/clang.spec @@ -1,4 +1,4 @@ -%define anolis_release 3 +%define anolis_release 4 %global toolchain clang @@ -51,7 +51,25 @@ Patch101: 0009-disable-recommonmark.patch # Add anolis support in driver Patch11: 1001-add-anolis-os-support-in-driver.patch Patch12: 0001-Clang-LoongArch-Add-the-triple-for-anolis-os.patch -Patch13: 0002-Driver-Default-LoongArch-to-fno-direct-access-extern.patch + +# Patches for LoongArch +Patch14: 0001-Clang-LoongArch-Use-the-ClangBuiltin-class-to-automa.patch +Patch15: 0002-Clang-LoongArch-Support-compiler-options-mlsx-mlasx-.patch +Patch16: 0003-Clang-LoongArch-Add-ABI-implementation-of-passing-ve.patch +Patch17: 0004-Clang-LoongArch-Support-the-builtin-functions-for-LS.patch +Patch18: 0005-Clang-LoongArch-Support-the-builtin-functions-for-LA.patch +Patch19: 0006-LoongArch-CodeGen-Add-LSX-builtin-testcases.patch +Patch20: 0007-LoongArch-CodeGen-Add-LASX-builtin-testcases.patch +Patch21: 0008-Clang-LoongArch-Do-not-pass-vector-arguments-via-vec.patch +Patch22: 0009-Driver-Support-fsanitize-cfi-icall-on-loongarch64-67.patch +Patch23: 0010-Clang-LoongArch-Generate-_mcount-instead-of-mcount-6.patch +Patch24: 0011-LoongArch-test-Add-some-ABI-regression-tests-for-emp.patch +Patch25: 0012-LoongArch-Fix-ABI-mismatch-with-gcc-g-about-empty-st.patch +Patch26: 0013-LoongArch-Pre-commit-test-for-issue-70890.patch +Patch27: 0014-LoongArch-Fix-ABI-mismatch-with-g-when-handling-empt.patch +Patch28: 0015-Driver-Default-LoongArch-to-fno-direct-access-extern.patch +Patch29: 0016-Clang-LoongArch-Precommit-test-for-fix-wrong-return-.patch +Patch30: 0017-Clang-LoongArch-Fix-wrong-return-value-type-of-__ioc.patch # Patches for clang-tools-extra # See https://reviews.llvm.org/D120301 @@ -474,6 +492,11 @@ LD_LIBRARY_PATH=%{buildroot}/%{_libdir} %{__ninja} check-all -C %{__cmake_buildd %{python3_sitelib}/clang/ %changelog +* Tue Apr 9 2024 Chen Li - 17.0.6-4 +- LoongArch Backport: Add the support for vector(LSX/LASX) +- LoongArch Backport: Support -fsanitize=cfi-icall +- LoongArch Backport: Bugfix + * Mon Apr 1 2024 Chen Li - 17.0.6-3 - LoongArch: Add the triple for anolis os - LoongArch: Fix Segmentation fault when pie is disabled -- Gitee